orjson
Library
orjson
Overview
orjson is an ultra-fast Python JSON library written in Rust. It's over 6x faster than the standard json library and natively supports Python-specific types like dataclasses and datetime. As the top candidate for Python JSON library in 2025, migration from ujson is recommended for performance-focused applications, and its adoption is rapidly increasing in API development requiring speed optimization.
Details
orjson 3.10 is a groundbreaking library that brings Rust's memory safety and high performance to the Python ecosystem. While maintaining complete compatibility with the standard library, it natively supports Python-specific types such as dataclasses, datetime, NumPy arrays, and UUID, with fine-grained output control through options. It provides high-quality implementation that meets production environment requirements, including RFC 3339-compliant datetime serialization, microsecond-precision time processing, and secure UTF-8 validation.
Key Features
- Ultra-Fast: Over 6x faster than the standard json library
- Python Type Support: Native support for dataclasses, datetime, and NumPy
- RFC Compliant: RFC 3339-compliant datetime serialization
- Memory Efficient: Low memory footprint through Rust-based implementation
- Type Safe: Robust memory safety derived from Rust
- Compatibility: Fully compatible API with standard json library
Pros and Cons
Pros
- Overwhelming speed compared to standard json library (6x+ faster)
- Standard support for Python-specific types like dataclasses, datetime, NumPy arrays
- RFC 3339-compliant datetime processing following international standards
- Robust memory safety and crash resistance through Rust-based implementation
- Easy replacement with identical API to standard library
- Extensive production environment operational experience
Cons
- Requires compiled binaries during installation (limitations on some platforms)
- Larger installed footprint (compiled binary size) than the pure-Python standard library
- More limited extensibility for custom encoders compared to standard library
- Limited debug information making troubleshooting difficult in some cases
- Rust toolchain dependency (when building from source)
- Performance difference with standard library is small for small-scale data
Reference Pages
Code Examples
Basic Setup
# Install orjson
pip install orjson

# Install a pinned, compatible release range (recommended for applications)
pip install "orjson>=3.10,<4"

# Pipfile specification (pipenv)
# FIX: caret ranges ("^3.10") are Poetry syntax; Pipfile uses PEP 440 specifiers.
echo 'orjson = ">=3.10,<4"' >> Pipfile

# requirements.txt specification
echo 'orjson>=3.10,<4' >> requirements.txt
Basic Serialization
import orjson
import datetime
import uuid

# A representative payload mixing the JSON-native types with datetime and None.
profile = {
    "id": 123,
    "name": "John Doe",
    "email": "[email protected]",
    "is_active": True,
    "balance": 1234.56,
    "tags": ["admin", "premium"],
    "metadata": {
        "created_at": datetime.datetime.now(),
        "last_login": None,
    },
}

# Serialization — note that orjson.dumps() returns bytes, not str.
payload = orjson.dumps(profile)
print(f"JSON bytes: {payload}")
print(f"Size: {len(payload)} bytes")

# Deserialization back into plain Python objects.
roundtripped = orjson.loads(payload)
print(f"Decoded: {roundtripped}")

# orjson.loads() also accepts str input.
raw_text = '{"name": "John Doe", "age": 30, "active": true}'
parsed = orjson.loads(raw_text)
print(f"Parsed: {parsed}")

# bytes, bytearray and memoryview inputs are all accepted as well.
from_bytes = orjson.loads(b'{"key": "value"}')
from_bytearray = orjson.loads(bytearray(b'{"key": "value"}'))
from_memoryview = orjson.loads(memoryview(b'{"key": "value"}'))
print("All input types supported:", from_bytes, from_bytearray, from_memoryview)
dataclasses and datetime Support
# Demonstrates orjson's zero-config support for dataclasses and the datetime
# family, plus the OPT_* flags that tune datetime output formatting.
import orjson
import dataclasses
import datetime
import zoneinfo
from typing import List, Optional

@dataclasses.dataclass
class User:
    # created_at may be naive or timezone-aware; both serialize to RFC 3339.
    id: int
    name: str
    email: str
    created_at: datetime.datetime
    last_login: Optional[datetime.datetime] = None
    is_active: bool = dataclasses.field(default=True)
    roles: List[str] = dataclasses.field(default_factory=list)

@dataclasses.dataclass
class Organization:
    id: int
    name: str
    users: List[User]  # nested dataclasses are serialized recursively
    founded_date: datetime.date

# Create dataclass instances
user1 = User(
    id=1,
    name="John Doe",
    email="[email protected]",
    created_at=datetime.datetime(2023, 1, 15, 10, 30, 0, tzinfo=zoneinfo.ZoneInfo("America/New_York")),
    last_login=datetime.datetime.now(zoneinfo.ZoneInfo("UTC")),
    roles=["admin", "user"]
)
user2 = User(
    id=2,
    name="Jane Smith",
    email="[email protected]",
    created_at=datetime.datetime(2023, 6, 20, 14, 45, 30, 123456),  # naive, with microseconds
    roles=["user"]
)
org = Organization(
    id=100,
    name="Example Corp",
    users=[user1, user2],
    founded_date=datetime.date(2020, 4, 1)
)

# Automatic dataclass serialization — no default= hook is required.
org_json = orjson.dumps(org)
print(f"Organization JSON: {org_json}")

# Various datetime format support
datetime_examples = {
    "naive_datetime": datetime.datetime(2025, 1, 1, 12, 0, 0),
    "utc_datetime": datetime.datetime(2025, 1, 1, 12, 0, 0, tzinfo=zoneinfo.ZoneInfo("UTC")),
    "est_datetime": datetime.datetime(2025, 1, 1, 12, 0, 0, tzinfo=zoneinfo.ZoneInfo("America/New_York")),
    "date_only": datetime.date(2025, 1, 1),
    "time_only": datetime.time(12, 30, 45, 123456)
}

# Default datetime serialization (RFC 3339 strings)
default_json = orjson.dumps(datetime_examples)
print(f"Default datetime: {default_json}")

# Process naive datetimes as if they were UTC
naive_utc_json = orjson.dumps(datetime_examples, option=orjson.OPT_NAIVE_UTC)
print(f"Naive UTC: {naive_utc_json}")

# Omit microseconds from datetime/time output
no_microseconds_json = orjson.dumps(datetime_examples, option=orjson.OPT_OMIT_MICROSECONDS)
print(f"No microseconds: {no_microseconds_json}")

# Use "Z" suffix instead of "+00:00" for UTC times
utc_z_json = orjson.dumps(datetime_examples, option=orjson.OPT_UTC_Z)
print(f"UTC Z format: {utc_z_json}")
Advanced Configuration and Options
# Advanced options: NumPy arrays, enums, non-string keys and integer limits.
# FIX: `datetime` and `uuid` are used below but were never imported,
# which made this example raise NameError at runtime.
import datetime
import decimal
import enum
import uuid
from dataclasses import dataclass
from typing import Any

import numpy as np
import orjson

# Custom enum definitions — enum *values* are what get serialized.
class Status(enum.Enum):
    ACTIVE = "active"
    INACTIVE = "inactive"
    PENDING = "pending"

class Priority(enum.Enum):
    LOW = 1
    MEDIUM = 2
    HIGH = 3

@dataclass
class Task:
    id: int
    title: str
    status: Status
    priority: Priority
    data: np.ndarray

# Complex data including NumPy arrays
task_data = Task(
    id=123,
    title="Important Task",
    status=Status.ACTIVE,
    priority=Priority.HIGH,
    data=np.array([[1, 2, 3], [4, 5, 6]])
)

# Serialization with NumPy support enabled
numpy_json = orjson.dumps(task_data, option=orjson.OPT_SERIALIZE_NUMPY)
print(f"NumPy serialization: {numpy_json}")

# Combination of multiple options — flags are bitwise OR-ed together.
combined_options = (
    orjson.OPT_NAIVE_UTC |        # Process naive datetime as UTC
    orjson.OPT_SERIALIZE_NUMPY |  # Serialize NumPy arrays
    orjson.OPT_SORT_KEYS |        # Sort keys
    orjson.OPT_INDENT_2           # Indent with 2 spaces
)
test_data = {
    "timestamp": datetime.datetime(2025, 1, 1, 12, 0, 0),
    "matrix": np.array([[1, 2], [3, 4]]),
    "z_key": "last",
    "a_key": "first",
    "m_key": "middle"
}
formatted_json = orjson.dumps(test_data, option=combined_options)
print(f"Combined options: {formatted_json.decode()}")

# Non-string key support (UUID, datetime, date and int keys become strings)
non_str_keys_data = {
    uuid.uuid4(): "uuid key",
    datetime.datetime(2025, 1, 1): "datetime key",
    datetime.date(2025, 1, 1): "date key",
    42: "integer key"
}
non_str_json = orjson.dumps(non_str_keys_data, option=orjson.OPT_NON_STR_KEYS)
print(f"Non-string keys: {non_str_json}")

# Strict integer mode (53-bit limit, matching JavaScript's safe-integer range)
large_integers = {
    "safe_int": 9007199254740991,   # 2^53 - 1
    "unsafe_int": 9007199254740992  # 2^53
}
# Normal mode (64-bit integer support)
normal_int_json = orjson.dumps(large_integers)
print(f"Normal integers: {normal_int_json}")
# Strict mode (will cause error)
try:
    strict_int_json = orjson.dumps(large_integers, option=orjson.OPT_STRICT_INTEGER)
    print(f"Strict integers: {strict_int_json}")
except orjson.JSONEncodeError as e:
    print(f"Strict integer error: {e}")
Custom Type Serialization
import orjson
import decimal
from dataclasses import dataclass  # FIX: @dataclass is used below but was never imported
from datetime import datetime, timezone
from typing import Any

# Default function for custom type support.
# orjson calls `default` once for every object it cannot serialize natively;
# the return value is then serialized in its place.
def custom_default(obj: Any) -> Any:
    """Convert types orjson cannot handle natively into JSON-friendly values.

    Raises TypeError for anything unrecognized, which orjson surfaces
    as JSONEncodeError.
    """
    if isinstance(obj, decimal.Decimal):
        # Output Decimal type as string (preserves exact precision)
        return str(obj)
    elif isinstance(obj, complex):
        # Output complex numbers as dictionary
        return {"real": obj.real, "imag": obj.imag, "_type": "complex"}
    elif isinstance(obj, bytes):
        # Output byte sequences as base64 encoded
        import base64
        return {"data": base64.b64encode(obj).decode(), "_type": "bytes"}
    elif isinstance(obj, set):
        # Output sets as lists
        return {"items": list(obj), "_type": "set"}
    elif hasattr(obj, '__dict__'):
        # Output custom classes as dictionaries
        return {**obj.__dict__, "_type": type(obj).__name__}
    # Raise TypeError for unsupported types
    raise TypeError(f"Type {type(obj)} is not JSON serializable")

# Custom class for testing
class CustomObject:
    def __init__(self, name: str, value: int):
        self.name = name
        self.value = value
        self.created_at = datetime.now(timezone.utc)

# Data containing custom types
custom_data = {
    "decimal_value": decimal.Decimal("123.456789"),
    "complex_number": 3 + 4j,
    "byte_data": b"Hello, World!",
    "unique_items": {1, 2, 3, 4, 5},
    "custom_object": CustomObject("test", 42)
}

# Custom serialization
try:
    # Fails without default function
    orjson.dumps(custom_data)
except orjson.JSONEncodeError as e:
    print(f"Error without default: {e}")

# Succeeds with default function
custom_json = orjson.dumps(custom_data, default=custom_default, option=orjson.OPT_INDENT_2)
print(f"Custom serialization: {custom_json.decode()}")

# dataclass pass-through example
@dataclass
class SecureUser:
    id: int
    name: str
    password: str

def secure_default(obj: Any) -> Any:
    """Serialize SecureUser without exposing the password field."""
    if isinstance(obj, SecureUser):
        # Serialize excluding password
        return {"id": obj.id, "name": obj.name}
    raise TypeError

secure_user = SecureUser(1, "John Doe", "secret123")

# Default dataclass serialization (includes password!)
default_user_json = orjson.dumps(secure_user)
print(f"Default dataclass: {default_user_json}")

# OPT_PASSTHROUGH_DATACLASS routes dataclasses through `default` instead
# of serializing their fields automatically (excludes password here).
secure_user_json = orjson.dumps(
    secure_user,
    option=orjson.OPT_PASSTHROUGH_DATACLASS,
    default=secure_default
)
print(f"Secure dataclass: {secure_user_json}")
Performance Optimization and Benchmarking
import orjson
import json
import time
import sys
from typing import List, Dict, Any
import datetime
import dataclasses
@dataclasses.dataclass
class PerformanceTestData:
id: int
name: str
email: str
created_at: datetime.datetime
metadata: Dict[str, Any]
tags: List[str]
is_active: bool
def generate_test_data(count: int) -> List[PerformanceTestData]:
"""Generate test data"""
data = []
for i in range(count):
data.append(PerformanceTestData(
id=i,
name=f"User {i}",
email=f"user{i}@example.com",
created_at=datetime.datetime.now(),
metadata={
"department": f"Department {i % 10}",
"salary": 50000 + (i * 100),
"projects": [f"Project {j}" for j in range(i % 5)],
"settings": {
"theme": "dark" if i % 2 else "light",
"notifications": i % 3 == 0
}
},
tags=[f"tag{j}" for j in range(i % 3 + 1)],
is_active=i % 10 != 0
))
return data
def measure_time(func, *args, **kwargs):
    """Run *func* once and report how long it took.

    Returns a ``(result, elapsed_ms)`` tuple where ``elapsed_ms`` is the
    wall-clock duration in milliseconds, measured with ``perf_counter``.
    """
    t0 = time.perf_counter()
    value = func(*args, **kwargs)
    elapsed_ms = (time.perf_counter() - t0) * 1000  # milliseconds
    return value, elapsed_ms
def benchmark_serialization(data_count: int = 10000):
    """Compare orjson vs. stdlib json encode/decode speed and output size.

    Generates ``data_count`` synthetic records, times per-record encoding
    and decoding with both libraries, and prints timing, size and overall
    speedup summaries to stdout.
    """
    print(f"Benchmarking with {data_count} records...")
    # Generate test data
    test_data = generate_test_data(data_count)
    # orjson serialization (handles dataclasses and datetime natively)
    orjson_data, orjson_encode_time = measure_time(
        lambda: [orjson.dumps(item) for item in test_data]
    )
    # Standard json serialization (requires manual conversion for dataclass)
    json_compatible_data = [dataclasses.asdict(item) for item in test_data]
    # Convert datetime to string (standard json cannot handle directly)
    def convert_datetime(obj):
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        return obj
    json_safe_data = []
    for item in json_compatible_data:
        item_copy = item.copy()
        item_copy['created_at'] = convert_datetime(item_copy['created_at'])
        json_safe_data.append(item_copy)
    # NOTE(review): the asdict/isoformat prep above is deliberately excluded
    # from the stdlib timing window, so this measures encoding cost only.
    json_data, json_encode_time = measure_time(
        lambda: [json.dumps(item) for item in json_safe_data]
    )
    # Deserialization
    orjson_decoded, orjson_decode_time = measure_time(
        lambda: [orjson.loads(data) for data in orjson_data]
    )
    json_decoded, json_decode_time = measure_time(
        lambda: [json.loads(data) for data in json_data]
    )
    # Size comparison (total bytes across all encoded records)
    orjson_size = sum(len(data) for data in orjson_data)
    json_size = sum(len(data) for data in json_data)
    # Output results
    print("\n=== Serialization Performance ===")
    print(f"orjson encode: {orjson_encode_time:.2f}ms")
    print(f"json encode: {json_encode_time:.2f}ms")
    print(f"orjson speedup: {json_encode_time / orjson_encode_time:.1f}x")
    print("\n=== Deserialization Performance ===")
    print(f"orjson decode: {orjson_decode_time:.2f}ms")
    print(f"json decode: {json_decode_time:.2f}ms")
    print(f"orjson speedup: {json_decode_time / orjson_decode_time:.1f}x")
    print("\n=== Size Comparison ===")
    print(f"orjson size: {orjson_size / 1024:.2f} KB")
    print(f"json size: {json_size / 1024:.2f} KB")
    print(f"Size difference: {((json_size - orjson_size) / json_size * 100):.1f}%")
    print("\n=== Overall Performance ===")
    total_orjson_time = orjson_encode_time + orjson_decode_time
    total_json_time = json_encode_time + json_decode_time
    print(f"Total orjson time: {total_orjson_time:.2f}ms")
    print(f"Total json time: {total_json_time:.2f}ms")
    print(f"Overall speedup: {total_json_time / total_orjson_time:.1f}x")
# Memory usage measurement
def measure_memory_usage():
"""Measure memory usage"""
import tracemalloc
# Generate large data
large_data = generate_test_data(50000)
# Measure orjson memory usage
tracemalloc.start()
orjson_result = [orjson.dumps(item) for item in large_data]
orjson_current, orjson_peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
# Measure standard json memory usage
json_safe_data = [dataclasses.asdict(item) for item in large_data]
for item in json_safe_data:
if 'created_at' in item:
item['created_at'] = item['created_at'].isoformat()
tracemalloc.start()
json_result = [json.dumps(item) for item in json_safe_data]
json_current, json_peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
print("\n=== Memory Usage ===")
print(f"orjson peak memory: {orjson_peak / 1024 / 1024:.2f} MB")
print(f"json peak memory: {json_peak / 1024 / 1024:.2f} MB")
print(f"Memory efficiency: {json_peak / orjson_peak:.1f}x")
if __name__ == "__main__":
    # Entry point: run the timing benchmark first, then the memory comparison.
    benchmark_serialization(10000)
    measure_memory_usage()
Error Handling and Debugging
import orjson
import json
from typing import Any
def comprehensive_error_handling():
    """Walk through the orjson failure modes a caller should expect.

    Each numbered section triggers one class of error (encode, invalid
    UTF-8, decode, oversized integers, circular references, failing
    ``default`` hooks) and prints what orjson raises.
    """
    # 1. Basic error handling: sets are not serializable without a default hook
    invalid_data = {1, 2, 3}  # set cannot be directly serialized
    try:
        result = orjson.dumps(invalid_data)
    except orjson.JSONEncodeError as e:
        print(f"Encode error: {e}")
        print(f"Error type: {type(e)}")
    # 2. Invalid UTF-8 string handling (orjson validates strictly)
    try:
        # Invalid surrogate pair
        invalid_utf8 = '\ud800'  # Lone high surrogate
        orjson.dumps(invalid_utf8)
    except orjson.JSONEncodeError as e:
        print(f"Invalid UTF-8 error: {e}")
        # Comparison with standard json
        print(f"Standard json handles it: {json.dumps(invalid_utf8)}")
    # 3. Decode error handling
    try:
        # Invalid JSON
        invalid_json = b'{"key": value}'  # value not quoted
        orjson.loads(invalid_json)
    except orjson.JSONDecodeError as e:
        print(f"Decode error: {e}")
        print(f"Error position: line {getattr(e, 'lineno', 'unknown')}, column {getattr(e, 'colno', 'unknown')}")
    # 4. Invalid UTF-8 byte sequence handling
    try:
        invalid_bytes = b'"\xed\xa0\x80"'  # Invalid UTF-8 byte sequence
        orjson.loads(invalid_bytes)
    except orjson.JSONDecodeError as e:
        print(f"Invalid UTF-8 bytes error: {e}")
        # Workaround: re-decode lossily with replacement characters, then parse
        try:
            recovered = orjson.loads(invalid_bytes.decode("utf-8", "replace"))
            print(f"Recovered data: {recovered}")
        except Exception as recovery_error:
            print(f"Recovery failed: {recovery_error}")
    # 5. Large integer handling
    large_numbers = {
        "max_safe": 9007199254740991,  # 2^53 - 1
        "too_large": 9007199254740992,  # 2^53
        "way_too_large": 2**60
    }
    # Normal mode (64-bit integers pass through)
    normal_result = orjson.dumps(large_numbers)
    print(f"Normal mode: {normal_result}")
    # Strict mode (rejects values outside the 53-bit range)
    try:
        strict_result = orjson.dumps(large_numbers, option=orjson.OPT_STRICT_INTEGER)
    except orjson.JSONEncodeError as e:
        print(f"Strict integer mode error: {e}")
    # 6. Circular reference detection
    circular_data = {"name": "parent"}
    circular_data["self"] = circular_data
    try:
        orjson.dumps(circular_data)
    except orjson.JSONEncodeError as e:
        print(f"Circular reference error: {e}")
    # 7. Error handling in custom default functions
    def failing_default(obj: Any) -> Any:
        # Handles sets only; everything else is rejected.
        if isinstance(obj, set):
            return list(obj)
        # Explicitly raise TypeError
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
    def safe_default(obj: Any) -> Any:
        """Default hook that never raises: tags every value with a _type key."""
        try:
            if isinstance(obj, set):
                return {"_type": "set", "items": list(obj)}
            elif hasattr(obj, '__dict__'):
                return {"_type": type(obj).__name__, "data": obj.__dict__}
            else:
                # Fallback: string representation
                return {"_type": "unknown", "str": str(obj)}
        except Exception as e:
            # Final fallback
            return {"_type": "error", "message": str(e)}
    problematic_data = {
        "set_data": {1, 2, 3},
        "custom_object": object(),
        "lambda_func": lambda x: x
    }
    # Failing default function: object() has no __dict__, so it raises
    try:
        orjson.dumps(problematic_data, default=failing_default)
    except orjson.JSONEncodeError as e:
        print(f"Default function error: {e}")
    # Safe default function
    safe_result = orjson.dumps(problematic_data, default=safe_default, option=orjson.OPT_INDENT_2)
    print(f"Safe default result: {safe_result.decode()}")
def debug_helper_functions():
    """Demonstrate defensive wrappers around orjson dumps/loads.

    Defines safe_dumps/safe_loads (which fall back to the stdlib json
    module on failure) and a round-trip compatibility checker, then runs
    them against a handful of representative test cases.
    """
    # FIX: `datetime` is used in test_cases below but this example never
    # imported it, causing a NameError at runtime.
    import datetime

    def safe_dumps(obj: Any, **kwargs) -> bytes:
        """orjson.dumps with a stdlib-json fallback; always returns bytes."""
        try:
            return orjson.dumps(obj, **kwargs)
        except orjson.JSONEncodeError as e:
            print(f"Serialization failed: {e}")
            # Fallback: use standard json (default=str stringifies anything)
            try:
                fallback_result = json.dumps(obj, default=str, ensure_ascii=False)
                print(f"Fallback to standard json: {fallback_result}")
                return fallback_result.encode()
            except Exception as fallback_error:
                print(f"Fallback also failed: {fallback_error}")
                return b'{"error": "serialization_failed"}'

    def safe_loads(data: bytes | str) -> Any:
        """orjson.loads with a lenient stdlib-json fallback."""
        try:
            return orjson.loads(data)
        except orjson.JSONDecodeError as e:
            print(f"Deserialization failed: {e}")
            # Fallback: use standard json after lossy UTF-8 decoding
            try:
                if isinstance(data, bytes):
                    data = data.decode('utf-8', 'replace')
                return json.loads(data)
            except Exception as fallback_error:
                print(f"Fallback also failed: {fallback_error}")
                return {"error": "deserialization_failed"}

    def validate_json_compatibility(obj: Any) -> bool:
        """Report whether *obj* round-trips through orjson (and stdlib json)."""
        try:
            # Test with orjson
            orjson_result = orjson.dumps(obj)
            orjson_parsed = orjson.loads(orjson_result)
            # Test with standard json (if possible)
            try:
                json_result = json.dumps(obj, default=str, ensure_ascii=False)
                json_parsed = json.loads(json_result)
                print("Both orjson and standard json succeeded")
                return True
            except Exception:
                print("orjson succeeded, but standard json failed")
                return True
        except Exception as e:
            print(f"JSON compatibility check failed: {e}")
            return False

    # Validation with test data
    test_cases = [
        {"simple": "data"},
        {1, 2, 3},  # set - expected to fail
        datetime.datetime.now(),  # datetime - succeeds with orjson
        {"valid": True, "number": 42}
    ]
    for i, test_case in enumerate(test_cases):
        print(f"\nTest case {i + 1}: {type(test_case).__name__}")
        # Safe operation test
        result = safe_dumps(test_case)
        parsed = safe_loads(result)
        # Compatibility check
        is_compatible = validate_json_compatibility(test_case)
        print(f"Compatibility: {is_compatible}")
if __name__ == "__main__":
    # Entry point: demonstrate error scenarios, then the defensive wrappers.
    print("=== Comprehensive Error Handling ===")
    comprehensive_error_handling()
    print("\n=== Debug Helper Functions ===")
    debug_helper_functions()