orjson

Serialization · Python · Rust · High Performance · dataclasses · datetime · NumPy

Library

orjson

Overview

orjson is an ultra-fast Python JSON library written in Rust. It's over 6x faster than the standard json library and natively supports Python-specific types like dataclasses and datetime. It is a top candidate for a Python JSON library in 2025; migration from ujson is recommended for performance-focused applications, and its adoption is rapidly increasing in API development requiring speed optimization.

Details

orjson 3.10 is a groundbreaking library that brings Rust's memory safety and high performance to the Python ecosystem. While maintaining complete compatibility with the standard library, it natively supports Python-specific types such as dataclasses, datetime, NumPy arrays, and UUID, with fine-grained output control through options. It provides high-quality implementation that meets production environment requirements, including RFC 3339-compliant datetime serialization, microsecond-precision time processing, and secure UTF-8 validation.

Key Features

  • Ultra-Fast: Over 6x faster than the standard json library
  • Python Type Support: Native support for dataclasses, datetime, and NumPy
  • RFC Compliant: RFC 3339-compliant datetime serialization
  • Memory Efficient: Low memory footprint through Rust-based implementation
  • Type Safe: Robust memory safety derived from Rust
  • Compatibility: Fully compatible API with standard json library

Pros and Cons

Pros

  • Overwhelming speed compared to standard json library (6x+ faster)
  • Standard support for Python-specific types like dataclasses, datetime, NumPy arrays
  • RFC 3339-compliant datetime processing following international standards
  • Robust memory safety and crash resistance through Rust-based implementation
  • Easy replacement with identical API to standard library
  • Extensive production environment operational experience

Cons

  • Requires compiled binaries during installation (limitations on some platforms)
  • Larger memory footprint (binary size) than standard library
  • More limited extensibility for custom encoders compared to standard library
  • Limited debug information making troubleshooting difficult in some cases
  • Rust toolchain dependency (when building from source)
  • Performance difference with standard library is small for small-scale data

Reference Pages

Code Examples

Basic Setup

# Install orjson (latest release)
pip install orjson

# Install pinned to a compatible release range (>=3.10, below the next major)
pip install "orjson>=3.10,<4"

# Pipfile specification (pipenv)
echo 'orjson = "^3.10"' >> Pipfile

# requirements.txt specification
echo 'orjson >= 3.10,<4' >> requirements.txt

Basic Serialization

import orjson
import datetime
import uuid

# A plain dictionary covering the common JSON-compatible types, plus a
# datetime value (natively supported by orjson) and a None.
profile = {
    "id": 123,
    "name": "John Doe",
    "email": "[email protected]",
    "is_active": True,
    "balance": 1234.56,
    "tags": ["admin", "premium"],
    "metadata": {
        "created_at": datetime.datetime.now(),
        "last_login": None
    }
}

# Serialize: note that orjson.dumps() returns bytes, not str.
payload = orjson.dumps(profile)
print(f"JSON bytes: {payload}")
print(f"Size: {len(payload)} bytes")

# Deserialize back into Python objects.
roundtripped = orjson.loads(payload)
print(f"Decoded: {roundtripped}")

# orjson.loads() also accepts str input.
raw_text = '{"name": "John Doe", "age": 30, "active": true}'
from_text = orjson.loads(raw_text)
print(f"Parsed: {from_text}")

# bytes, bytearray and memoryview are all accepted as input.
from_bytes = orjson.loads(b'{"key": "value"}')
from_bytearray = orjson.loads(bytearray(b'{"key": "value"}'))
from_memoryview = orjson.loads(memoryview(b'{"key": "value"}'))

print("All input types supported:", from_bytes, from_bytearray, from_memoryview)

dataclasses and datetime Support

import orjson
import dataclasses
import datetime
import zoneinfo
from typing import List, Optional

@dataclasses.dataclass
class User:
    """User record; orjson serializes dataclass instances natively.

    Field declaration order defines the JSON key order of the output.
    """
    id: int
    name: str
    email: str
    created_at: datetime.datetime
    # None serializes as JSON null.
    last_login: Optional[datetime.datetime] = None
    is_active: bool = dataclasses.field(default=True)
    # default_factory avoids sharing one mutable list across instances.
    roles: List[str] = dataclasses.field(default_factory=list)

@dataclasses.dataclass  
class Organization:
    """Organization containing nested User dataclasses (serialized recursively)."""
    id: int
    name: str
    users: List[User]
    founded_date: datetime.date
    
# Build sample instances: user1 uses timezone-aware datetimes,
# user2 a naive datetime with microseconds.
user1 = User(
    id=1,
    name="John Doe",
    email="[email protected]",
    created_at=datetime.datetime(2023, 1, 15, 10, 30, 0, tzinfo=zoneinfo.ZoneInfo("America/New_York")),
    last_login=datetime.datetime.now(zoneinfo.ZoneInfo("UTC")),
    roles=["admin", "user"]
)

user2 = User(
    id=2,
    name="Jane Smith", 
    email="[email protected]",
    created_at=datetime.datetime(2023, 6, 20, 14, 45, 30, 123456),  # naive, with microseconds
    roles=["user"]
)

org = Organization(
    id=100,
    name="Example Corp",
    users=[user1, user2],
    founded_date=datetime.date(2020, 4, 1)
)

# Nested dataclasses are serialized automatically, no default= needed.
org_json = orjson.dumps(org)
print(f"Organization JSON: {org_json}")

# A mix of datetime flavors to demonstrate the formatting options below.
datetime_examples = {
    "naive_datetime": datetime.datetime(2025, 1, 1, 12, 0, 0),
    "utc_datetime": datetime.datetime(2025, 1, 1, 12, 0, 0, tzinfo=zoneinfo.ZoneInfo("UTC")),
    "est_datetime": datetime.datetime(2025, 1, 1, 12, 0, 0, tzinfo=zoneinfo.ZoneInfo("America/New_York")),
    "date_only": datetime.date(2025, 1, 1),
    "time_only": datetime.time(12, 30, 45, 123456)
}

# Default datetime serialization (RFC 3339-style strings)
default_json = orjson.dumps(datetime_examples)
print(f"Default datetime: {default_json}")

# OPT_NAIVE_UTC: treat naive datetimes as if they were UTC
naive_utc_json = orjson.dumps(datetime_examples, option=orjson.OPT_NAIVE_UTC)
print(f"Naive UTC: {naive_utc_json}")

# OPT_OMIT_MICROSECONDS: drop sub-second precision
no_microseconds_json = orjson.dumps(datetime_examples, option=orjson.OPT_OMIT_MICROSECONDS)
print(f"No microseconds: {no_microseconds_json}")

# OPT_UTC_Z: render the UTC offset as "Z" instead of "+00:00"
utc_z_json = orjson.dumps(datetime_examples, option=orjson.OPT_UTC_Z)
print(f"UTC Z format: {utc_z_json}")

Advanced Configuration and Options

import datetime
import decimal
import enum
import uuid
from dataclasses import dataclass
from typing import Any

import numpy as np

import orjson

# Custom enum definition
class Status(enum.Enum):
    """Closed set of task states (string-valued)."""
    ACTIVE = "active"
    INACTIVE = "inactive"
    PENDING = "pending"

class Priority(enum.Enum):
    """Numeric priority levels (integer-valued)."""
    LOW = 1
    MEDIUM = 2
    HIGH = 3

@dataclass
class Task:
    """Task record mixing scalar fields, enum members, and a NumPy array."""
    id: int
    title: str
    status: Status
    priority: Priority
    # Serializing this field requires orjson.OPT_SERIALIZE_NUMPY.
    data: np.ndarray

# Complex data including a NumPy array field
task_data = Task(
    id=123,
    title="Important Task",
    status=Status.ACTIVE,
    priority=Priority.HIGH,
    data=np.array([[1, 2, 3], [4, 5, 6]])
)

# NumPy arrays are only serialized when OPT_SERIALIZE_NUMPY is passed
numpy_json = orjson.dumps(task_data, option=orjson.OPT_SERIALIZE_NUMPY)
print(f"NumPy serialization: {numpy_json}")

# Options are bit flags: combine them by OR-ing together
combined_options = (
    orjson.OPT_NAIVE_UTC |           # Process naive datetime as UTC
    orjson.OPT_SERIALIZE_NUMPY |     # Serialize NumPy arrays
    orjson.OPT_SORT_KEYS |           # Sort keys
    orjson.OPT_INDENT_2              # Indent with 2 spaces
)

test_data = {
    "timestamp": datetime.datetime(2025, 1, 1, 12, 0, 0),
    "matrix": np.array([[1, 2], [3, 4]]),
    "z_key": "last",
    "a_key": "first",
    "m_key": "middle"
}

formatted_json = orjson.dumps(test_data, option=combined_options)
print(f"Combined options: {formatted_json.decode()}")

# Non-string dict keys require OPT_NON_STR_KEYS (rejected by default)
non_str_keys_data = {
    uuid.uuid4(): "uuid key",
    datetime.datetime(2025, 1, 1): "datetime key",
    datetime.date(2025, 1, 1): "date key",
    42: "integer key"
}

non_str_json = orjson.dumps(non_str_keys_data, option=orjson.OPT_NON_STR_KEYS)
print(f"Non-string keys: {non_str_json}")

# Strict integer mode (53-bit limit, i.e. the JavaScript-safe range)
large_integers = {
    "safe_int": 9007199254740991,      # 2^53 - 1
    "unsafe_int": 9007199254740992     # 2^53
}

# Normal mode (full 64-bit integer support)
normal_int_json = orjson.dumps(large_integers)
print(f"Normal integers: {normal_int_json}")

# OPT_STRICT_INTEGER raises for integers beyond 53 bits
try:
    strict_int_json = orjson.dumps(large_integers, option=orjson.OPT_STRICT_INTEGER)
    print(f"Strict integers: {strict_int_json}")
except orjson.JSONEncodeError as e:
    print(f"Strict integer error: {e}")

Custom Type Serialization

import decimal
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any

import orjson

# Default function for custom type support
def custom_default(obj: Any) -> Any:
    """Fallback serializer for orjson.dumps(default=...).

    Converts types orjson cannot encode natively into JSON-friendly
    structures; raises TypeError for anything still unsupported.
    """
    if isinstance(obj, decimal.Decimal):
        # Emit decimals as strings to preserve exact precision.
        return str(obj)
    if isinstance(obj, complex):
        # Split a complex number into its real/imaginary parts.
        return {"real": obj.real, "imag": obj.imag, "_type": "complex"}
    if isinstance(obj, bytes):
        # Raw bytes are not valid JSON; ship them base64-encoded.
        import base64
        return {"data": base64.b64encode(obj).decode(), "_type": "bytes"}
    if isinstance(obj, set):
        # JSON has no set type; emit a tagged list instead.
        return {"items": list(obj), "_type": "set"}
    if hasattr(obj, '__dict__'):
        # Generic objects: dump their attribute dict plus a type tag.
        return {**obj.__dict__, "_type": type(obj).__name__}
    raise TypeError(f"Type {type(obj)} is not JSON serializable")

# Custom class for testing
class CustomObject:
    """Sample object whose __dict__ is serialized via custom_default.

    Attribute insertion order here defines the JSON key order downstream.
    """
    def __init__(self, name: str, value: int):
        self.name = name
        self.value = value
        self.created_at = datetime.now(timezone.utc)

# Data containing types orjson cannot encode without help
custom_data = {
    "decimal_value": decimal.Decimal("123.456789"),
    "complex_number": 3 + 4j,
    "byte_data": b"Hello, World!",
    "unique_items": {1, 2, 3, 4, 5},
    "custom_object": CustomObject("test", 42)
}

# Without default=, unsupported types raise JSONEncodeError
try:
    # Fails without default function
    orjson.dumps(custom_data)
except orjson.JSONEncodeError as e:
    print(f"Error without default: {e}")

# With default=custom_default every value above becomes serializable
custom_json = orjson.dumps(custom_data, default=custom_default, option=orjson.OPT_INDENT_2)
print(f"Custom serialization: {custom_json.decode()}")

# dataclass pass-through example
@dataclass
class SecureUser:
    """User dataclass holding a secret field that must not be serialized."""
    id: int
    name: str
    password: str


def secure_default(obj: Any) -> Any:
    """Serialize SecureUser while redacting the password field.

    Intended for use with OPT_PASSTHROUGH_DATACLASS, so orjson routes
    dataclass instances here instead of serializing every field.
    """
    if not isinstance(obj, SecureUser):
        raise TypeError
    # Whitelist only the non-sensitive fields.
    return {"id": obj.id, "name": obj.name}

secure_user = SecureUser(1, "John Doe", "secret123")

# Default dataclass serialization includes every field - password leaks!
default_user_json = orjson.dumps(secure_user)
print(f"Default dataclass: {default_user_json}")

# OPT_PASSTHROUGH_DATACLASS forces dataclasses through default=,
# letting secure_default redact the password
secure_user_json = orjson.dumps(
    secure_user, 
    option=orjson.OPT_PASSTHROUGH_DATACLASS,
    default=secure_default
)
print(f"Secure dataclass: {secure_user_json}")

Performance Optimization and Benchmarking

import orjson
import json
import time
import sys
from typing import List, Dict, Any
import datetime
import dataclasses

@dataclasses.dataclass
class PerformanceTestData:
    """Synthetic user record used as the benchmark payload."""
    id: int
    name: str
    email: str
    created_at: datetime.datetime
    # Nested dict with lists/bools to exercise non-trivial structures.
    metadata: Dict[str, Any]
    tags: List[str]
    is_active: bool

def generate_test_data(count: int) -> List[PerformanceTestData]:
    """Build `count` synthetic user records with nested metadata."""
    records: List[PerformanceTestData] = []
    for idx in range(count):
        # Deterministic-but-varied nested payload derived from the index.
        metadata = {
            "department": f"Department {idx % 10}",
            "salary": 50000 + (idx * 100),
            "projects": [f"Project {j}" for j in range(idx % 5)],
            "settings": {
                "theme": "dark" if idx % 2 else "light",
                "notifications": idx % 3 == 0
            }
        }
        records.append(PerformanceTestData(
            id=idx,
            name=f"User {idx}",
            email=f"user{idx}@example.com",
            created_at=datetime.datetime.now(),
            metadata=metadata,
            tags=[f"tag{j}" for j in range(idx % 3 + 1)],
            is_active=idx % 10 != 0
        ))
    return records

def measure_time(func, *args, **kwargs):
    """Call func(*args, **kwargs); return (result, elapsed milliseconds)."""
    t0 = time.perf_counter()
    out = func(*args, **kwargs)
    elapsed_ms = (time.perf_counter() - t0) * 1000
    return out, elapsed_ms

def benchmark_serialization(data_count: int = 10000):
    """Compare orjson vs. stdlib json on encode/decode time and output size.

    Prints timing, size and speedup figures for `data_count` generated
    records. Statement order is deliberate: data preparation for the
    stdlib path happens outside the timed sections so only dumps/loads
    work is measured.
    """
    print(f"Benchmarking with {data_count} records...")
    
    # Generate test data
    test_data = generate_test_data(data_count)
    
    # orjson serializes dataclasses and datetimes directly
    orjson_data, orjson_encode_time = measure_time(
        lambda: [orjson.dumps(item) for item in test_data]
    )
    
    # Standard json serialization (requires manual conversion for dataclass)
    json_compatible_data = [dataclasses.asdict(item) for item in test_data]
    
    # Convert datetime to string (standard json cannot handle directly)
    def convert_datetime(obj):
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        return obj
    
    # Only the top-level created_at holds a datetime in this payload.
    json_safe_data = []
    for item in json_compatible_data:
        item_copy = item.copy()
        item_copy['created_at'] = convert_datetime(item_copy['created_at'])
        json_safe_data.append(item_copy)
    
    json_data, json_encode_time = measure_time(
        lambda: [json.dumps(item) for item in json_safe_data]
    )
    
    # Deserialization timing for both libraries
    orjson_decoded, orjson_decode_time = measure_time(
        lambda: [orjson.loads(data) for data in orjson_data]
    )
    
    json_decoded, json_decode_time = measure_time(
        lambda: [json.loads(data) for data in json_data]
    )
    
    # Total serialized output size (bytes) for each library
    orjson_size = sum(len(data) for data in orjson_data)
    json_size = sum(len(data) for data in json_data)
    
    # Output results
    print("\n=== Serialization Performance ===")
    print(f"orjson encode: {orjson_encode_time:.2f}ms")
    print(f"json encode: {json_encode_time:.2f}ms")
    print(f"orjson speedup: {json_encode_time / orjson_encode_time:.1f}x")
    
    print("\n=== Deserialization Performance ===")
    print(f"orjson decode: {orjson_decode_time:.2f}ms")
    print(f"json decode: {json_decode_time:.2f}ms")
    print(f"orjson speedup: {json_decode_time / orjson_decode_time:.1f}x")
    
    print("\n=== Size Comparison ===")
    print(f"orjson size: {orjson_size / 1024:.2f} KB")
    print(f"json size: {json_size / 1024:.2f} KB")
    print(f"Size difference: {((json_size - orjson_size) / json_size * 100):.1f}%")
    
    print("\n=== Overall Performance ===")
    total_orjson_time = orjson_encode_time + orjson_decode_time
    total_json_time = json_encode_time + json_decode_time
    print(f"Total orjson time: {total_orjson_time:.2f}ms")
    print(f"Total json time: {total_json_time:.2f}ms")
    print(f"Overall speedup: {total_json_time / total_orjson_time:.1f}x")

# Memory usage measurement
def measure_memory_usage():
    """Compare peak allocations of orjson vs. stdlib json via tracemalloc.

    Only the dumps loops are traced; test-data generation and the
    datetime-to-string conversion happen before tracing starts, so the
    comparison stays focused on serialization itself.
    """
    import tracemalloc
    
    # Generate large data
    large_data = generate_test_data(50000)
    
    # Measure orjson memory usage
    tracemalloc.start()
    orjson_result = [orjson.dumps(item) for item in large_data]
    orjson_current, orjson_peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    
    # Prepare stdlib-json-safe data (datetime -> ISO string) before tracing
    json_safe_data = [dataclasses.asdict(item) for item in large_data]
    for item in json_safe_data:
        if 'created_at' in item:
            item['created_at'] = item['created_at'].isoformat()
    
    tracemalloc.start()
    json_result = [json.dumps(item) for item in json_safe_data]
    json_current, json_peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    
    print("\n=== Memory Usage ===")
    print(f"orjson peak memory: {orjson_peak / 1024 / 1024:.2f} MB")
    print(f"json peak memory: {json_peak / 1024 / 1024:.2f} MB")
    print(f"Memory efficiency: {json_peak / orjson_peak:.1f}x")

if __name__ == "__main__":
    # Run both benchmarks when executed as a script.
    benchmark_serialization(10000)
    measure_memory_usage()

Error Handling and Debugging

import orjson
import json
from typing import Any

def comprehensive_error_handling():
    """Walk through orjson's failure modes and how to handle each one.

    Demonstrates encode errors (unsupported types, invalid surrogates,
    strict-integer overflow, circular references), decode errors
    (malformed JSON, invalid UTF-8), and default-function strategies.
    """
    
    # 1. Basic error handling
    invalid_data = {1, 2, 3}  # set cannot be directly serialized
    
    try:
        result = orjson.dumps(invalid_data)
    except orjson.JSONEncodeError as e:
        print(f"Encode error: {e}")
        print(f"Error type: {type(e)}")
    
    # 2. Invalid UTF-8 string handling: orjson validates, stdlib escapes
    try:
        # Invalid surrogate pair
        invalid_utf8 = '\ud800'  # Lone high surrogate
        orjson.dumps(invalid_utf8)
    except orjson.JSONEncodeError as e:
        print(f"Invalid UTF-8 error: {e}")
        
        # Comparison with standard json
        print(f"Standard json handles it: {json.dumps(invalid_utf8)}")
    
    # 3. Decode error handling
    try:
        # Invalid JSON
        invalid_json = b'{"key": value}'  # value not quoted
        orjson.loads(invalid_json)
    except orjson.JSONDecodeError as e:
        print(f"Decode error: {e}")
        print(f"Error position: line {getattr(e, 'lineno', 'unknown')}, column {getattr(e, 'colno', 'unknown')}")
    
    # 4. Invalid UTF-8 byte sequence handling
    try:
        invalid_bytes = b'"\xed\xa0\x80"'  # Invalid UTF-8 byte sequence
        orjson.loads(invalid_bytes)
    except orjson.JSONDecodeError as e:
        print(f"Invalid UTF-8 bytes error: {e}")
        
        # Workaround: re-decode with "replace" so bad bytes become U+FFFD
        try:
            recovered = orjson.loads(invalid_bytes.decode("utf-8", "replace"))
            print(f"Recovered data: {recovered}")
        except Exception as recovery_error:
            print(f"Recovery failed: {recovery_error}")
    
    # 5. Large integer handling
    large_numbers = {
        "max_safe": 9007199254740991,     # 2^53 - 1
        "too_large": 9007199254740992,    # 2^53
        "way_too_large": 2**60
    }
    
    # Normal mode: full 64-bit range accepted
    normal_result = orjson.dumps(large_numbers)
    print(f"Normal mode: {normal_result}")
    
    # Strict mode: values beyond 53 bits raise
    try:
        strict_result = orjson.dumps(large_numbers, option=orjson.OPT_STRICT_INTEGER)
    except orjson.JSONEncodeError as e:
        print(f"Strict integer mode error: {e}")
    
    # 6. Circular reference detection
    circular_data = {"name": "parent"}
    circular_data["self"] = circular_data
    
    try:
        orjson.dumps(circular_data)
    except orjson.JSONEncodeError as e:
        print(f"Circular reference error: {e}")
    
    # 7. Error handling in custom default functions
    def failing_default(obj: Any) -> Any:
        # Handles sets only; everything else propagates as TypeError.
        if isinstance(obj, set):
            return list(obj)
        # Explicitly raise TypeError
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
    
    def safe_default(obj: Any) -> Any:
        """Default hook that never raises: tags every value it converts."""
        try:
            if isinstance(obj, set):
                return {"_type": "set", "items": list(obj)}
            elif hasattr(obj, '__dict__'):
                return {"_type": type(obj).__name__, "data": obj.__dict__}
            else:
                # Fallback: string representation
                return {"_type": "unknown", "str": str(obj)}
        except Exception as e:
            # Final fallback
            return {"_type": "error", "message": str(e)}
    
    problematic_data = {
        "set_data": {1, 2, 3},
        "custom_object": object(),
        "lambda_func": lambda x: x
    }
    
    # failing_default cannot convert object()/lambda, so dumps raises
    try:
        orjson.dumps(problematic_data, default=failing_default)
    except orjson.JSONEncodeError as e:
        print(f"Default function error: {e}")
    
    # safe_default converts everything, so this succeeds
    safe_result = orjson.dumps(problematic_data, default=safe_default, option=orjson.OPT_INDENT_2)
    print(f"Safe default result: {safe_result.decode()}")

def debug_helper_functions():
    """Demonstrate defensive wrappers around orjson.

    Each helper falls back to the stdlib `json` module when orjson
    rejects the input, so callers always receive *some* result instead
    of an unhandled exception.
    """
    # Fix: this example script only imports orjson/json/typing at the
    # top, but the test cases below use datetime — without this import
    # the function raised NameError at runtime.
    import datetime

    def safe_dumps(obj: Any, **kwargs) -> bytes:
        """orjson.dumps with a stdlib-json fallback; never raises."""
        try:
            return orjson.dumps(obj, **kwargs)
        except orjson.JSONEncodeError as e:
            print(f"Serialization failed: {e}")
            # Fallback: stdlib json with default=str stringifies anything
            try:
                fallback_result = json.dumps(obj, default=str, ensure_ascii=False)
                print(f"Fallback to standard json: {fallback_result}")
                return fallback_result.encode()
            except Exception as fallback_error:
                print(f"Fallback also failed: {fallback_error}")
                return b'{"error": "serialization_failed"}'

    def safe_loads(data: bytes | str) -> Any:
        """orjson.loads with a stdlib-json fallback; never raises."""
        try:
            return orjson.loads(data)
        except orjson.JSONDecodeError as e:
            print(f"Deserialization failed: {e}")
            # Fallback: repair invalid UTF-8 then try the stdlib parser
            try:
                if isinstance(data, bytes):
                    data = data.decode('utf-8', 'replace')
                return json.loads(data)
            except Exception as fallback_error:
                print(f"Fallback also failed: {fallback_error}")
                return {"error": "deserialization_failed"}

    def validate_json_compatibility(obj: Any) -> bool:
        """Report whether obj round-trips through orjson (stdlib json is informational)."""
        try:
            # Round-trip with orjson first; failure here is decisive.
            orjson_result = orjson.dumps(obj)
            orjson.loads(orjson_result)

            # Stdlib json result does not change the verdict.
            try:
                json_result = json.dumps(obj, default=str, ensure_ascii=False)
                json.loads(json_result)
                print("Both orjson and standard json succeeded")
                return True
            except Exception:
                print("orjson succeeded, but standard json failed")
                return True

        except Exception as e:
            print(f"JSON compatibility check failed: {e}")
            return False

    # Exercise the helpers against representative inputs
    test_cases = [
        {"simple": "data"},
        {1, 2, 3},  # set - expected to fail
        datetime.datetime.now(),  # datetime - succeeds with orjson
        {"valid": True, "number": 42}
    ]

    for i, test_case in enumerate(test_cases):
        print(f"\nTest case {i + 1}: {type(test_case).__name__}")

        # Safe operation test
        result = safe_dumps(test_case)
        parsed = safe_loads(result)

        # Compatibility check
        is_compatible = validate_json_compatibility(test_case)
        print(f"Compatibility: {is_compatible}")

# Run both demo suites when executed as a script.
if __name__ == "__main__":
    print("=== Comprehensive Error Handling ===")
    comprehensive_error_handling()
    
    print("\n=== Debug Helper Functions ===") 
    debug_helper_functions()