orjson
Library
orjson
Overview
orjson is an ultra-fast Python JSON library written in Rust. It's over 6x faster than the standard json library and natively supports Python-specific types like dataclasses and datetime. As the top candidate for Python JSON library in 2025, migration from ujson is recommended for performance-focused applications, and its adoption is rapidly increasing in API development requiring speed optimization.
Details
orjson 3.10 is a groundbreaking library that brings Rust's memory safety and high performance to the Python ecosystem. While maintaining complete compatibility with the standard library, it natively supports Python-specific types such as dataclasses, datetime, NumPy arrays, and UUID, with fine-grained output control through options. It provides high-quality implementation that meets production environment requirements, including RFC 3339-compliant datetime serialization, microsecond-precision time processing, and secure UTF-8 validation.
Key Features
- Ultra-Fast: Over 6x faster than the standard json library
- Python Type Support: Native support for dataclasses, datetime, and NumPy
- RFC Compliant: RFC 3339-compliant datetime serialization
- Memory Efficient: Low memory footprint through Rust-based implementation
- Type Safe: Robust memory safety derived from Rust
- Compatibility: Fully compatible API with standard json library
Pros and Cons
Pros
- Overwhelming speed compared to standard json library (6x+ faster)
- Standard support for Python-specific types like dataclasses, datetime, NumPy arrays
- RFC 3339-compliant datetime processing following international standards
- Robust memory safety and crash resistance through Rust-based implementation
- Easy replacement with identical API to standard library
- Extensive production environment operational experience
Cons
- Requires compiled binaries during installation (limitations on some platforms)
- Larger installed footprint (compiled binary size) than the pure-Python standard library
- More limited extensibility for custom encoders compared to standard library
- Limited debug information making troubleshooting difficult in some cases
- Rust toolchain dependency (when building from source)
- Performance difference with standard library is small for small-scale data
Reference Pages
Code Examples
Basic Setup
# Install orjson
pip install orjson

# Install a pinned, compatible release range (recommended for applications)
pip install "orjson>=3.10,<4"

# Pipfile specification (pipenv)
# FIX: caret ranges ("^3.10") are Poetry syntax; Pipfile uses PEP 440 specifiers.
echo 'orjson = ">=3.10,<4"' >> Pipfile

# requirements.txt specification
echo 'orjson>=3.10,<4' >> requirements.txt
Basic Serialization
import orjson
import datetime
import uuid

# A representative payload mixing the JSON-native types with datetime and None.
profile = {
    "id": 123,
    "name": "John Doe",
    "email": "[email protected]",
    "is_active": True,
    "balance": 1234.56,
    "tags": ["admin", "premium"],
    "metadata": {
        "created_at": datetime.datetime.now(),
        "last_login": None,
    },
}

# Serialization — note that orjson.dumps() returns bytes, not str.
payload = orjson.dumps(profile)
print(f"JSON bytes: {payload}")
print(f"Size: {len(payload)} bytes")

# Deserialization back into plain Python objects.
roundtripped = orjson.loads(payload)
print(f"Decoded: {roundtripped}")

# orjson.loads() also accepts str input.
raw_text = '{"name": "John Doe", "age": 30, "active": true}'
parsed = orjson.loads(raw_text)
print(f"Parsed: {parsed}")

# bytes, bytearray and memoryview inputs are all accepted as well.
from_bytes = orjson.loads(b'{"key": "value"}')
from_bytearray = orjson.loads(bytearray(b'{"key": "value"}'))
from_memoryview = orjson.loads(memoryview(b'{"key": "value"}'))
print("All input types supported:", from_bytes, from_bytearray, from_memoryview)
dataclasses and datetime Support
# Demonstrates orjson's zero-config support for dataclasses and the datetime
# family, plus the OPT_* flags that tune datetime output formatting.
import orjson
import dataclasses
import datetime
import zoneinfo
from typing import List, Optional

@dataclasses.dataclass
class User:
    # created_at may be naive or timezone-aware; both serialize to RFC 3339.
    id: int
    name: str
    email: str
    created_at: datetime.datetime
    last_login: Optional[datetime.datetime] = None
    is_active: bool = dataclasses.field(default=True)
    roles: List[str] = dataclasses.field(default_factory=list)

@dataclasses.dataclass
class Organization:
    id: int
    name: str
    users: List[User]  # nested dataclasses are serialized recursively
    founded_date: datetime.date

# Create dataclass instances
user1 = User(
    id=1,
    name="John Doe",
    email="[email protected]",
    created_at=datetime.datetime(2023, 1, 15, 10, 30, 0, tzinfo=zoneinfo.ZoneInfo("America/New_York")),
    last_login=datetime.datetime.now(zoneinfo.ZoneInfo("UTC")),
    roles=["admin", "user"]
)
user2 = User(
    id=2,
    name="Jane Smith",
    email="[email protected]",
    created_at=datetime.datetime(2023, 6, 20, 14, 45, 30, 123456),  # naive, with microseconds
    roles=["user"]
)
org = Organization(
    id=100,
    name="Example Corp",
    users=[user1, user2],
    founded_date=datetime.date(2020, 4, 1)
)

# Automatic dataclass serialization — no default= hook is required.
org_json = orjson.dumps(org)
print(f"Organization JSON: {org_json}")

# Various datetime format support
datetime_examples = {
    "naive_datetime": datetime.datetime(2025, 1, 1, 12, 0, 0),
    "utc_datetime": datetime.datetime(2025, 1, 1, 12, 0, 0, tzinfo=zoneinfo.ZoneInfo("UTC")),
    "est_datetime": datetime.datetime(2025, 1, 1, 12, 0, 0, tzinfo=zoneinfo.ZoneInfo("America/New_York")),
    "date_only": datetime.date(2025, 1, 1),
    "time_only": datetime.time(12, 30, 45, 123456)
}

# Default datetime serialization (RFC 3339 strings)
default_json = orjson.dumps(datetime_examples)
print(f"Default datetime: {default_json}")

# Process naive datetimes as if they were UTC
naive_utc_json = orjson.dumps(datetime_examples, option=orjson.OPT_NAIVE_UTC)
print(f"Naive UTC: {naive_utc_json}")

# Omit microseconds from datetime/time output
no_microseconds_json = orjson.dumps(datetime_examples, option=orjson.OPT_OMIT_MICROSECONDS)
print(f"No microseconds: {no_microseconds_json}")

# Use "Z" suffix instead of "+00:00" for UTC times
utc_z_json = orjson.dumps(datetime_examples, option=orjson.OPT_UTC_Z)
print(f"UTC Z format: {utc_z_json}")
Advanced Configuration and Options
# Advanced options: NumPy arrays, enums, non-string keys and integer limits.
# FIX: `datetime` and `uuid` are used below but were never imported,
# which made this example raise NameError at runtime.
import datetime
import decimal
import enum
import uuid
from dataclasses import dataclass
from typing import Any

import numpy as np
import orjson

# Custom enum definitions — enum *values* are what get serialized.
class Status(enum.Enum):
    ACTIVE = "active"
    INACTIVE = "inactive"
    PENDING = "pending"

class Priority(enum.Enum):
    LOW = 1
    MEDIUM = 2
    HIGH = 3

@dataclass
class Task:
    id: int
    title: str
    status: Status
    priority: Priority
    data: np.ndarray

# Complex data including NumPy arrays
task_data = Task(
    id=123,
    title="Important Task",
    status=Status.ACTIVE,
    priority=Priority.HIGH,
    data=np.array([[1, 2, 3], [4, 5, 6]])
)

# Serialization with NumPy support enabled
numpy_json = orjson.dumps(task_data, option=orjson.OPT_SERIALIZE_NUMPY)
print(f"NumPy serialization: {numpy_json}")

# Combination of multiple options — flags are bitwise OR-ed together.
combined_options = (
    orjson.OPT_NAIVE_UTC |        # Process naive datetime as UTC
    orjson.OPT_SERIALIZE_NUMPY |  # Serialize NumPy arrays
    orjson.OPT_SORT_KEYS |        # Sort keys
    orjson.OPT_INDENT_2           # Indent with 2 spaces
)
test_data = {
    "timestamp": datetime.datetime(2025, 1, 1, 12, 0, 0),
    "matrix": np.array([[1, 2], [3, 4]]),
    "z_key": "last",
    "a_key": "first",
    "m_key": "middle"
}
formatted_json = orjson.dumps(test_data, option=combined_options)
print(f"Combined options: {formatted_json.decode()}")

# Non-string key support (UUID, datetime, date and int keys become strings)
non_str_keys_data = {
    uuid.uuid4(): "uuid key",
    datetime.datetime(2025, 1, 1): "datetime key",
    datetime.date(2025, 1, 1): "date key",
    42: "integer key"
}
non_str_json = orjson.dumps(non_str_keys_data, option=orjson.OPT_NON_STR_KEYS)
print(f"Non-string keys: {non_str_json}")

# Strict integer mode (53-bit limit, matching JavaScript's safe-integer range)
large_integers = {
    "safe_int": 9007199254740991,   # 2^53 - 1
    "unsafe_int": 9007199254740992  # 2^53
}
# Normal mode (64-bit integer support)
normal_int_json = orjson.dumps(large_integers)
print(f"Normal integers: {normal_int_json}")
# Strict mode (will cause error)
try:
    strict_int_json = orjson.dumps(large_integers, option=orjson.OPT_STRICT_INTEGER)
    print(f"Strict integers: {strict_int_json}")
except orjson.JSONEncodeError as e:
    print(f"Strict integer error: {e}")
Custom Type Serialization
import orjson
import decimal
from dataclasses import dataclass  # FIX: @dataclass is used below but was never imported
from datetime import datetime, timezone
from typing import Any

# Default function for custom type support.
# orjson calls `default` once for every object it cannot serialize natively;
# the return value is then serialized in its place.
def custom_default(obj: Any) -> Any:
    """Convert types orjson cannot handle natively into JSON-friendly values.

    Raises TypeError for anything unrecognized, which orjson surfaces
    as JSONEncodeError.
    """
    if isinstance(obj, decimal.Decimal):
        # Output Decimal type as string (preserves exact precision)
        return str(obj)
    elif isinstance(obj, complex):
        # Output complex numbers as dictionary
        return {"real": obj.real, "imag": obj.imag, "_type": "complex"}
    elif isinstance(obj, bytes):
        # Output byte sequences as base64 encoded
        import base64
        return {"data": base64.b64encode(obj).decode(), "_type": "bytes"}
    elif isinstance(obj, set):
        # Output sets as lists
        return {"items": list(obj), "_type": "set"}
    elif hasattr(obj, '__dict__'):
        # Output custom classes as dictionaries
        return {**obj.__dict__, "_type": type(obj).__name__}
    # Raise TypeError for unsupported types
    raise TypeError(f"Type {type(obj)} is not JSON serializable")

# Custom class for testing
class CustomObject:
    def __init__(self, name: str, value: int):
        self.name = name
        self.value = value
        self.created_at = datetime.now(timezone.utc)

# Data containing custom types
custom_data = {
    "decimal_value": decimal.Decimal("123.456789"),
    "complex_number": 3 + 4j,
    "byte_data": b"Hello, World!",
    "unique_items": {1, 2, 3, 4, 5},
    "custom_object": CustomObject("test", 42)
}

# Custom serialization
try:
    # Fails without default function
    orjson.dumps(custom_data)
except orjson.JSONEncodeError as e:
    print(f"Error without default: {e}")

# Succeeds with default function
custom_json = orjson.dumps(custom_data, default=custom_default, option=orjson.OPT_INDENT_2)
print(f"Custom serialization: {custom_json.decode()}")

# dataclass pass-through example
@dataclass
class SecureUser:
    id: int
    name: str
    password: str

def secure_default(obj: Any) -> Any:
    """Serialize SecureUser without exposing the password field."""
    if isinstance(obj, SecureUser):
        # Serialize excluding password
        return {"id": obj.id, "name": obj.name}
    raise TypeError

secure_user = SecureUser(1, "John Doe", "secret123")

# Default dataclass serialization (includes password!)
default_user_json = orjson.dumps(secure_user)
print(f"Default dataclass: {default_user_json}")

# OPT_PASSTHROUGH_DATACLASS routes dataclasses through `default` instead
# of serializing their fields automatically (excludes password here).
secure_user_json = orjson.dumps(
    secure_user,
    option=orjson.OPT_PASSTHROUGH_DATACLASS,
    default=secure_default
)
print(f"Secure dataclass: {secure_user_json}")
Performance Optimization and Benchmarking
import orjson
import json
import time
import sys
from typing import List, Dict, Any
import datetime
import dataclasses
@dataclasses.dataclass
class PerformanceTestData:
id: int
name: str
email: str
created_at: datetime.datetime
metadata: Dict[str, Any]
tags: List[str]
is_active: bool
def generate_test_data(count: int) -> List[PerformanceTestData]:
"""Generate test data"""
data = []
for i in range(count):
data.append(PerformanceTestData(
id=i,
name=f"User {i}",
email=f"user{i}@example.com",
created_at=datetime.datetime.now(),
metadata={
"department": f"Department {i % 10}",
"salary": 50000 + (i * 100),
"projects": [f"Project {j}" for j in range(i % 5)],
"settings": {
"theme": "dark" if i % 2 else "light",
"notifications": i % 3 == 0
}
},
tags=[f"tag{j}" for j in range(i % 3 + 1)],
is_active=i % 10 != 0
))
return data
def measure_time(func, *args, **kwargs):
    """Run *func* once and report how long it took.

    Returns a ``(result, elapsed_ms)`` tuple where ``elapsed_ms`` is the
    wall-clock duration in milliseconds, measured with ``perf_counter``.
    """
    t0 = time.perf_counter()
    value = func(*args, **kwargs)
    elapsed_ms = (time.perf_counter() - t0) * 1000  # milliseconds
    return value, elapsed_ms
def benchmark_serialization(data_count: int = 10000):
    """Compare orjson vs. stdlib json encode/decode speed and output size.

    Generates ``data_count`` synthetic records, times per-record encoding
    and decoding with both libraries, and prints timing, size and overall
    speedup summaries to stdout.
    """
    print(f"Benchmarking with {data_count} records...")
    # Generate test data
    test_data = generate_test_data(data_count)
    # orjson serialization (handles dataclasses and datetime natively)
    orjson_data, orjson_encode_time = measure_time(
        lambda: [orjson.dumps(item) for item in test_data]
    )
    # Standard json serialization (requires manual conversion for dataclass)
    json_compatible_data = [dataclasses.asdict(item) for item in test_data]
    # Convert datetime to string (standard json cannot handle directly)
    def convert_datetime(obj):
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        return obj
    json_safe_data = []
    for item in json_compatible_data:
        item_copy = item.copy()
        item_copy['created_at'] = convert_datetime(item_copy['created_at'])
        json_safe_data.append(item_copy)
    # NOTE(review): the asdict/isoformat prep above is deliberately excluded
    # from the stdlib timing window, so this measures encoding cost only.
    json_data, json_encode_time = measure_time(
        lambda: [json.dumps(item) for item in json_safe_data]
    )
    # Deserialization
    orjson_decoded, orjson_decode_time = measure_time(
        lambda: [orjson.loads(data) for data in orjson_data]
    )
    json_decoded, json_decode_time = measure_time(
        lambda: [json.loads(data) for data in json_data]
    )
    # Size comparison (total bytes across all encoded records)
    orjson_size = sum(len(data) for data in orjson_data)
    json_size = sum(len(data) for data in json_data)
    # Output results
    print("\n=== Serialization Performance ===")
    print(f"orjson encode: {orjson_encode_time:.2f}ms")
    print(f"json encode: {json_encode_time:.2f}ms")
    print(f"orjson speedup: {json_encode_time / orjson_encode_time:.1f}x")
    print("\n=== Deserialization Performance ===")
    print(f"orjson decode: {orjson_decode_time:.2f}ms")
    print(f"json decode: {json_decode_time:.2f}ms")
    print(f"orjson speedup: {json_decode_time / orjson_decode_time:.1f}x")
    print("\n=== Size Comparison ===")
    print(f"orjson size: {orjson_size / 1024:.2f} KB")
    print(f"json size: {json_size / 1024:.2f} KB")
    print(f"Size difference: {((json_size - orjson_size) / json_size * 100):.1f}%")
    print("\n=== Overall Performance ===")
    total_orjson_time = orjson_encode_time + orjson_decode_time
    total_json_time = json_encode_time + json_decode_time
    print(f"Total orjson time: {total_orjson_time:.2f}ms")
    print(f"Total json time: {total_json_time:.2f}ms")
    print(f"Overall speedup: {total_json_time / total_orjson_time:.1f}x")
# Memory usage measurement
def measure_memory_usage():
"""Measure memory usage"""
import tracemalloc
# Generate large data
large_data = generate_test_data(50000)
# Measure orjson memory usage
tracemalloc.start()
orjson_result = [orjson.dumps(item) for item in large_data]
orjson_current, orjson_peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
# Measure standard json memory usage
json_safe_data = [dataclasses.asdict(item) for item in large_data]
for item in json_safe_data:
if 'created_at' in item:
item['created_at'] = item['created_at'].isoformat()
tracemalloc.start()
json_result = [json.dumps(item) for item in json_safe_data]
json_current, json_peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
print("\n=== Memory Usage ===")
print(f"orjson peak memory: {orjson_peak / 1024 / 1024:.2f} MB")
print(f"json peak memory: {json_peak / 1024 / 1024:.2f} MB")
print(f"Memory efficiency: {json_peak / orjson_peak:.1f}x")
if __name__ == "__main__":
    # Entry point: run the timing benchmark first, then the memory comparison.
    benchmark_serialization(10000)
    measure_memory_usage()
Error Handling and Debugging
import orjson
import json
from typing import Any
def comprehensive_error_handling():
    """Walk through the orjson failure modes a caller should expect.

    Each numbered section triggers one class of error (encode, invalid
    UTF-8, decode, oversized integers, circular references, failing
    ``default`` hooks) and prints what orjson raises.
    """
    # 1. Basic error handling: sets are not serializable without a default hook
    invalid_data = {1, 2, 3}  # set cannot be directly serialized
    try:
        result = orjson.dumps(invalid_data)
    except orjson.JSONEncodeError as e:
        print(f"Encode error: {e}")
        print(f"Error type: {type(e)}")
    # 2. Invalid UTF-8 string handling (orjson validates strictly)
    try:
        # Invalid surrogate pair
        invalid_utf8 = '\ud800'  # Lone high surrogate
        orjson.dumps(invalid_utf8)
    except orjson.JSONEncodeError as e:
        print(f"Invalid UTF-8 error: {e}")
        # Comparison with standard json
        print(f"Standard json handles it: {json.dumps(invalid_utf8)}")
    # 3. Decode error handling
    try:
        # Invalid JSON
        invalid_json = b'{"key": value}'  # value not quoted
        orjson.loads(invalid_json)
    except orjson.JSONDecodeError as e:
        print(f"Decode error: {e}")
        print(f"Error position: line {getattr(e, 'lineno', 'unknown')}, column {getattr(e, 'colno', 'unknown')}")
    # 4. Invalid UTF-8 byte sequence handling
    try:
        invalid_bytes = b'"\xed\xa0\x80"'  # Invalid UTF-8 byte sequence
        orjson.loads(invalid_bytes)
    except orjson.JSONDecodeError as e:
        print(f"Invalid UTF-8 bytes error: {e}")
        # Workaround: re-decode lossily with replacement characters, then parse
        try:
            recovered = orjson.loads(invalid_bytes.decode("utf-8", "replace"))
            print(f"Recovered data: {recovered}")
        except Exception as recovery_error:
            print(f"Recovery failed: {recovery_error}")
    # 5. Large integer handling
    large_numbers = {
        "max_safe": 9007199254740991,  # 2^53 - 1
        "too_large": 9007199254740992,  # 2^53
        "way_too_large": 2**60
    }
    # Normal mode (64-bit integers pass through)
    normal_result = orjson.dumps(large_numbers)
    print(f"Normal mode: {normal_result}")
    # Strict mode (rejects values outside the 53-bit range)
    try:
        strict_result = orjson.dumps(large_numbers, option=orjson.OPT_STRICT_INTEGER)
    except orjson.JSONEncodeError as e:
        print(f"Strict integer mode error: {e}")
    # 6. Circular reference detection
    circular_data = {"name": "parent"}
    circular_data["self"] = circular_data
    try:
        orjson.dumps(circular_data)
    except orjson.JSONEncodeError as e:
        print(f"Circular reference error: {e}")
    # 7. Error handling in custom default functions
    def failing_default(obj: Any) -> Any:
        # Handles sets only; everything else is rejected.
        if isinstance(obj, set):
            return list(obj)
        # Explicitly raise TypeError
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")
    def safe_default(obj: Any) -> Any:
        """Default hook that never raises: tags every value with a _type key."""
        try:
            if isinstance(obj, set):
                return {"_type": "set", "items": list(obj)}
            elif hasattr(obj, '__dict__'):
                return {"_type": type(obj).__name__, "data": obj.__dict__}
            else:
                # Fallback: string representation
                return {"_type": "unknown", "str": str(obj)}
        except Exception as e:
            # Final fallback
            return {"_type": "error", "message": str(e)}
    problematic_data = {
        "set_data": {1, 2, 3},
        "custom_object": object(),
        "lambda_func": lambda x: x
    }
    # Failing default function: object() has no __dict__, so it raises
    try:
        orjson.dumps(problematic_data, default=failing_default)
    except orjson.JSONEncodeError as e:
        print(f"Default function error: {e}")
    # Safe default function
    safe_result = orjson.dumps(problematic_data, default=safe_default, option=orjson.OPT_INDENT_2)
    print(f"Safe default result: {safe_result.decode()}")
def debug_helper_functions():
    """Demonstrate defensive wrappers around orjson dumps/loads.

    Defines safe_dumps/safe_loads (which fall back to the stdlib json
    module on failure) and a round-trip compatibility checker, then runs
    them against a handful of representative test cases.
    """
    # FIX: `datetime` is used in test_cases below but this example never
    # imported it, causing a NameError at runtime.
    import datetime

    def safe_dumps(obj: Any, **kwargs) -> bytes:
        """orjson.dumps with a stdlib-json fallback; always returns bytes."""
        try:
            return orjson.dumps(obj, **kwargs)
        except orjson.JSONEncodeError as e:
            print(f"Serialization failed: {e}")
            # Fallback: use standard json (default=str stringifies anything)
            try:
                fallback_result = json.dumps(obj, default=str, ensure_ascii=False)
                print(f"Fallback to standard json: {fallback_result}")
                return fallback_result.encode()
            except Exception as fallback_error:
                print(f"Fallback also failed: {fallback_error}")
                return b'{"error": "serialization_failed"}'

    def safe_loads(data: bytes | str) -> Any:
        """orjson.loads with a lenient stdlib-json fallback."""
        try:
            return orjson.loads(data)
        except orjson.JSONDecodeError as e:
            print(f"Deserialization failed: {e}")
            # Fallback: use standard json after lossy UTF-8 decoding
            try:
                if isinstance(data, bytes):
                    data = data.decode('utf-8', 'replace')
                return json.loads(data)
            except Exception as fallback_error:
                print(f"Fallback also failed: {fallback_error}")
                return {"error": "deserialization_failed"}

    def validate_json_compatibility(obj: Any) -> bool:
        """Report whether *obj* round-trips through orjson (and stdlib json)."""
        try:
            # Test with orjson
            orjson_result = orjson.dumps(obj)
            orjson_parsed = orjson.loads(orjson_result)
            # Test with standard json (if possible)
            try:
                json_result = json.dumps(obj, default=str, ensure_ascii=False)
                json_parsed = json.loads(json_result)
                print("Both orjson and standard json succeeded")
                return True
            except Exception:
                print("orjson succeeded, but standard json failed")
                return True
        except Exception as e:
            print(f"JSON compatibility check failed: {e}")
            return False

    # Validation with test data
    test_cases = [
        {"simple": "data"},
        {1, 2, 3},  # set - expected to fail
        datetime.datetime.now(),  # datetime - succeeds with orjson
        {"valid": True, "number": 42}
    ]
    for i, test_case in enumerate(test_cases):
        print(f"\nTest case {i + 1}: {type(test_case).__name__}")
        # Safe operation test
        result = safe_dumps(test_case)
        parsed = safe_loads(result)
        # Compatibility check
        is_compatible = validate_json_compatibility(test_case)
        print(f"Compatibility: {is_compatible}")
if __name__ == "__main__":
    # Entry point: demonstrate error scenarios, then the defensive wrappers.
    print("=== Comprehensive Error Handling ===")
    comprehensive_error_handling()
    print("\n=== Debug Helper Functions ===")
    debug_helper_functions()