CBOR

Serialization · Binary · IoT · WebAuthn · RFC · Multi-language

Library

CBOR

Overview

CBOR (Concise Binary Object Representation, RFC 8949) is a compact binary data serialization format. It achieves more efficient data representation than JSON while remaining conceptually compatible with it, and has gained attention through standard adoption in IoT devices and WebAuthn. With its continuing growth in security applications and on constrained devices, it is positioned as a next-generation data exchange format, designed for small code size, efficient message size, and extensibility without version negotiation.

Details

CBOR (RFC 8949, formerly RFC 7049) is a binary serialization format with design goals of extremely small code size, efficient message size, and extensibility without version negotiation. It is recommended as the data serialization layer for CoAP (Constrained Application Protocol) and as the foundation for COSE (CBOR Object Signing and Encryption) messages, and is also used in CTAP (Client-to-Authenticator Protocol) for WebAuthn. It supports systems with very limited memory, processor performance, and instruction sets, with numerous implementations optimized for constrained devices.

Key Features

  • Compact Binary Format: More efficient data representation than JSON
  • Self-describing: Describes data structure without schema requirement
  • Extensibility: Feature extension without version negotiation
  • Multi-language Support: Rich implementations available in various programming languages
  • Standards Compliance: International standard by IETF RFC 8949
  • IoT Optimization: Implementations with a minimal memory footprint (some requiring no heap allocation) suited to constrained devices

Pros and Cons

Pros

  • Achieves binary efficiency while maintaining conceptual compatibility with JSON
  • Reliability through standard adoption in IoT devices and WebAuthn
  • Memory-efficient implementations for constrained devices
  • No schema definition required due to self-describing nature
  • Rich data type support (binary data, dates, tagged data, etc.)
  • High portability through multi-language implementations

Cons

  • Lower readability compared to JSON (binary format)
  • Limited tool support due to less widespread adoption than JSON
  • Higher implementation complexity than JSON
  • Difficult to directly verify binary data during debugging
  • Standard adoption in web and API ecosystems lags behind JSON
  • Smaller ecosystem and less library support compared to JSON

Reference Pages

Usage Examples

Basic Encoding and Decoding (Python)

import cbor2
import json

# Build a sample payload mixing strings, numbers, lists, nested maps and raw bytes
sample = {
    "name": "CBOR Example",
    "version": 1.0,
    "features": ["compact", "self-describing", "extensible"],
    "metadata": {
        "created": "2025-01-01",
        "binary_data": b"Hello, CBOR!",
    },
}

# Serialize with CBOR
cbor_data = cbor2.dumps(sample)
print(f"CBOR size: {len(cbor_data)} bytes")

# Serialize with JSON as a size baseline (default=str stringifies the bytes value)
json_data = json.dumps(sample, default=str).encode('utf-8')
print(f"JSON size: {len(json_data)} bytes")
print(f"CBOR efficiency: {len(cbor_data) / len(json_data):.2%}")

# Round-trip the CBOR bytes back into Python objects
decoded_data = cbor2.loads(cbor_data)
print("Decoded data:", decoded_data)

File Operations and Streaming

import cbor2
from datetime import datetime, timezone

# Build a sizeable synthetic dataset: 100 sensors with 100 readings each
def _make_sensor(idx):
    # Even indices are temperature sensors, odd indices humidity sensors
    is_temperature = idx % 2 == 0
    return {
        "id": f"sensor_{idx:04d}",
        "type": "temperature" if is_temperature else "humidity",
        "readings": [
            {
                "timestamp": datetime.now(timezone.utc),
                "value": 20.0 + (idx % 50),
                "unit": "°C" if is_temperature else "%",
            }
            for _ in range(100)
        ],
    }

large_dataset = {
    "sensors": [_make_sensor(i) for i in range(100)],
    "metadata": {
        "collection_time": datetime.now(timezone.utc),
        "format_version": "1.2.0",
    },
}

# Persist the dataset as a single CBOR document
with open('sensor_data.cbor', 'wb') as f:
    cbor2.dump(large_dataset, f)

# Read it back
with open('sensor_data.cbor', 'rb') as f:
    loaded_dataset = cbor2.load(f)

print(f"Loaded {len(loaded_dataset['sensors'])} sensors")

# Streaming encoding (for large data)
import io

def stream_encode_sensors(sensors):
    """Encode sensors as a stream: a CBOR header followed by
    length-prefixed CBOR records, one record per sensor."""
    out = io.BytesIO()

    # Stream header identifying the format
    out.write(cbor2.dumps({"format": "sensor_stream", "version": "1.0"}))

    # Each sensor becomes: CBOR(length) + CBOR(record)
    for record in sensors:
        payload = cbor2.dumps(record)
        out.write(cbor2.dumps(len(payload)))  # length prefix
        out.write(payload)

    return out.getvalue()

# Streaming decoding
def stream_decode_sensors(cbor_stream):
    """Streaming decoding of sensor data produced by stream_encode_sensors.

    Reads the stream header, then repeatedly reads a CBOR-encoded length
    prefix followed by that many bytes holding one sensor record.

    Args:
        cbor_stream: bytes in the "sensor_stream" framing format.

    Returns:
        List of decoded sensor dicts. A truncated or malformed trailing
        record ends the stream instead of raising.
    """
    stream = io.BytesIO(cbor_stream)

    # Read and report the stream header
    header = cbor2.load(stream)
    print(f"Stream format: {header}")

    sensors = []
    while stream.tell() < len(cbor_stream):
        try:
            # Read data length
            data_length = cbor2.load(stream)
            # Read sensor data
            sensor_data = stream.read(data_length)
            sensor = cbor2.loads(sensor_data)
            sensors.append(sensor)
        except (cbor2.CBORDecodeError, EOFError, TypeError, ValueError):
            # Bug fix: the original bare `except:` also swallowed
            # KeyboardInterrupt/SystemExit. Catch only decode-related
            # failures and treat them as end-of-stream.
            break

    return sensors

IoT Device Optimization (C-style Python Implementation)

import cbor2
import struct
from typing import Dict, Any

class IoTCBOREncoder:
    """Optimized CBOR encoder for IoT devices."""

    @staticmethod
    def encode_sensor_reading(sensor_id: int, value: float, timestamp: int) -> bytes:
        """Pack a reading as the 3-element array [id, value*100, ts]."""
        # Fixed-point: keep two decimal places as an integer so the
        # value encodes as a compact CBOR int instead of a float
        scaled_value = int(value * 100)
        return cbor2.dumps([sensor_id, scaled_value, timestamp])

    @staticmethod
    def encode_device_status(device_id: int, status: Dict[str, Any]) -> bytes:
        """Pack device status using small integer keys.

        CBOR encodes unsigned integers 0-23 in a single byte, so numeric
        keys keep each map entry as compact as possible.
        """
        compact_status = {
            1: status.get('battery', 0),      # Battery level (%)
            2: status.get('signal', 0),       # Signal strength
            3: status.get('temperature', 0),  # Internal temperature
            4: int(status.get('online', False)),  # Online status as 0/1
        }
        return cbor2.dumps([device_id, compact_status])

class IoTCBORDecoder:
    """Optimized CBOR decoder for IoT devices."""

    @staticmethod
    def decode_sensor_reading(cbor_data: bytes) -> Dict[str, Any]:
        """Unpack a reading created by IoTCBOREncoder.encode_sensor_reading."""
        fields = cbor2.loads(cbor_data)
        return {
            'sensor_id': fields[0],
            'value': fields[1] / 100.0,  # undo the fixed-point scaling
            'timestamp': fields[2],
        }

    @staticmethod
    def decode_device_status(cbor_data: bytes) -> Dict[str, Any]:
        """Unpack a status created by IoTCBOREncoder.encode_device_status."""
        device_id, compact_status = cbor2.loads(cbor_data)
        return {
            'device_id': device_id,
            'battery': compact_status.get(1, 0),
            'signal': compact_status.get(2, 0),
            'temperature': compact_status.get(3, 0),
            'online': bool(compact_status.get(4, 0)),
        }

# Usage example
encoder = IoTCBOREncoder()
decoder = IoTCBORDecoder()

# Round-trip a single sensor reading
sensor_cbor = encoder.encode_sensor_reading(sensor_id=101, value=23.45, timestamp=1640995200)
print(f"Sensor data CBOR size: {len(sensor_cbor)} bytes")

decoded_sensor = decoder.decode_sensor_reading(sensor_cbor)
print("Decoded sensor data:", decoded_sensor)

# Round-trip a device status report
device_status = {
    'battery': 85,
    'signal': -65,
    'temperature': 35.2,
    'online': True,
}
status_cbor = encoder.encode_device_status(device_id=42, status=device_status)

print(f"Status data CBOR size: {len(status_cbor)} bytes")
decoded_status = decoder.decode_device_status(status_cbor)
print("Decoded status:", decoded_status)

WebAuthn/CTAP Integration

import cbor2
import hashlib
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.asymmetric import ec

class WebAuthnCBOR:
    """CBOR processing for WebAuthn/CTAP messages.

    Uses small integer map keys and canonical (deterministic) CBOR
    encoding, as the CTAP protocol requires.
    """
    
    @staticmethod
    def create_webauthn_assertion(credential_id: bytes, 
                                signature: bytes, 
                                user_handle: bytes = None) -> bytes:
        """Create WebAuthn authentication assertion.

        Args:
            credential_id: Raw credential identifier.
            signature: Raw assertion signature.
            user_handle: Optional user handle; omitted from the map when None.

        Returns:
            Canonically CBOR-encoded assertion map.
        """
        assertion_data = {
            1: credential_id,  # credentialId
            2: signature,      # signature
        }
        
        # Bug fix: the original `if user_handle:` silently dropped an
        # empty (b"") handle; only omit the key when no handle was given.
        if user_handle is not None:
            assertion_data[3] = user_handle  # userHandle
        
        # Use canonical CBOR encoding as specified by CTAP
        return cbor2.dumps(assertion_data, canonical=True)
    
    @staticmethod
    def parse_webauthn_assertion(cbor_data: bytes) -> dict:
        """Parse an assertion produced by create_webauthn_assertion.

        Returns:
            Dict with 'credential_id', 'signature' and 'user_handle'
            (None for any key missing from the CBOR map).
        """
        data = cbor2.loads(cbor_data)
        
        return {
            'credential_id': data.get(1),
            'signature': data.get(2),
            'user_handle': data.get(3)
        }
    
    @staticmethod
    def create_ctap_request(command: int, parameters: dict) -> bytes:
        """Create a CTAP request map {0x01: command, 0x02: parameters}."""
        ctap_data = {
            0x01: command,     # CTAP command
            0x02: parameters   # Parameters
        }
        
        return cbor2.dumps(ctap_data, canonical=True)

# WebAuthn usage example
webauthn = WebAuthnCBOR()

# Sample byte strings standing in for real authenticator output
credential_id = b"example_credential_id_1234567890"
signature = b"example_signature_data_abcdef"
user_handle = b"user123"

assertion_cbor = webauthn.create_webauthn_assertion(
    credential_id=credential_id,
    signature=signature,
    user_handle=user_handle,
)
print(f"WebAuthn assertion CBOR size: {len(assertion_cbor)} bytes")

# Round-trip the assertion back into a dict
parsed_assertion = webauthn.parse_webauthn_assertion(assertion_cbor)
print("Parsed assertion:", parsed_assertion)

# CTAP request example: authenticatorMakeCredential (command 0x01)
make_credential_params = {
    "clientDataHash": hashlib.sha256(b"client_data").digest(),
    "rp": {"id": "example.com", "name": "Example Corp"},
    "user": {"id": b"user123", "name": "[email protected]"},
    "pubKeyCredParams": [{"type": "public-key", "alg": -7}],
}
ctap_request = webauthn.create_ctap_request(command=0x01, parameters=make_credential_params)

print(f"CTAP request size: {len(ctap_request)} bytes")

Advanced CBOR Tags and Custom Types

import cbor2
from datetime import datetime, timezone
import uuid
import base64

# Define custom encoders
def encode_datetime(encoder, value):
    """Encode a datetime as CBOR tag 1 (epoch-based date/time)."""
    epoch_seconds = value.timestamp()
    encoder.encode(cbor2.CBORTag(1, epoch_seconds))

def encode_uuid(encoder, value):
    """Encode a uuid.UUID as CBOR tag 37 wrapping its 16 raw bytes."""
    raw = value.bytes
    encoder.encode(cbor2.CBORTag(37, raw))

def encode_regex(encoder, value):
    """Encode a compiled regex as CBOR tag 35 wrapping its pattern string."""
    pattern_text = value.pattern
    encoder.encode(cbor2.CBORTag(35, pattern_text))

# Define custom decoders
def decode_datetime(decoder, tag):
    """Rebuild a UTC-aware datetime from a tag-1 epoch timestamp."""
    epoch_seconds = tag.value
    return datetime.fromtimestamp(epoch_seconds, timezone.utc)

def decode_uuid(decoder, tag):
    """Rebuild a uuid.UUID from its 16 raw bytes (tag 37)."""
    raw = tag.value
    return uuid.UUID(bytes=raw)

def decode_regex(decoder, tag):
    """Compile a tag-35 pattern string back into a regex object."""
    import re
    compiled = re.compile(tag.value)
    return compiled

# Configure encoder and decoder hooks.
# NOTE(review): the original called cbor2.CBOREncoder() without its required
# stream argument, used a non-existent `encode_type` method, and assigned a
# dict to `tag_hook`. The actual cbor2 API takes a `default` callback for
# types it cannot serialize natively and a callable `tag_hook(decoder, tag)`
# for tags without built-in handling.
def cbor_default(encoder, value):
    """`default=` hook: delegate unsupported types to the tag encoders above."""
    if isinstance(value, uuid.UUID):
        encode_uuid(encoder, value)
    elif isinstance(value, datetime):
        encode_datetime(encoder, value)
    else:
        raise TypeError(f"cannot serialize {type(value)}")

def cbor_tag_hook(decoder, tag):
    """`tag_hook=` callable: dispatch tag numbers to the decoders above."""
    handlers = {1: decode_datetime, 37: decode_uuid, 35: decode_regex}
    handler = handlers.get(tag.tag)
    # Unknown tags are returned unchanged so decoding still succeeds
    return handler(decoder, tag) if handler is not None else tag

# Usage example
complex_data = {
    "id": uuid.uuid4(),
    "created_at": datetime.now(timezone.utc),
    "expires_at": datetime(2025, 12, 31, 23, 59, 59, tzinfo=timezone.utc),
    "data": {
        "numbers": [1, 2, 3.14159, -42],
        "binary": b"binary_data_example",
        "nested": {
            "flag": True,
            "null_value": None
        }
    }
}

# Encoding with custom types.
# Bug fix: cbor2 has no `encode_type` registration API; custom encoding is
# done through a `default` callback passed to dumps()/CBOREncoder, invoked
# only for types the encoder cannot serialize natively. (Recent cbor2
# versions handle datetime and UUID natively, so the hook is a fallback.)
def _default(encoder, value):
    if isinstance(value, datetime):
        encode_datetime(encoder, value)
    elif isinstance(value, uuid.UUID):
        encode_uuid(encoder, value)
    else:
        raise TypeError(f"cannot serialize {type(value)}")

cbor_data = cbor2.dumps(complex_data, default=_default)
print(f"Complex data CBOR size: {len(cbor_data)} bytes")

# Decoding with custom types.
# Bug fix: a cbor2 `tag_hook` is a callable `hook(decoder, tag)` passed to
# loads()/CBORDecoder — not a dict attribute assigned after construction.
# It is invoked for tags that cbor2 does not handle natively.
def _tag_hook(decoder, tag):
    if tag.tag == 1:
        return decode_datetime(decoder, tag)
    if tag.tag == 37:
        return decode_uuid(decoder, tag)
    return tag  # leave unknown tags untouched

decoded_data = cbor2.loads(cbor_data, tag_hook=_tag_hook)
print("Decoded complex data:", decoded_data)
print(f"UUID type: {type(decoded_data['id'])}")
print(f"DateTime type: {type(decoded_data['created_at'])}")

Performance Comparison and Benchmarks

import cbor2
import json
import pickle
import time
import sys
from typing import Any, Dict

class SerializationBenchmark:
    """Benchmark harness comparing CBOR, JSON and pickle serialization."""
    
    @staticmethod
    def create_test_data(size: int = 1000) -> Dict[str, Any]:
        """Generate a nested test payload.

        Args:
            size: Number of records in the "data" list.

        Returns:
            Dict with a "metadata" header and a "data" list of records
            containing strings, floats, bools, lists and raw bytes.
        """
        # Bug fix: this example never imported datetime at the top, so the
        # original raised NameError at runtime; import it locally here.
        from datetime import datetime

        return {
            "metadata": {
                "version": "1.0.0",
                "created": datetime.now().isoformat(),
                "description": "Benchmark test data"
            },
            "data": [
                {
                    "id": i,
                    "name": f"item_{i:05d}",
                    "value": i * 3.14159,
                    "active": i % 2 == 0,
                    "tags": [f"tag_{j}" for j in range(i % 5 + 1)],
                    "metadata": {
                        "category": "test",
                        "priority": i % 10,
                        "binary_data": f"data_{i}".encode('utf-8')
                    }
                }
                for i in range(size)
            ]
        }
    
    @staticmethod
    def benchmark_serialization(data: Any, iterations: int = 100):
        """Time `iterations` serializations of *data* with each format.

        Returns:
            Dict mapping format name to {'time': seconds, 'size': bytes,
            'data': the last serialized payload}.
        """

        def _time_dumps(dump):
            # Use perf_counter (monotonic, high resolution) instead of
            # time.time, which can jump with wall-clock adjustments.
            start = time.perf_counter()
            for _ in range(iterations):
                payload = dump(data)
            return time.perf_counter() - start, payload

        cbor_time, cbor_data = _time_dumps(cbor2.dumps)
        # default=str makes JSON accept the bytes values (stringified, lossy)
        json_time, json_data = _time_dumps(
            lambda d: json.dumps(d, default=str).encode('utf-8'))
        pickle_time, pickle_data = _time_dumps(pickle.dumps)

        return {
            'CBOR': {'time': cbor_time, 'size': len(cbor_data), 'data': cbor_data},
            'JSON': {'time': json_time, 'size': len(json_data), 'data': json_data},
            'Pickle': {'time': pickle_time, 'size': len(pickle_data), 'data': pickle_data}
        }

# Run benchmark
benchmark = SerializationBenchmark()
test_data = benchmark.create_test_data(1000)

print("Serialization Benchmark Results:")
print("=" * 50)

results = benchmark.benchmark_serialization(test_data, iterations=100)

for format_name, metrics in results.items():
    print(f"{format_name}:")
    print(f"  Time: {metrics['time']:.4f} seconds")
    print(f"  Size: {metrics['size']:,} bytes")
    print(f"  Efficiency: {metrics['size'] / results['JSON']['size']:.2%} of JSON")
    print()

# Deserialization benchmark
print("Deserialization Benchmark:")
print("=" * 30)

# A dispatch table keeps the timing loop free of per-format branching
_decoders = {
    'CBOR': cbor2.loads,
    'JSON': lambda raw: json.loads(raw.decode('utf-8')),
    'Pickle': pickle.loads,
}

for format_name, metrics in results.items():
    decode = _decoders[format_name]
    start_time = time.time()
    for _ in range(100):
        decode(metrics['data'])
    deserialize_time = time.time() - start_time
    print(f"{format_name} deserialization: {deserialize_time:.4f} seconds")