Cerberus
Library
Cerberus
Overview
Cerberus is a lightweight and extensible data validation library specifically for Python. It provides powerful yet simple validation functionality "out of the box" and is designed as a pure Python library with no external dependencies. It adopts dictionary-based schema definitions, making integration with configuration files like YAML and JSON easy. As of 2025, it continues to be maintained according to semantic versioning and supports a wide range of CPython and PyPy versions as a stable validation solution.
Details
The Cerberus 1.3 series is the latest stable version as of 2025, with continuous maintenance following semantic versioning. Since schemas are built from Python's standard types (dict, list, string, etc.), they can be defined in various formats such as YAML (loaded with PyYAML) and JSON. Unlike some other validation tools, it doesn't halt on the first error: it processes the entire document, validate() then returns False, and the collected issues are available through the validator's errors property. While offering functionality similar to Pydantic, it adopts a lighter and simpler design philosophy.
Key Features
- Lightweight Design: Pure Python library with no external dependencies
- Dictionary-based Schema: Intuitive and readable schema definitions
- Extensibility: Support for both class-based and function-based custom validators
- Comprehensive Error Handling: Detailed error reports after complete processing
- Configuration File Integration: Natural integration with YAML, JSON, etc.
- Multi-platform: Support for wide range of CPython and PyPy versions
Pros and Cons
Pros
- Simple and lightweight with no dependencies
- Intuitive definition method through dictionary-type schemas
- High compatibility with YAML/JSON configuration files
- Easy implementation of custom validators
- Comprehensive validation results through complete error collection
- Stability through semantic versioning
Cons
- More limited functionality compared to Pydantic
- Lack of type hint support
- Boolean return method not suitable for exception-based processing
- Complexity in schema management for large projects
- Limited IDE support and autocompletion
- Insufficient integration with modern Python's typing system
Reference Pages
Code Examples
Installation and Basic Setup
# Installing Cerberus
pip install cerberus
poetry add cerberus
pipenv install cerberus
# Lightweight library with no dependencies
# Requires Python 3.7 or higher
Basic Schema Definition and Validation
from cerberus import Validator
# Basic schema definition
# Each key is a document field; each value is the rule set applied to it.
schema = {
    'name': {'type': 'string', 'required': True, 'minlength': 2, 'maxlength': 50},
    'age': {'type': 'integer', 'required': True, 'min': 0, 'max': 150},
    'email': {
        'type': 'string',
        'required': True,
        'regex': r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$',
    },
    # Optional field; the default is filled in during normalization.
    'active': {'type': 'boolean', 'default': True},
    # Optional field that may also be given as None.
    'score': {'type': 'float', 'nullable': True, 'min': 0.0, 'max': 100.0},
}
# Create validator instance
# One validator is reused for every document below; each validate() call
# replaces the previous result and error report.
v = Validator(schema)

# Validate valid data
# The e-mail address must match the schema's regex, so a real-looking
# address is used here.
valid_document = {
    'name': 'John Doe',
    'age': 30,
    'email': 'john.doe@example.com',
    'active': True,
    'score': 85.5
}
if v.validate(valid_document):
    print("Validation successful")
    print("Normalized data:", v.normalized(valid_document))
else:
    print("Validation errors:", v.errors)

# Validate invalid data
# Several rules are broken at once to show that all problems are reported
# together instead of stopping at the first one.
invalid_document = {
    'name': '',  # Minimum length violation
    'age': -5,  # Minimum value violation
    'email': 'invalid-email',  # Regex violation
    'score': 150.0  # Maximum value violation
}
if v.validate(invalid_document):
    print("Validation successful")
else:
    print("Validation errors:")
    for field, errors in v.errors.items():
        print(f" {field}: {errors}")

# Default value application
# 'active' is omitted on purpose so that normalization has to supply it.
minimal_document = {
    'name': 'Jane Smith',
    'age': 25,
    'email': 'jane.smith@example.com'
}
if v.validate(minimal_document):
    normalized = v.normalized(minimal_document)
    print("After default value application:", normalized)
    # Result: active is set to True
Nested Schemas and Complex Validation
# Schema for nested objects
# Kept as its own dict so it can be plugged into a parent schema's
# 'schema' rule.
address_schema = {
    'street': {'type': 'string', 'required': True, 'minlength': 5},
    'city': {'type': 'string', 'required': True},
    # Five digits, optionally followed by a dash and four more digits.
    'postal_code': {
        'type': 'string',
        'required': True,
        'regex': r'^\d{5}(-\d{4})?$',
    },
    'country': {
        'type': 'string',
        'allowed': ['USA', 'UK', 'Canada', 'Australia'],
    },
}
# Nesting in main schema
# A 'dict' field carries a 'schema' rule for its keys; a 'list' field
# carries a 'schema' rule that every item must satisfy.
user_schema = {
    'personal_info': {
        'type': 'dict',
        'required': True,
        'schema': {
            'first_name': {'type': 'string', 'required': True, 'minlength': 1},
            'last_name': {'type': 'string', 'required': True, 'minlength': 1},
            'birth_date': {'type': 'datetime', 'required': True},
        },
    },
    # Reuses the address schema defined separately.
    'address': {'type': 'dict', 'required': True, 'schema': address_schema},
    # Optional list whose items are dicts with a 'type' and a 'number' key.
    'phones': {
        'type': 'list',
        'required': False,
        'schema': {
            'type': 'dict',
            'schema': {
                'type': {
                    'type': 'string',
                    'allowed': ['mobile', 'home', 'work'],
                },
                'number': {
                    'type': 'string',
                    'regex': r'^\d{3}-\d{3}-\d{4}$',
                },
            },
        },
    },
    # Optional list of short strings; the outer min/maxlength bound the
    # number of items, the inner ones bound each item's length.
    'tags': {
        'type': 'list',
        'required': False,
        'schema': {'type': 'string', 'minlength': 1, 'maxlength': 20},
        'minlength': 0,
        'maxlength': 10,
    },
}
# Validate complex document
from datetime import datetime

validator = Validator(user_schema)

# Sample document exercising every nested part of user_schema.
complex_document = {
    'personal_info': {
        'first_name': 'John',
        'last_name': 'Doe',
        'birth_date': datetime(1990, 5, 15),
    },
    'address': {
        'street': '123 Main Street',
        'city': 'New York',
        'postal_code': '10001',
        'country': 'USA',
    },
    'phones': [
        {'type': 'mobile', 'number': '123-456-7890'},
        {'type': 'home', 'number': '234-567-8901'},
    ],
    'tags': ['developer', 'python', 'web'],
}

# Report the failure details first; otherwise confirm success.
if not validator.validate(complex_document):
    print("Validation errors:")
    for field, errors in validator.errors.items():
        print(f" {field}: {errors}")
else:
    print("Complex document validation successful")
# List element validation
# A non-empty list of student records; every subject score shares the same
# 0-100 integer rule, so the per-subject rules are generated.
scores_schema = {
    'student_scores': {
        'type': 'list',
        'required': True,
        'minlength': 1,
        'schema': {
            'type': 'dict',
            'schema': {
                'student_id': {'type': 'string', 'required': True, 'regex': r'^S\d{6}$'},
                'name': {'type': 'string', 'required': True},
                'scores': {
                    'type': 'dict',
                    'required': True,
                    'schema': {
                        subject: {'type': 'integer', 'min': 0, 'max': 100}
                        for subject in ('math', 'english', 'science')
                    },
                },
            },
        },
    },
}
# Two student records, each matching the per-item schema.
scores_document = {
    'student_scores': [
        {
            'student_id': 'S123456',
            'name': 'John Doe',
            'scores': {'math': 85, 'english': 90, 'science': 78},
        },
        {
            'student_id': 'S123457',
            'name': 'Jane Smith',
            'scores': {'math': 92, 'english': 88, 'science': 95},
        },
    ],
}

scores_validator = Validator(scores_schema)
# Nothing is printed when validation fails.
if scores_validator.validate(scores_document):
    print("Student scores validation successful")
Custom Validators and Advanced Features
from cerberus import Validator
import re
from datetime import datetime, date
# Custom validator class
class CustomValidator(Validator):
    """Validator extended with ``is_even``, ``future_date`` and ``strong_password``.

    Each ``_validate_<rule>`` method makes ``<rule>`` usable in a schema.
    Cerberus reads the dict literal at the end of each method's docstring
    at runtime to validate the rule's argument, so that literal must remain
    the last thing in the docstring.
    """

    def _validate_is_even(self, is_even, field, value):
        """Check if value is even

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if is_even and value % 2 != 0:
            self._error(field, f"{value} must be an even number")

    def _validate_future_date(self, future_date, field, value):
        """Check if date is in the future

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        # datetime is a subclass of date, so one isinstance check covers both.
        if not future_date or not isinstance(value, date):
            return
        # A datetime cannot be ordered against a plain date (TypeError), so
        # each kind of value is compared with "now" of its own kind. Passing
        # the value's tzinfo keeps aware datetimes comparable as well.
        if isinstance(value, datetime):
            reference = datetime.now(value.tzinfo)
        else:
            reference = date.today()
        if value <= reference:
            self._error(field, "Date must be in the future")

    def _validate_strong_password(self, strong_password, field, value):
        """Check if password is strong

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if not (strong_password and isinstance(value, str)):
            return
        if len(value) < 8:
            self._error(field, "Password must be at least 8 characters long")
        # Every unmet requirement is reported, not just the first one.
        character_requirements = (
            (r'[A-Z]', "Password must contain uppercase letters"),
            (r'[a-z]', "Password must contain lowercase letters"),
            (r'\d', "Password must contain digits"),
            (r'[!@#$%^&*(),.?":{}|<>]', "Password must contain special characters"),
        )
        for pattern, message in character_requirements:
            if not re.search(pattern, value):
                self._error(field, message)
# Schema using custom rules
# 'is_even', 'strong_password' and 'future_date' are not built-in rules;
# they only work with a validator class that implements them.
custom_schema = {
    'user_id': {
        'type': 'integer',
        'required': True,
        'is_even': True,  # Custom rule
    },
    'password': {
        'type': 'string',
        'required': True,
        'strong_password': True,  # Custom rule
    },
    'appointment_date': {
        'type': 'date',
        'required': True,
        'future_date': True,  # Custom rule
    },
    # Letters, digits and underscores only, 3 to 20 characters.
    'username': {
        'type': 'string',
        'required': True,
        'minlength': 3,
        'maxlength': 20,
        'regex': r'^[a-zA-Z0-9_]+$',
    },
}
# Using custom validator
from datetime import timedelta

custom_validator = CustomValidator(custom_schema)

# Test data
test_data = {
    'user_id': 12,  # Even number
    'password': 'SecurePass123!',  # Strong password
    # Computed relative to today so the example keeps satisfying the
    # future_date rule whenever it is run (a fixed calendar date would
    # eventually lie in the past).
    'appointment_date': date.today() + timedelta(days=30),  # Future date
    'username': 'user_123'
}
if custom_validator.validate(test_data):
    print("Custom validation successful")
else:
    print("Custom validation errors:")
    for field, errors in custom_validator.errors.items():
        print(f" {field}: {errors}")
# Function-based custom validator
def validate_phone_number(field, value, error):
    """Check US phone number format.

    Intended for use as a ``check_with`` callable.

    Args:
        field: Name of the field being checked; passed through to ``error``.
        value: The value to check. Anything that is not a string is
            reported as invalid instead of raising.
        error: Callable invoked as ``error(field, message)`` when the value
            is not of the form ``123-456-7890``.
    """
    # fullmatch rejects a trailing newline, which '$' with re.match accepts.
    if not isinstance(value, str) or not re.fullmatch(r'\d{3}-\d{3}-\d{4}', value):
        error(field, 'Please enter phone number in format: 123-456-7890')
# Register function-based validator
# The callable itself (not its name) is handed to the 'check_with' rule.
phone_schema = {
    'phone': {'type': 'string', 'required': True, 'check_with': validate_phone_number},
}
phone_validator = Validator(phone_schema)

phone_data = {'phone': '123-456-7890'}
# Nothing is printed when validation fails.
if phone_validator.validate(phone_data):
    print("Phone number validation successful")
Conditional Validation and Dynamic Schema Generation
# Conditional validation (fields that depend on other fields)
# NOTE: the built-in 'dependencies' rule does not make a field required.
# It states that the field may only appear when the listed fields are
# present in the document as well.
conditional_schema = {
    'account_type': {
        'type': 'string',
        'required': True,
        'allowed': ['personal', 'business', 'premium']
    },
    'company_name': {
        'type': 'string',
        'required': False,
        'dependencies': 'account_type'  # Only allowed together with account_type
    },
    'tax_id': {
        'type': 'string',
        'required': False,
        # Only allowed when both account_type and company_name are present
        'dependencies': ['account_type', 'company_name']
    },
    'credit_limit': {
        'type': 'integer',
        'required': False,
        'min': 1000,
        'dependencies': 'account_type'  # Only allowed together with account_type
    }
}
class ConditionalValidator(Validator):
    """Validator that adds account-type checks to the ``dependencies`` rule.

    NOTE(review): a field's rules are only evaluated when the field is
    present in the document, so the checks below catch empty values of
    ``company_name`` / ``tax_id`` / ``credit_limit`` but not fields that
    are omitted entirely. Confirm this matches the intended behaviour.
    """

    def _validate_dependencies(self, dependencies, field, value):
        """Check conditional required fields

        Runs the built-in dependency check first, then reports an error
        when a business or premium account supplies an empty value for one
        of its mandatory fields.

        The rule's arguments are validated against this schema:
        {'type': ('dict', 'hashable', 'list')}
        """
        # Overriding the method replaces the built-in rule, so delegate to
        # it explicitly; a truthy result means it already recorded an error
        # and further checks on this field should stop.
        stop = super()._validate_dependencies(dependencies, field, value)
        if stop:
            return stop

        if isinstance(dependencies, str):
            dependencies = [dependencies]
        document = self.document
        # The extra checks apply only to fields that depend on account_type.
        if 'account_type' not in dependencies or 'account_type' not in document:
            return None

        account_type = document['account_type']
        if account_type == 'business':
            if field == 'company_name' and not value:
                self._error(field, 'Company name is required for business accounts')
            elif field == 'tax_id' and not value:
                self._error(field, 'Tax ID is required for business accounts')
        elif account_type == 'premium':
            if field == 'credit_limit' and not value:
                self._error(field, 'Credit limit is required for premium accounts')
        return None
# Dynamic schema generation
def generate_form_schema(form_type):
    """Build the validation schema for one kind of form.

    Every form has ``name`` and ``email``. ``'registration'``,
    ``'profile'`` and ``'contact'`` each add their own fields; any other
    ``form_type`` yields only the two common fields. A new dict is built
    on every call.
    """
    common_fields = {
        'name': {'type': 'string', 'required': True, 'minlength': 1},
        'email': {'type': 'string', 'required': True, 'regex': r'^[^@]+@[^@]+\.[^@]+$'},
    }

    if form_type == 'registration':
        extra_fields = {
            'password': {'type': 'string', 'required': True, 'minlength': 8},
            'confirm_password': {'type': 'string', 'required': True},
            'age': {'type': 'integer', 'required': True, 'min': 13},
        }
    elif form_type == 'profile':
        extra_fields = {
            'bio': {'type': 'string', 'required': False, 'maxlength': 500},
            'website': {'type': 'string', 'required': False, 'regex': r'^https?://.+'},
        }
    elif form_type == 'contact':
        extra_fields = {
            'subject': {'type': 'string', 'required': True, 'minlength': 5},
            'message': {'type': 'string', 'required': True, 'minlength': 10},
        }
    else:
        extra_fields = {}

    # Common fields come first, followed by the form-specific ones.
    return {**common_fields, **extra_fields}
# Dynamic schema usage example
registration_schema = generate_form_schema('registration')
profile_schema = generate_form_schema('profile')
contact_schema = generate_form_schema('contact')

# Validation execution
# The e-mail address has to match the schema's regex (it needs an '@' and
# a dot in the domain part), otherwise this "valid" sample would be rejected.
registration_data = {
    'name': 'John Doe',
    'email': 'john.doe@example.com',
    'password': 'SecurePass123',
    'confirm_password': 'SecurePass123',
    'age': 25
}
reg_validator = Validator(registration_schema)
if reg_validator.validate(registration_data):
    print("Registration form validation successful")
Error Handling and YAML/JSON Integration
import yaml
import json
from cerberus import Validator
# Schema definition in YAML format
# Indentation is significant in YAML: it is what nests the field rules
# under 'user' -> 'schema' and the preference rules under 'preferences'.
# The backslash in the regex is doubled because this is a regular (non-raw)
# Python string; the YAML text itself contains a single backslash.
yaml_schema_str = """
user:
  type: dict
  required: true
  schema:
    name:
      type: string
      required: true
      minlength: 2
      maxlength: 50
    age:
      type: integer
      required: true
      min: 0
      max: 150
    email:
      type: string
      required: true
      regex: '^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$'
    preferences:
      type: dict
      required: false
      schema:
        newsletter:
          type: boolean
          default: false
        language:
          type: string
          allowed: ['en', 'es', 'fr', 'de']
          default: 'en'
"""
# Load schema from YAML
# Parses the YAML text into Python objects; the result is later passed to
# a validator as its schema.
yaml_schema = yaml.safe_load(yaml_schema_str)
# Test data in JSON format
# The e-mail address has to satisfy the schema's regex, so a well-formed
# example address is used.
json_data_str = '''
{
  "user": {
    "name": "John Doe",
    "age": 30,
    "email": "john.doe@example.com",
    "preferences": {
      "newsletter": true,
      "language": "en"
    }
  }
}
'''
test_data = json.loads(json_data_str)
# Comprehensive error handling
class DetailedValidator(Validator):
    """Validator that also keeps a per-run summary of the last validation.

    After every ``validate()`` call, ``validation_summary`` holds field
    counts and a tally of errors by category (``required``, ``type``,
    ``range``, ``other``) for that call only.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.validation_summary = self._empty_summary()

    @staticmethod
    def _empty_summary():
        """Return a fresh summary with every counter at zero."""
        return {
            'total_fields': 0,
            'valid_fields': 0,
            'error_fields': 0,
            'errors_by_type': {}
        }

    def validate(self, document, schema=None, update=False, normalize=True):
        """Validate ``document`` and refresh ``validation_summary``.

        Takes the same arguments and returns the same boolean as the
        parent class's ``validate``.
        """
        result = super().validate(document, schema, update, normalize)
        self._generate_summary()
        return result

    def _generate_summary(self):
        """Generate validation result summary"""
        # Start from zero on every run so counts from an earlier
        # validate() call never leak into this one.
        summary = self._empty_summary()

        document = getattr(self, 'document', None)
        if isinstance(document, dict):
            summary['total_fields'] = len(self._flatten_dict(document))

        error_dict = self._flatten_errors(self.errors) if self.errors else {}
        summary['error_fields'] = len(error_dict)
        # Errors can be reported for fields missing from the document
        # (e.g. required ones), so clamp instead of going negative.
        summary['valid_fields'] = max(
            0, summary['total_fields'] - summary['error_fields']
        )

        # Aggregate by error type
        tally = summary['errors_by_type']
        for messages in error_dict.values():
            for message in messages:
                category = self._classify_error(message)
                tally[category] = tally.get(category, 0) + 1

        # Update in place so existing references to the summary dict
        # observe the new values.
        self.validation_summary.clear()
        self.validation_summary.update(summary)

    @staticmethod
    def _classify_error(message):
        """Map one error message to 'required', 'type', 'range' or 'other'.

        Classification is by substring, checked in that order of priority.
        """
        if not isinstance(message, str):
            return 'other'
        if 'required' in message:
            return 'required'
        if 'type' in message:
            return 'type'
        # 'min' / 'max' also cover the 'minlength' / 'maxlength' keywords.
        if any(keyword in message for keyword in ('min', 'max')):
            return 'range'
        return 'other'

    def _flatten_errors(self, errors, parent_key='', sep='.'):
        """Flatten an error report into ``{dotted.path: [messages]}``.

        Errors of nested fields arrive as dicts placed inside the message
        lists; those are descended into so that every message is attributed
        to the innermost field it belongs to.
        """
        flat = {}
        for key, entries in errors.items():
            path = f"{parent_key}{sep}{key}" if parent_key else key
            if not isinstance(entries, (list, tuple)):
                entries = [entries]
            messages = []
            for entry in entries:
                if isinstance(entry, dict):
                    flat.update(self._flatten_errors(entry, path, sep=sep))
                else:
                    messages.append(entry)
            if messages:
                flat[path] = messages
        return flat

    def _flatten_dict(self, d, parent_key='', sep='.'):
        """Flatten nested dictionary"""
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(self._flatten_dict(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)

    def get_detailed_report(self):
        """Generate detailed validation report

        Returns a dict with the overall result, the current summary, the
        error report (``None`` when there are no errors) and a list of
        recommendations derived from the error categories seen.
        """
        report = {
            'validation_result': len(self.errors) == 0,
            'summary': self.validation_summary,
            'errors': self.errors if self.errors else None,
            'recommendations': []
        }
        # Generate recommendations
        by_type = self.validation_summary['errors_by_type']
        if by_type.get('required', 0) > 0:
            report['recommendations'].append('Please check required field inputs')
        if by_type.get('type', 0) > 0:
            report['recommendations'].append('Please verify data types are correct')
        if by_type.get('range', 0) > 0:
            report['recommendations'].append('Please check value ranges and length limits')
        return report
# Using detailed validator
detailed_validator = DetailedValidator(yaml_schema)

# On failure, print the full report as JSON; on success, print the
# normalized document.
if not detailed_validator.validate(test_data):
    report = detailed_validator.get_detailed_report()
    print("Detailed validation report:")
    print(json.dumps(report, indent=2, ensure_ascii=False))
else:
    print("YAML/JSON validation successful")
    print("Normalized data:", detailed_validator.normalized(test_data))
# Practical example of config file validation
def validate_config_file(config_path, schema_path):
    """Validate a YAML config file against a YAML schema file.

    Both files are read as UTF-8, the schema file first. The function does
    not raise: any exception during reading, parsing or validation is
    turned into a result dict.

    Returns:
        A dict that always has ``'valid'`` and ``'message'``. On success it
        also has ``'config'`` (the normalized configuration); on validation
        failure ``'errors'`` and ``'report'``; on an exception ``'error'``
        (the exception text).
    """
    try:
        loaded = []
        for path in (schema_path, config_path):
            with open(path, 'r', encoding='utf-8') as handle:
                loaded.append(yaml.safe_load(handle))
        schema, config = loaded

        validator = DetailedValidator(schema)
        if not validator.validate(config):
            return {
                'valid': False,
                'errors': validator.errors,
                'report': validator.get_detailed_report(),
                'message': 'Configuration file has errors'
            }
        return {
            'valid': True,
            'config': validator.normalized(config),
            'message': 'Configuration file is valid'
        }
    except Exception as e:
        return {
            'valid': False,
            'error': str(e),
            'message': 'Error occurred during file reading or parsing'
        }