marshmallow
Library
marshmallow
Overview
marshmallow is a lightweight library for converting complex objects to and from simple Python datatypes. It provides three main capabilities: data validation, serialization, and deserialization, making it a standard tool widely adopted in Python web application development. As of 2025, version 4.0.0 is the latest release, occupying an important position in enterprise-level Python applications through integration with Flask, FastAPI, SQLAlchemy, and more. Commercial support is also available through Tidelift Subscription.
Details
The marshmallow 4.0 series is the latest major version as of 2025 and takes a schema-based approach to data processing. A schema validates input data, deserializes it into application-level objects, and serializes objects back to primitive Python types, which simplifies rendering data in standard formats such as JSON in HTTP APIs. Its declarative, class-based schema definitions fit naturally with Python's object-oriented style and will feel familiar to developers who have used validation libraries in other languages.
Key Features
- Three-Stage Data Processing: Validation, deserialization, and serialization
- Schema-Based Design: Declarative and reusable schema definitions
- Framework Integration: Standard integration with Flask, FastAPI, SQLAlchemy, etc.
- Lightweight Architecture: High performance with minimal dependencies
- Rich Field Types: Built-in fields and extensible custom fields
- Commercial Support: Enterprise support through Tidelift
Pros and Cons
Pros
- Standard position and maturity in Python ecosystem
- Excellent integration with major frameworks like Flask and FastAPI
- Intuitive and maintainable API through schema definitions
- Rich feature set while remaining lightweight
- Natural handling of nested object structures
- Flexibility of partial serialization (only/exclude)
Cons
- Limited type hint support compared to Pydantic
- Verbosity due to schema class definition boilerplate
- Runtime performance overhead
- Configuration complexity for complex validation logic
- Challenges integrating with modern Python type system
- Somewhat steep learning curve for error handling
Reference Pages
Code Examples
Installation and Basic Setup
# Installing marshmallow
pip install marshmallow
poetry add marshmallow
pipenv install marshmallow
# Extra features (optional)
# Quote the requirement so shells such as zsh do not glob-expand the brackets.
pip install "marshmallow[dev]" # Development dependencies
# NOTE(review): confirm the `reco` extra is still published for marshmallow 4.
pip install "marshmallow[reco]" # Recommended dependencies
# Requires Python 3.9 or higher for marshmallow 4 (Python 3.8 is only supported by older 3.x releases)
# Lightweight with minimal dependencies
Basic Schema Definition and Serialization
from marshmallow import Schema, fields
from dataclasses import dataclass
from datetime import datetime
from pprint import pprint
# Data class definition
@dataclass
class User:
    """Plain application-level user record used as serialization input."""

    name: str
    email: str
    created_at: datetime
# marshmallow schema definition
class UserSchema(Schema):
    """Schema declaring the ``name``, ``email`` and ``created_at`` fields."""

    name = fields.Str()
    email = fields.Email()
    created_at = fields.DateTime()
# Create user object
user = User(
    name="John Doe",
    email="john.doe@example.com",
    created_at=datetime.now(),
)
# Serialization (object → dictionary)
schema = UserSchema()
result = schema.dump(user)
pprint(result)
# pprint sorts dictionary keys, so the output looks like:
# {'created_at': '2025-06-22T10:30:45.123456',
#  'email': 'john.doe@example.com',
#  'name': 'John Doe'}
# Multiple object serialization
users = [
    User("Jane Smith", "jane.smith@example.com", datetime.now()),
    User("Bob Johnson", "bob.johnson@example.com", datetime.now()),
]
schema_many = UserSchema(many=True)
results = schema_many.dump(users)
print(f"Number of users: {len(results)}")
# Partial serialization (field selection)
summary_schema = UserSchema(only=("name", "email"))
summary = summary_schema.dump(user)
pprint(summary)
# {'email': 'john.doe@example.com', 'name': 'John Doe'}
# Field exclusion
no_timestamp_schema = UserSchema(exclude=("created_at",))
no_timestamp = no_timestamp_schema.dump(user)
pprint(no_timestamp)
# {'email': 'john.doe@example.com', 'name': 'John Doe'}
Deserialization and Validation
from marshmallow import Schema, fields, ValidationError
from pprint import pprint
def _validate_age(value):
    """Raise ``ValidationError`` unless ``value`` lies between 0 and 150 inclusive."""
    # Raising (rather than returning False) works on both marshmallow 3 and 4.
    # NOTE(review): marshmallow 4 reportedly ignores a False return value — confirm.
    if not 0 <= value <= 150:
        raise ValidationError("Age must be between 0 and 150.")


class UserSchema(Schema):
    """Input schema: ``name`` and ``email`` are required, ``age`` is range-checked."""

    name = fields.Str(required=True)
    email = fields.Email(required=True)
    age = fields.Int(validate=_validate_age)
    created_at = fields.DateTime()
# Valid data deserialization
user_data = {
    "name": "John Doe",
    "email": "john.doe@example.com",
    "age": 30,
    "created_at": "2025-06-22T10:30:45.123456",
}
schema = UserSchema()
try:
    result = schema.load(user_data)
    pprint(result)
    # pprint sorts dictionary keys, so the output looks like:
    # {'age': 30,
    #  'created_at': datetime.datetime(2025, 6, 22, 10, 30, 45, 123456),
    #  'email': 'john.doe@example.com',
    #  'name': 'John Doe'}
    print("Deserialization successful")
except ValidationError as err:
    print("Validation error:", err.messages)
# Validation error with invalid data
invalid_data = {
    "name": "",  # Empty string (accepted: required=True only checks presence)
    "email": "invalid-email",  # Invalid email format
    "age": 200,  # Out of range value
    "created_at": "invalid-date",  # Invalid date format
}
try:
    result = schema.load(invalid_data)
except ValidationError as err:
    print("Validation errors:")
    for field, errors in err.messages.items():
        print(f" {field}: {errors}")
    # Valid data can also be retrieved
    if err.valid_data:
        print("Valid data:", err.valid_data)
# Validation only (error checking)
validation_result = schema.validate(invalid_data)
if validation_result:
    print("Validation errors found:", validation_result)
else:
    print("Validation successful")
# Partial deserialization
partial_data = {"name": "John Doe"}
try:
    # partial=True skips the "required" check for fields that are absent
    result = schema.load(partial_data, partial=True)
    print("Partial deserialization successful:", result)
except ValidationError as err:
    print("Error:", err.messages)
Nested Schemas and Complex Structures
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional

from marshmallow import Schema, ValidationError, fields
# Complex data structure definitions
@dataclass
class Address:
    """Postal address record with four string components."""

    street: str
    city: str
    postal_code: str
    country: str
@dataclass
class Company:
    """Employer record holding a name and an industry label."""

    name: str
    industry: str
@dataclass
class User:
    """User record that nests an ``Address`` and an optional ``Company``."""

    name: str
    email: str
    address: Address
    company: Optional[Company]
    skills: List[str]
    created_at: datetime
# Nested schema definitions
class AddressSchema(Schema):
    """Address schema; every component is a required string."""

    street = fields.Str(required=True)
    city = fields.Str(required=True)
    postal_code = fields.Str(required=True)
    country = fields.Str(required=True)
class CompanySchema(Schema):
    """Company schema with required ``name`` and ``industry`` strings."""

    name = fields.Str(required=True)
    industry = fields.Str(required=True)
class UserSchema(Schema):
    """User schema composed of nested address/company schemas and a skills list."""

    name = fields.Str(required=True)
    email = fields.Email(required=True)
    address = fields.Nested(AddressSchema, required=True)
    # allow_none=True lets the company be explicitly null
    company = fields.Nested(CompanySchema, allow_none=True)
    skills = fields.List(fields.Str(), required=True)
    created_at = fields.DateTime()
# Complex data creation
complex_data = {
    "name": "John Doe",
    "email": "john.doe@example.com",
    "address": {
        "street": "123 Main St",
        "city": "New York",
        "postal_code": "10001",
        "country": "USA",
    },
    "company": {
        "name": "Example Corp",
        "industry": "Technology",
    },
    "skills": ["Python", "JavaScript", "Go"],
    "created_at": "2025-06-22T10:30:45.123456",
}
schema = UserSchema()
# Complex data deserialization
try:
    result = schema.load(complex_data)
    print("Complex data deserialization successful")
    print(f"Address: {result['address']['city']}")
    print(f"Number of skills: {len(result['skills'])}")
except ValidationError as err:
    print("Error:", err.messages)
# Partial serialization of nested fields: dotted paths pick individual
# sub-fields of the nested address instead of the whole address.
address_only_schema = UserSchema(only=("name", "address.city", "address.country"))
user_obj = User(
    name="John Doe",
    email="john.doe@example.com",
    address=Address("123 Main St", "New York", "10001", "USA"),
    company=Company("Example Corp", "Technology"),
    skills=["Python", "JavaScript"],
    created_at=datetime.now(),
)
# Get only specific nested fields (uses the dotted-path schema built above)
address_result = address_only_schema.dump(user_obj)
print("Address only:", address_result)
# Nested objects in lists
teams_data = {
    "name": "Development Team",
    "members": [
        {"name": "John Doe", "email": "john.doe@example.com"},
        {"name": "Jane Smith", "email": "jane.smith@example.com"},
    ],
}
class TeamSchema(Schema):
    """Team schema whose members are users restricted to ``name`` and ``email``."""

    name = fields.Str(required=True)
    # The nested UserSchema instance is limited with only=, so members need
    # just these two keys.
    members = fields.List(fields.Nested(UserSchema(only=("name", "email"))))
# Load the team payload and report how many members were deserialized.
team_schema = TeamSchema()
team_result = team_schema.load(teams_data)
member_total = len(team_result['members'])
print(f"Team members count: {member_total}")
Custom Validation Functions and Advanced Features
from marshmallow import Schema, fields, validates, validates_schema, ValidationError, post_load
import re
from datetime import datetime, date
class AdvancedUserSchema(Schema):
    """Registration schema combining field-level and schema-level validation."""

    username = fields.Str(required=True)
    password = fields.Str(required=True, load_only=True)  # Excluded from serialization
    confirm_password = fields.Str(required=True, load_only=True)
    email = fields.Email(required=True)
    phone = fields.Str(required=True)
    birth_date = fields.Date(required=True)
    # Ignored during deserialization. `dump_default` is the current spelling of
    # the older `default` argument.
    # NOTE(review): `default=` appears to have been removed in marshmallow 4 — confirm.
    created_at = fields.DateTime(dump_only=True, dump_default=datetime.now)

    # The @validates hooks accept **kwargs so they tolerate extra keyword
    # arguments passed by the library.
    # NOTE(review): marshmallow 4 reportedly passes `data_key` to these hooks — confirm.

    @validates('username')
    def validate_username(self, value, **kwargs):
        """Custom username validation"""
        if len(value) < 3:
            raise ValidationError('Username must be at least 3 characters')
        if not re.match(r'^[a-zA-Z0-9_]+$', value):
            raise ValidationError('Username can only contain letters, numbers, and underscores')
        # Reserved words check
        reserved_words = ['admin', 'root', 'system', 'test']
        if value.lower() in reserved_words:
            raise ValidationError('This username is reserved')

    @validates('password')
    def validate_password(self, value, **kwargs):
        """Password strength check"""
        if len(value) < 8:
            raise ValidationError('Password must be at least 8 characters')
        # Password strength requirements; the first failing rule is reported
        if not re.search(r'[A-Z]', value):
            raise ValidationError('Password must contain uppercase letters')
        if not re.search(r'[a-z]', value):
            raise ValidationError('Password must contain lowercase letters')
        if not re.search(r'\d', value):
            raise ValidationError('Password must contain digits')
        if not re.search(r'[!@#$%^&*(),.?":{}|<>]', value):
            raise ValidationError('Password must contain special characters')

    @validates('phone')
    def validate_phone(self, value, **kwargs):
        """US phone number format check"""
        # US phone number patterns; the value is accepted if any one matches
        patterns = [
            r'^(\+1-?)?(\([0-9]{3}\)|[0-9]{3})-?[0-9]{3}-?[0-9]{4}$',  # Various US formats
            r'^(\+1\s?)?(\([0-9]{3}\)|[0-9]{3})[\s.-]?[0-9]{3}[\s.-]?[0-9]{4}$',
        ]
        if not any(re.match(pattern, value) for pattern in patterns):
            raise ValidationError('Please enter a valid US phone number format')

    @validates('birth_date')
    def validate_birth_date(self, value, **kwargs):
        """Birth date validity check"""
        today = date.today()
        # Subtract one year when this year's birthday has not happened yet
        age = today.year - value.year - ((today.month, today.day) < (value.month, value.day))
        if age < 13:
            raise ValidationError('Must be at least 13 years old')
        if age > 120:
            raise ValidationError('Please enter a valid birth date')

    @validates_schema
    def validate_passwords_match(self, data, **kwargs):
        """Schema-level validation: password confirmation"""
        if 'password' in data and 'confirm_password' in data:
            if data['password'] != data['confirm_password']:
                raise ValidationError({'confirm_password': ['Passwords do not match']})

    @post_load
    def make_user(self, data, **kwargs):
        """Post-deserialization processing"""
        # Remove confirm_password as it's for internal processing only
        data.pop('confirm_password', None)
        # Add creation timestamp
        data['created_at'] = datetime.now()
        return data
# Custom validation function
def validate_email_domain(email):
    """Allow only specific domain email addresses

    Raises ``ValidationError`` when the part after the last ``@`` is not one
    of the allowed domains, including when the value contains no ``@``.
    """
    allowed_domains = ['example.com', 'company.com', 'gmail.com']
    # rpartition never raises: a value without '@' yields an empty separator,
    # where the previous split('@')[1] raised IndexError.
    _, separator, domain = email.rpartition('@')
    if not separator or domain not in allowed_domains:
        raise ValidationError(f'Domain not allowed. Allowed domains: {", ".join(allowed_domains)}')
class RestrictedUserSchema(AdvancedUserSchema):
    """AdvancedUserSchema variant whose e-mail must belong to an allowed domain."""

    email = fields.Email(required=True, validate=validate_email_domain)
# Advanced validation testing
advanced_data = {
    "username": "john_user",
    "password": "SecurePass123!",
    "confirm_password": "SecurePass123!",
    "email": "john.user@example.com",
    "phone": "(555) 123-4567",
    "birth_date": "1990-05-15",
}
schema = AdvancedUserSchema()
try:
    result = schema.load(advanced_data)
    print("Advanced validation successful:")
    print(f"Username: {result['username']}")
    print(f"Created at: {result['created_at']}")
    # load_only fields are still present in the loaded data; they are only
    # left out when the data is dumped, so check the serialized output.
    serialized = schema.dump(result)
    print("Password not included:", 'password' not in serialized)
except ValidationError as err:
    print("Validation error:", err.messages)
# Testing with invalid data
invalid_advanced_data = {
    "username": "ad",  # Too short
    "password": "weak",  # Weak password
    "confirm_password": "different",  # Different password
    "email": "user@disallowed.org",  # Disallowed domain
    "phone": "123-456-789",  # Invalid phone format
    "birth_date": "2020-01-01",  # Age restriction violation
}
try:
    result = RestrictedUserSchema().load(invalid_advanced_data)
except ValidationError as err:
    print("\nMultiple errors caught:")
    for field, errors in err.messages.items():
        print(f" {field}: {errors}")
Web Framework and API Integration
from marshmallow import Schema, fields, ValidationError
from flask import Flask, request, jsonify
import json
# Flask integration example
app = Flask(__name__)
def _check_signup_age(value):
    """Raise ``ValidationError`` unless ``value`` lies between 18 and 100 inclusive."""
    # Raising (rather than returning False) works on both marshmallow 3 and 4.
    # NOTE(review): marshmallow 4 reportedly ignores a False return value — confirm.
    if not 18 <= value <= 100:
        raise ValidationError('Age must be between 18 and 100.')


class CreateUserSchema(Schema):
    """Request schema for creating a user; all three fields are required."""

    name = fields.Str(required=True)
    email = fields.Email(required=True)
    age = fields.Int(required=True, validate=_check_signup_age)
def _check_updated_age(value):
    """Raise ``ValidationError`` unless ``value`` lies between 18 and 100 inclusive."""
    # Raising (rather than returning False) works on both marshmallow 3 and 4.
    # NOTE(review): marshmallow 4 reportedly ignores a False return value — confirm.
    if not 18 <= value <= 100:
        raise ValidationError('Age must be between 18 and 100.')


class UpdateUserSchema(Schema):
    """Request schema for updating a user; every field is optional."""

    name = fields.Str()
    email = fields.Email()
    age = fields.Int(validate=_check_updated_age)
class UserResponseSchema(Schema):
    """Response schema used to serialize a user for API output."""

    id = fields.Int()
    name = fields.Str()
    email = fields.Email()
    age = fields.Int()
    # Serialized from a datetime object, not from a pre-formatted string
    created_at = fields.DateTime()
# Validation decorator
def validate_json(schema_class):
    """Build a decorator that validates the request JSON with ``schema_class``.

    The wrapped view receives the loaded data as its first positional
    argument. If a ``ValidationError`` is raised while loading or inside the
    view, a JSON error body with status 400 is returned instead.
    """
    from functools import wraps

    def decorator(f):
        # wraps() copies __name__, __doc__ and the other metadata of the view,
        # replacing the manual __name__ assignment.
        @wraps(f)
        def wrapper(*args, **kwargs):
            try:
                schema = schema_class()
                validated_data = schema.load(request.json)
                return f(validated_data, *args, **kwargs)
            except ValidationError as err:
                return jsonify({
                    'error': 'Validation error',
                    'messages': err.messages
                }), 400
        return wrapper
    return decorator
@app.route('/users', methods=['POST'])
@validate_json(CreateUserSchema)
def create_user(validated_data):
    """User creation API"""
    from datetime import datetime

    # Database save processing (omitted)
    user_data = {
        'id': 1,
        # fields.DateTime serializes datetime objects; a plain string here
        # fails on dump because str has no isoformat().
        'created_at': datetime.fromisoformat('2025-06-22T10:30:45.123456'),
        **validated_data
    }
    # Response serialization
    response_schema = UserResponseSchema()
    return jsonify(response_schema.dump(user_data)), 201
@app.route('/users/<int:user_id>', methods=['PUT'])
@validate_json(UpdateUserSchema)
def update_user(validated_data, user_id):
    """User update API"""
    from datetime import datetime

    # Database update processing (omitted); missing keys fall back to
    # placeholder "existing" values.
    user_data = {
        'id': user_id,
        'name': validated_data.get('name', 'Existing Name'),
        'email': validated_data.get('email', 'existing.user@example.com'),
        'age': validated_data.get('age', 25),
        # fields.DateTime serializes datetime objects; a plain string here
        # fails on dump because str has no isoformat().
        'created_at': datetime.fromisoformat('2025-06-22T10:30:45.123456')
    }
    response_schema = UserResponseSchema()
    return jsonify(response_schema.dump(user_data))
# FastAPI integration example (conceptual implementation)
from typing import Optional
class FastAPIIntegration:
    """FastAPI marshmallow integration patterns"""

    @staticmethod
    def validate_request_body(schema_class, data: dict):
        """Request body validation

        Returns ``(loaded_data, None)`` on success and ``(None, messages)``
        when the schema raises ``ValidationError``.
        """
        try:
            schema = schema_class()
            return schema.load(data), None
        except ValidationError as err:
            return None, err.messages

    @staticmethod
    def serialize_response(schema_class, data):
        """Response serialization"""
        schema = schema_class()
        return schema.dump(data)
# Usage example
def fastapi_create_user_handler(request_data: dict):
    """FastAPI-style handler example

    Returns a ``(body, status_code)`` tuple: 400 with the validation details
    on failure, otherwise 200 with the serialized user.
    """
    from datetime import datetime

    validated_data, errors = FastAPIIntegration.validate_request_body(
        CreateUserSchema, request_data
    )
    if errors:
        return {"error": "Validation failed", "details": errors}, 400
    # Business logic processing
    user_data = {
        'id': 1,
        # fields.DateTime serializes datetime objects; a plain string here
        # fails on dump because str has no isoformat().
        'created_at': datetime.fromisoformat('2025-06-22T10:30:45.123456'),
        **validated_data
    }
    response_data = FastAPIIntegration.serialize_response(
        UserResponseSchema, user_data
    )
    return response_data, 200
# SQLAlchemy integration example
class SQLAlchemyIntegration:
    """SQLAlchemy model integration patterns"""

    def __init__(self, model_class, schema_class):
        """Remember the model class and the schema class used to (de)serialize it."""
        self.model_class = model_class
        self.schema_class = schema_class

    def create_from_dict(self, data: dict):
        """Create model instance from dictionary

        The data is loaded through the schema first, then passed to the model
        constructor as keyword arguments. Errors raised by ``load`` propagate.
        """
        schema = self.schema_class()
        validated_data = schema.load(data)
        return self.model_class(**validated_data)

    def serialize_model(self, model_instance):
        """Serialize model instance"""
        schema = self.schema_class()
        return schema.dump(model_instance)

    def bulk_serialize(self, model_instances):
        """Bulk serialization of multiple instances"""
        schema = self.schema_class(many=True)
        return schema.dump(model_instances)
# Use case example
def api_workflow_example():
    """API processing workflow example"""
    from datetime import datetime

    # 1. Request data
    request_data = {
        "name": "John Doe",
        "email": "john.doe@example.com",
        "age": 30
    }
    # 2. Validation and deserialization
    create_schema = CreateUserSchema()
    try:
        validated_data = create_schema.load(request_data)
        print("Validation successful:", validated_data)
    except ValidationError as err:
        print("Validation error:", err.messages)
        return
    # 3. Business logic processing (user creation)
    user_data = {
        'id': 123,
        # fields.DateTime serializes datetime objects; a plain string here
        # fails on dump because str has no isoformat().
        'created_at': datetime.fromisoformat('2025-06-22T10:30:45.123456'),
        **validated_data
    }
    # 4. Response serialization
    response_schema = UserResponseSchema()
    response_data = response_schema.dump(user_data)
    print("Response data:", response_data)
# Run the workflow demo only when this snippet is executed as a script.
if __name__ == "__main__":
    api_workflow_example()
Advanced Schema Design and Best Practices
from marshmallow import Schema, fields, validates_schema, ValidationError, pre_load, post_load, pre_dump, post_dump
from marshmallow.validate import Length, Range, OneOf
from typing import Dict, Any
import json
class BaseTimestampSchema(Schema):
    """Base schema with timestamps"""

    # dump_only: emitted on serialization, never accepted as input
    created_at = fields.DateTime(dump_only=True)
    updated_at = fields.DateTime(dump_only=True)
class PaginationSchema(Schema):
    """Pagination schema"""

    page = fields.Int(required=True, validate=Range(min=1))
    per_page = fields.Int(required=True, validate=Range(min=1, max=100))
    # Output-only totals
    total = fields.Int(dump_only=True)
    pages = fields.Int(dump_only=True)
class SearchQuerySchema(Schema):
    """Search query schema"""

    q = fields.Str(required=True, validate=Length(min=1, max=100))
    category = fields.Str(validate=OneOf(['tech', 'business', 'hobby']))
    sort_by = fields.Str(validate=OneOf(['created_at', 'updated_at', 'name']))
    # A query schema is loaded, so the fallback must be a load default; the
    # dump default is kept so serialization behaves as before.
    # NOTE(review): `default=` appears to have been removed in marshmallow 4 — confirm.
    sort_order = fields.Str(
        validate=OneOf(['asc', 'desc']),
        load_default='desc',
        dump_default='desc',
    )
class DynamicFieldsSchema(Schema):
    """Schema supporting dynamic field selection"""

    def __init__(self, only_fields=None, exclude_fields=None, *args, **kwargs):
        """Forward ``only_fields`` / ``exclude_fields`` as ``only`` / ``exclude``.

        The selection is handed to ``Schema.__init__`` instead of being set on
        the instance afterwards, because the field set is built during
        initialization and later attribute assignment is not picked up.
        An explicit ``only=`` / ``exclude=`` keyword takes precedence.
        """
        if only_fields:
            kwargs.setdefault('only', only_fields)
        if exclude_fields:
            kwargs.setdefault('exclude', exclude_fields)
        super().__init__(*args, **kwargs)
class UserProfileSchema(BaseTimestampSchema, DynamicFieldsSchema):
    """User profile schema

    Normalizes input before loading, rejects unavailable usernames after
    loading, hides private fields from viewers who are neither owner nor
    admin, and adds a computed ``age`` to the serialized output.
    """

    id = fields.Int(dump_only=True)
    username = fields.Str(required=True)
    email = fields.Email(required=True)
    full_name = fields.Str(required=True)
    bio = fields.Str(allow_none=True)
    avatar_url = fields.Url(allow_none=True)
    # `dump_default` is the current spelling of the older `default` argument.
    # NOTE(review): `default=` appears to have been removed in marshmallow 4 — confirm.
    is_active = fields.Bool(dump_default=True)
    # Private information (conditionally excluded)
    phone = fields.Str(allow_none=True)
    birth_date = fields.Date(allow_none=True)

    @pre_load
    def preprocess_input(self, data, **kwargs):
        """Input data preprocessing"""
        # Non-dict input is passed through untouched.
        if not isinstance(data, dict):
            return data
        # Work on a copy so the caller's payload is not modified.
        cleaned = dict(data)
        # String normalization (non-string values are left as they are)
        full_name = cleaned.get('full_name')
        if isinstance(full_name, str):
            cleaned['full_name'] = full_name.strip()
        username = cleaned.get('username')
        if isinstance(username, str):
            cleaned['username'] = username.lower().strip()
        return cleaned

    @post_load
    def validate_and_transform(self, data, **kwargs):
        """Post-load validation and transformation"""
        # Username duplication check (in real apps, check DB)
        existing_usernames = ['admin', 'root', 'test']
        if data.get('username') in existing_usernames:
            raise ValidationError({'username': ['This username is not available']})
        return data

    @pre_dump
    def prepare_for_serialization(self, data, **kwargs):
        """Preparation before serialization"""
        import copy

        # Exclude private information based on context
        # NOTE(review): Schema.context appears to have been removed in
        # marshmallow 4; confirm before running against 4.x.
        context = self.context or {}
        is_owner = context.get('is_owner', False)
        is_admin = context.get('is_admin', False)
        if is_owner or is_admin:
            return data
        # For other people's profiles, exclude private information. A redacted
        # copy is returned so the caller's object or dict keeps its values.
        sensitive_fields = ['phone', 'birth_date']
        if isinstance(data, dict):
            return {key: value for key, value in data.items() if key not in sensitive_fields}
        # NOTE(review): copy.copy suits plain objects and dataclasses; verify
        # before using with ORM-managed instances.
        redacted = copy.copy(data)
        for field in sensitive_fields:
            if hasattr(redacted, field):
                setattr(redacted, field, None)
        return redacted

    @post_dump
    def add_computed_fields(self, data, **kwargs):
        """Add computed fields"""
        # Age calculation (from birth date)
        if data.get('birth_date'):
            from datetime import date
            birth_date = data['birth_date']
            if isinstance(birth_date, str):
                birth_date = date.fromisoformat(birth_date)
            today = date.today()
            # Subtract one year when this year's birthday has not happened yet
            age = today.year - birth_date.year - ((today.month, today.day) < (birth_date.month, birth_date.day))
            data['age'] = age
        return data
class BlogPostSchema(BaseTimestampSchema):
    """Blog post schema"""

    # `dump_default` below is the current spelling of the older `default`
    # argument and keeps the same serialization-time meaning.
    # NOTE(review): `default=` appears to have been removed in marshmallow 4 — confirm.
    id = fields.Int(dump_only=True)
    title = fields.Str(required=True, validate=Length(min=1, max=200))
    content = fields.Str(required=True, validate=Length(min=10))
    slug = fields.Str(dump_only=True)
    status = fields.Str(validate=OneOf(['draft', 'published', 'archived']), dump_default='draft')
    author = fields.Nested(UserProfileSchema(only_fields=['id', 'username', 'full_name']))
    tags = fields.List(fields.Str(), dump_default=list)
    view_count = fields.Int(dump_only=True, dump_default=0)

    @validates_schema
    def validate_content_and_title(self, data, **kwargs):
        """Related validation of title and content"""
        title = data.get('title', '')
        content = data.get('content', '')
        # Content length restriction for short titles
        if len(title) < 10 and len(content) < 100:
            raise ValidationError('For short titles, content must be at least 100 characters')
class ApiResponseSchema(Schema):
    """Unified schema for API responses"""

    # `dump_default` is the current spelling of the older `default` argument.
    # NOTE(review): `default=` appears to have been removed in marshmallow 4 — confirm.
    success = fields.Bool(dump_default=True)
    message = fields.Str()
    data = fields.Raw()
    pagination = fields.Nested(PaginationSchema, allow_none=True)

    @post_dump
    def remove_none_values(self, data, **kwargs):
        """Remove None values"""
        return {k: v for k, v in data.items() if v is not None}
class SchemaManager:
    """Schema management and utility class"""

    @staticmethod
    def create_api_response(data=None, message="Success", success=True, pagination=None):
        """Create API response"""
        response_data = {
            'success': success,
            'message': message,
            'data': data,
            'pagination': pagination
        }
        return ApiResponseSchema().dump(response_data)

    @staticmethod
    def validate_and_serialize_list(schema_class, data_list, context=None):
        """Validation and serialization of list data

        Returns ``(serialized, None)`` on success and ``(None, messages)``
        when loading raises ``ValidationError``.
        """
        # NOTE(review): the `context` constructor argument appears to have
        # been removed in marshmallow 4 — confirm before running against 4.x.
        try:
            schema = schema_class(many=True, context=context)
            validated_data = schema.load(data_list)
            serialized_data = schema.dump(validated_data)
            return serialized_data, None
        except ValidationError as err:
            return None, err.messages

    @staticmethod
    def dynamic_schema_serialization(schema_class, data, only_fields=None, exclude_fields=None, context=None):
        """Serialization with dynamic field selection

        ``schema_class`` must accept the ``only_fields`` / ``exclude_fields``
        / ``context`` keyword arguments.
        """
        schema = schema_class(
            only_fields=only_fields,
            exclude_fields=exclude_fields,
            context=context
        )
        return schema.dump(data)
# Practical usage example
def advanced_schema_example():
    """Advanced schema usage example"""
    # User data
    user_data = {
        'username': ' JohnUser ',  # Will be lowercased and trimmed in preprocessing
        'email': 'john.user@example.com',
        'full_name': 'John Doe',
        'bio': 'Python developer',
        'phone': '(555) 123-4567',
        'birth_date': '1990-05-15'
    }
    # 1. Serialization as owner (including private information)
    user_schema_owner = UserProfileSchema(context={'is_owner': True})
    try:
        validated_user = user_schema_owner.load(user_data)
        serialized_owner = user_schema_owner.dump(validated_user)
        print("Owner view:", json.dumps(serialized_owner, indent=2))
    except ValidationError as err:
        print("Validation error:", err.messages)
        # Without a loaded user the remaining steps cannot run (validated_user
        # would be unbound), so stop here.
        return
    # 2. Serialization as other person (excluding private information)
    user_schema_public = UserProfileSchema(context={'is_owner': False})
    serialized_public = user_schema_public.dump(validated_user)
    print("\nPublic view:", json.dumps(serialized_public, indent=2))
    # 3. API response creation
    response = SchemaManager.create_api_response(
        data=serialized_public,
        message="User information retrieved successfully"
    )
    print("\nAPI response:", json.dumps(response, indent=2))
    # 4. Dynamic field selection
    summary_data = SchemaManager.dynamic_schema_serialization(
        UserProfileSchema,
        validated_user,
        only_fields=['username', 'full_name', 'bio'],
        context={'is_owner': False}
    )
    print("\nSummary view:", json.dumps(summary_data, indent=2))
# Run the advanced demo only when this snippet is executed as a script.
if __name__ == "__main__":
    advanced_schema_example()