Pydantic
Library
Pydantic
Overview
Pydantic is a high-performance Python data validation library built on the principle of "data validation using Python type hints". Its intuitive API leverages standard type hints to convert untrusted input into validated, type-safe Python objects. The core validation logic is written in Rust, which delivers up to 10x faster processing than other libraries. Adopted by over 8,000 PyPI packages, including FastAPI, LangChain, and HuggingFace, it is the de facto standard validation library in the Python ecosystem.
Details
Pydantic v2 is the current major version as of 2025; it was completely rewritten around the Rust-based pydantic-core, which brought significant performance improvements. Data structures are declared with plain, standard type hints (Python 3.9+), and Pydantic automatically handles validation, serialization, and deserialization. It offers rich functionality for data science, machine learning, and web API development, including a choice of strict or lax validation modes, JSON Schema generation, and SQLAlchemy integration.
Key Features
- Highest Performance: 10x processing speed compared to other libraries through Rust core
- Python Type Hints Integration: Intuitive API using standard type hint notation
- Robust Type Conversion: Flexible data type coercion and strict type checking
- JSON Schema Generation: Automatic OpenAPI/JSON Schema generation functionality
- Rich Ecosystem: Deep integration with FastAPI, SQLAlchemy, Django, etc.
- Detailed Errors: Comprehensive and understandable validation error messages
Pros and Cons
Pros
- The most widely used validation library in the Python ecosystem, with a proven track record of reliability
- Outstanding performance through Rust core (10x faster)
- Strongest Web API development environment through FastAPI integration
- Rich use cases in data science and machine learning fields
- Gentle learning curve thanks to standard type hint notation
- Comprehensive official documentation and community support
Cons
- Increased memory usage in large-scale projects
- Degraded validation performance with complex, deeply nested models
- Python-only (no cross-language interoperability)
- Relatively steep learning curve for custom validators
- End of support for legacy Python versions (3.8 and below)
- Runtime overhead with some advanced features
Reference Pages
Code Examples
Installation and Basic Setup
# Install Pydantic
pip install pydantic
# Optional: install with additional features
pip install "pydantic[email]" # Email validation (EmailStr)
# .env / environment-variable settings loading lives in a separate package in v2
pip install pydantic-settings
# NOTE: Pydantic v2 does not publish "dotenv", "all", or "dev" extras;
# pip only warns about an unknown extra and installs plain pydantic.
Basic Schema Definition and Validation
from datetime import datetime
from typing import Optional, List

from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    ValidationError,
    field_validator,
    model_validator,
)
# Basic model definition
class User(BaseModel):
    """User record built from untrusted input.

    String values such as ``'123'`` are coerced to the annotated types
    in the default (lax) validation mode.
    """

    id: int
    name: str
    email: str
    age: Optional[int] = None
    is_active: bool = True
    # default_factory runs once per instance; a bare ``datetime.now()`` default
    # is evaluated a single time at class definition and shared by every User.
    created_at: datetime = Field(default_factory=datetime.now)
    tags: List[str] = Field(default_factory=list)
# Data validation
user_data = {
    'id': '123',  # Automatically converted from string to int
    'name': 'John Doe',
    'email': 'john@example.com',
    'age': '30',  # Automatically converted from string to int
    'tags': ['developer', 'python'],
}

try:
    user = User(**user_data)
except ValidationError as e:
    print(f"Validation error: {e}")
else:
    # Everything below needs a successfully validated ``user``; keeping it in
    # the ``else`` branch avoids a NameError when validation fails.
    print(user)
    print(f"User ID: {user.id}, type: {type(user.id)}")  # int type
    print(f"Age: {user.age}, type: {type(user.age)}")  # int type

    # Model dictionary output
    print(user.model_dump())
    # {'id': 123, 'name': 'John Doe', 'email': 'john@example.com', ...}

    # JSON format output
    print(user.model_dump_json())

    # Partial data update
    updated_user = user.model_copy(update={'age': 31, 'is_active': False})
    print(updated_user)
# Dataclass-style usage
class Product(BaseModel):
    """Minimal model with three required fields and no defaults."""

    name: str
    price: float
    in_stock: bool
# Validation test: every field is supplied with a value of its declared type
valid_product = Product(name="Laptop", price=999.99, in_stock=True)
print(valid_product.price) # 999.99
Advanced Validation Rules and Custom Validators
from pydantic import BaseModel, Field, validator, EmailStr, ValidationError
from typing import Annotated
from annotated_types import Gt, Lt, Len
# Detailed field validation
class UserProfile(BaseModel):
    """User profile with per-field constraints and custom validators.

    Uses the Pydantic v2 API: ``field_validator`` instead of the deprecated
    ``validator`` and ``model_config`` instead of a nested ``Config`` class.
    """

    model_config = ConfigDict(
        # Example payload embedded in the generated JSON Schema
        json_schema_extra={
            "example": {
                "username": "johndoe",
                "email": "john@example.com",
                "age": 30,
                "password": "SecurePass123",
                "bio": "Python developer",
            }
        }
    )

    username: Annotated[str, Len(min_length=3, max_length=20)] = Field(
        ...,
        description="Username (3-20 characters)",
    )
    email: EmailStr = Field(..., description="Valid email address")
    age: Annotated[int, Gt(0), Lt(150)] = Field(
        ...,
        description="Age (1-149 years)",
    )
    password: str = Field(..., min_length=8, description="Password (8+ characters)")
    bio: Optional[str] = Field(None, max_length=500, description="Bio (up to 500 characters)")

    @field_validator('username')
    @classmethod
    def validate_username(cls, v: str) -> str:
        """Reject non-alphanumeric usernames and normalize to lower case."""
        if not v.isalnum():
            raise ValueError('Username can only contain alphanumeric characters')
        return v.lower()

    @field_validator('password')
    @classmethod
    def validate_password(cls, v: str) -> str:
        """Require at least one uppercase letter, one lowercase letter and one digit."""
        if not any(c.isupper() for c in v):
            raise ValueError('Password must contain uppercase letters')
        if not any(c.islower() for c in v):
            raise ValueError('Password must contain lowercase letters')
        if not any(c.isdigit() for c in v):
            raise ValueError('Password must contain digits')
        return v
# Complex nested models
class Address(BaseModel):
    """Postal address; ``postal_code`` must match the ZIP+4 form ``12345-6789``."""

    street: str
    city: str
    # ``pattern`` replaces the v1 ``regex`` keyword, which Pydantic v2 rejects.
    postal_code: str = Field(..., pattern=r'^\d{5}-\d{4}$')
    country: str = "USA"
class Company(BaseModel):
    """Company with a nested ``Address`` and a list of ``UserProfile`` employees."""

    name: str
    address: Address
    employees: List[UserProfile]
    founded_year: int = Field(..., ge=1800, le=2025)
# Custom validation example
company_data = {
    "name": "Tech Corp",
    "address": {
        "street": "123 Main St",
        "city": "New York",
        "postal_code": "10001-1234",
    },
    "employees": [
        {
            "username": "Alice123",
            # Must be a syntactically valid address: the field is an EmailStr
            "email": "alice@example.com",
            "age": 28,
            "password": "SecurePass123",
        }
    ],
    "founded_year": 2020,
}
company = Company(**company_data)
print(f"Company name: {company.name}")
print(f"Employee count: {len(company.employees)}")
Framework Integration (FastAPI, Django, Flask, etc.)
# FastAPI integration example
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, ValidationError
# Application instance that the @app.post route below is registered on
app = FastAPI()
class UserCreate(BaseModel):
    """Request body for creating a user; ``age`` must be between 18 and 120."""

    name: str
    email: EmailStr
    age: int = Field(..., ge=18, le=120)
class UserResponse(BaseModel):
    """Response body returned after a user has been created."""

    id: int
    name: str
    email: str
    created_at: datetime
@app.post("/users/", response_model=UserResponse)
async def create_user(user: UserCreate):
    """Build a ``UserResponse`` from the validated request body.

    Args:
        user: Request body, declared as ``UserCreate``.

    Returns:
        A ``UserResponse`` with a fixed id of 123 and the current time
        as ``created_at``.
    """
    # Automatic validation using Pydantic models
    # user.name and user.email can be accessed type-safely
    new_user = UserResponse(
        id=123,
        name=user.name,
        email=user.email,
        created_at=datetime.now(),
    )
    return new_user
# SQLAlchemy integration example
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# Declarative base class that the ORM model below inherits from
Base = declarative_base()


class UserDB(Base):
    """SQLAlchemy ORM model mapped to the ``users`` table."""

    __tablename__ = "users"

    id = Column(Integer, primary_key=True)
    name = Column(String)
    email = Column(String)
# Conversion between Pydantic and SQLAlchemy models
class UserPydantic(BaseModel):
    """Pydantic counterpart of ``UserDB``."""

    # from_attributes lets the model be created from SQLAlchemy objects;
    # model_config replaces the deprecated nested ``Config`` class.
    model_config = ConfigDict(from_attributes=True)

    id: int
    name: str
    email: str
# Django Rest Framework integration example
class ArticleSerializer(BaseModel):
    """Article payload; ``title`` is limited to 200 characters."""

    # Compatibility with Django models: ``from_attributes`` is the v2 name of
    # the v1 ``orm_mode`` setting.
    model_config = ConfigDict(from_attributes=True)

    title: str = Field(..., max_length=200)
    content: str
    author_id: int
    published_at: Optional[datetime] = None
# Flask integration example
from flask import Flask, request, jsonify
flask_app = Flask(__name__)


@flask_app.route('/api/users', methods=['POST'])
def create_user_flask():
    """Validate the JSON request body as ``UserCreate`` and echo it back.

    Returns:
        A success payload containing the validated user, or a 400 response
        listing the validation errors.
    """
    try:
        # model_validate reports a non-object body (e.g. a JSON list) as a
        # ValidationError instead of the TypeError that ``**`` unpacking raises.
        user_data = UserCreate.model_validate(request.json)
    except ValidationError as e:
        return jsonify({"status": "error", "errors": e.errors()}), 400
    # Use validated data (model_dump replaces the deprecated v1 ``.dict()``)
    return jsonify({"status": "success", "user": user_data.model_dump()})
Error Handling and Custom Error Messages
from pydantic import BaseModel, ValidationError, validator
from typing import List
class ProductOrder(BaseModel):
    """Order line with constrained quantity, unit price and discount rate."""

    product_name: str = Field(..., min_length=1, max_length=100)
    quantity: int = Field(..., gt=0, description="Quantity must be greater than 0")
    unit_price: float = Field(..., gt=0, description="Unit price must be greater than 0")
    discount_rate: float = Field(0.0, ge=0.0, le=1.0, description="Discount rate must be 0-1")

    @field_validator('product_name')
    @classmethod
    def validate_product_name(cls, v: str) -> str:
        """Reject surrounding whitespace and the characters ``<``, ``>``, ``&``."""
        if v.strip() != v:
            raise ValueError('Product name cannot have leading or trailing whitespace')
        if any(char in v for char in ['<', '>', '&']):
            raise ValueError('Product name cannot contain special characters')
        return v

    @field_validator('quantity')
    @classmethod
    def validate_quantity(cls, v: int) -> int:
        """Cap a single order at 1000 items."""
        if v > 1000:
            raise ValueError('Maximum order quantity is 1000 items')
        return v

    def total_price(self) -> float:
        """Calculate total price"""
        return self.quantity * self.unit_price * (1 - self.discount_rate)
# Detailed error handling example
def process_order(order_data: dict) -> ProductOrder:
    """Validate ``order_data`` as a ``ProductOrder`` and print a summary.

    Args:
        order_data: Raw keyword data for ``ProductOrder``.

    Returns:
        The validated order.

    Raises:
        ValidationError: Re-raised after the field, message and input of
            every error have been printed.
    """
    try:
        order = ProductOrder(**order_data)
    except ValidationError as e:
        print("Validation errors occurred:")
        for error in e.errors():
            # Join the whole location path so nested fields are reported in full
            field = '.'.join(str(part) for part in error['loc']) or 'unknown'
            message = error['msg']
            value = error.get('input', 'N/A')
            print(f" Field: {field}")
            print(f" Error: {message}")
            print(f" Input: {value}")
            print("---")
        raise
    print(f"Order processed: {order.product_name}, Total: ${order.total_price():.2f}")
    return order
# Error case testing
invalid_orders = [
    {"product_name": "", "quantity": 1, "unit_price": 100},  # Empty product name
    {"product_name": "Product A", "quantity": 0, "unit_price": 100},  # Zero quantity
    {"product_name": "Product B", "quantity": 5, "unit_price": -100},  # Negative price
    {"product_name": "Product C", "quantity": 5, "unit_price": 100, "discount_rate": 1.5},  # Invalid discount
]

for i, order_data in enumerate(invalid_orders, start=1):
    print(f"\n=== Error Case {i} ===")
    try:
        process_order(order_data)
    except ValidationError:
        pass  # Error already printed
# Custom error message configuration
class CustomValidatedModel(BaseModel):
    """Model whose validation errors can be re-worded with ``custom_error_messages``."""

    name: str = Field(..., min_length=2, max_length=50)
    age: int = Field(..., ge=0, le=150)


# Pydantic v2 removed ``Config.error_msg_templates``. Messages are customized
# by post-processing ``ValidationError.errors()``, keyed on the v2 error type;
# the placeholders are filled from each error's ``ctx`` dictionary.
CUSTOM_ERROR_MESSAGES = {
    'missing': 'This field is required',
    'string_too_long': 'Too many characters (max {max_length})',
    'greater_than_equal': 'Value too small (must be >= {ge})',
}


def custom_error_messages(exc: ValidationError) -> List[dict]:
    """Return ``exc.errors()`` with ``msg`` replaced by the custom template, if any.

    Args:
        exc: The validation error to translate.

    Returns:
        The error dictionaries; entries whose type has no custom template
        keep their original message.
    """
    errors = []
    for error in exc.errors():
        template = CUSTOM_ERROR_MESSAGES.get(error['type'])
        if template is not None:
            error['msg'] = template.format(**error.get('ctx', {}))
        errors.append(error)
    return errors
Type Safety and TypeScript Integration
from pydantic import BaseModel, Field
from typing import Dict, List, Union, Optional, Generic, TypeVar
from enum import Enum
import json
# Enum type usage
class StatusEnum(str, Enum):
    """Task lifecycle states; members are also plain strings."""

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
class Priority(int, Enum):
    """Task priority levels; members are also integers, so they compare by value."""

    LOW = 1
    MEDIUM = 2
    HIGH = 3
    URGENT = 4
# Generic type usage
T = TypeVar('T')


class APIResponse(BaseModel, Generic[T]):
    """Generic response envelope; ``data`` takes the type the model is parametrized with."""

    success: bool
    data: T
    message: Optional[str] = None
    errors: Optional[List[str]] = None
class Task(BaseModel):
    """Task with enum-typed status and priority and free-form metadata."""

    id: int
    title: str
    description: Optional[str] = None
    status: StatusEnum = StatusEnum.PENDING
    priority: Priority = Priority.MEDIUM
    assignee_id: Optional[int] = None
    # default_factory gives every Task its own dict
    metadata: Dict[str, Union[str, int, float]] = Field(default_factory=dict)
# Generic type usage example
task_data = {
    "id": 1,
    "title": "Sample Task",
    "status": "processing",
    "priority": 3,
    "metadata": {"estimated_hours": 5, "complexity": "medium"},
}
task = Task(**task_data)
response = APIResponse[Task](
    success=True,
    data=task,
    message="Task created successfully",
)
print(f"Task: {response.data.title}")
print(f"Status: {response.data.status}")
print(f"Priority: {response.data.priority}")
# JSON Schema generation (for TypeScript type definition generation)
# model_json_schema() is the v2 replacement for the deprecated ``.schema()``
schema = Task.model_json_schema()
print(json.dumps(schema, indent=2, ensure_ascii=False))
# TypeScript type definition generation example (using third-party tools)
# pip install pydantic-to-typescript required
"""
from pydantic2ts import generate_typescript_defs
generate_typescript_defs("path/to/models.py", "path/to/output.ts")
"""
# Compatibility with dataclasses
from dataclasses import dataclass
from pydantic.dataclasses import dataclass as pydantic_dataclass
@pydantic_dataclass
class DataClassUser:
    """Dataclass whose fields are validated by Pydantic; ``age`` must be positive."""

    name: str
    age: int = Field(..., gt=0)
    email: Optional[str] = None
# Can be used like regular dataclass, but with Pydantic validation
user = DataClassUser(name="John", age=30, email="john@example.com")
print(user.name)  # John
# Configuration and validation
class DatabaseConfig(BaseModel):
    """Database connection settings with range-checked port and pool size."""

    host: str = "localhost"
    port: int = Field(5432, ge=1, le=65535)
    database: str
    username: str
    password: str = Field(..., min_length=8)
    ssl_mode: bool = True
    pool_size: int = Field(10, ge=1, le=100)

    # NOTE: ``env_prefix`` and ``case_sensitive`` are settings options, not
    # BaseModel options, so a plain BaseModel never reads DB_* variables.
    # To load values from the environment, derive from ``BaseSettings`` in
    # the separate ``pydantic-settings`` package instead:
    #
    #     from pydantic_settings import BaseSettings, SettingsConfigDict
    #
    #     class DatabaseConfig(BaseSettings):
    #         model_config = SettingsConfigDict(env_prefix='DB_', case_sensitive=False)
# NOTE(review): DatabaseConfig derives from BaseModel, so nothing here is read
# from DB_HOST, DB_PORT, etc.; fields that are not passed fall back to the
# class defaults. Environment loading needs BaseSettings from pydantic-settings.
config = DatabaseConfig(
    database="myapp",
    username="admin",
    password="securepass123"
)
Advanced Features and Performance Optimization
from pydantic import BaseModel, Field, root_validator, validator
from typing import Any, Dict, List
import time
from decimal import Decimal
# Custom data types and validation
class Money(BaseModel):
    """Monetary amount with a three-letter uppercase currency code."""

    amount: Decimal = Field(..., decimal_places=2)
    # ``pattern`` replaces the v1 ``regex`` keyword, which Pydantic v2 rejects.
    currency: str = Field(..., pattern=r'^[A-Z]{3}$')

    def __str__(self):
        return f"{self.amount} {self.currency}"
# Root-level validation
class UserRegistration(BaseModel):
    """Registration form with checks that span more than one field.

    The v1 ``@root_validator`` raises a usage error in Pydantic v2 unless
    ``skip_on_failure=True`` is given, so the checks use
    ``@model_validator(mode="after")``, which runs on the validated instance.
    """

    username: str
    email: str
    password: str
    confirm_password: str
    terms_accepted: bool

    @model_validator(mode="after")
    def validate_passwords_match(self) -> "UserRegistration":
        """Require ``password`` and ``confirm_password`` to be equal."""
        if self.password != self.confirm_password:
            raise ValueError('Passwords do not match')
        return self

    @model_validator(mode="after")
    def validate_terms_accepted(self) -> "UserRegistration":
        """Require ``terms_accepted`` to be true."""
        if not self.terms_accepted:
            raise ValueError('Terms and conditions must be accepted')
        return self
# Performance measurement
class PerformanceTest(BaseModel):
    """Small model used as the payload type for the validation benchmark."""

    name: str
    value: int
    metadata: Dict[str, Any] = Field(default_factory=dict)
def benchmark_validation(count: int = 10_000) -> None:
    """Time the validation of ``count`` ``PerformanceTest`` payloads and print the result.

    Args:
        count: Number of payloads to build and validate (default 10,000).
    """
    # Large data validation performance test
    data_list = [
        {"name": f"item_{i}", "value": i, "metadata": {"index": i}}
        for i in range(count)
    ]
    # perf_counter is a monotonic, high-resolution clock; time.time() can
    # jump when the system clock is adjusted.
    start_time = time.perf_counter()
    validated_objects = [PerformanceTest(**data) for data in data_list]
    elapsed = time.perf_counter() - start_time
    print(f"{count:,} validations time: {elapsed:.4f}s")
    if validated_objects:  # avoid dividing by zero when nothing was validated
        print(f"Average time per item: {elapsed / len(validated_objects) * 1000:.2f}ms")


# Execute
benchmark_validation()
# Custom JSON encoder
class CustomModel(BaseModel):
    """Timestamped price with custom JSON encoders for datetime and Decimal."""

    # NOTE(review): ``json_encoders`` is a deprecated v1 carry-over in
    # Pydantic v2; ``@field_serializer`` is the documented replacement.
    # Confirm the encoders apply to the nested ``Money.amount`` before relying on them.
    model_config = ConfigDict(
        json_encoders={
            datetime: lambda v: v.isoformat(),
            Decimal: lambda v: float(v),
        }
    )

    timestamp: datetime
    price: Money
# Partial validation (experimental)
class PartialUpdateModel(BaseModel):
    """Model for partial updates: every field is optional and defaults to None."""

    # Validation settings for partial updates
    model_config = ConfigDict(
        validate_assignment=True,
        populate_by_name=True,  # v2 name of v1 ``allow_population_by_field_name``
    )

    name: Optional[str] = None
    age: Optional[int] = None
    email: Optional[str] = None
# Strict mode and Lax mode switching
class StrictModel(BaseModel):
    """Model used to contrast lax and strict validation.

    Despite its name, this model runs in the default lax mode: no
    ``strict`` setting is configured here.
    """

    # Strict mode: No type coercion (opt in with ``strict=True``)
    # Lax mode (default): Attempts type conversion when possible
    model_config = ConfigDict(
        str_strip_whitespace=True,  # v2 name of v1 ``anystr_strip_whitespace``
        validate_assignment=True,
    )

    number: int
    text: str
# Lax mode (default): the numeric string is coerced to int instead of being rejected
lax_model = StrictModel(number="123", text=" hello ") # Success: "123" -> 123
print(lax_model.number, type(lax_model.number)) # 123 <class 'int'>
print(f"'{lax_model.text}'") # 'hello' (whitespace stripped)
# Detailed ValidationError analysis
def analyze_validation_error() -> None:
    """Trigger a ``ValidationError`` on ``StrictModel`` and print every error's details."""
    try:
        StrictModel(number="abc", text=123)
    except ValidationError as e:
        for error in e.errors():
            print(f"Field: {'.'.join(str(x) for x in error['loc'])}")
            print(f"Error type: {error['type']}")
            print(f"Message: {error['msg']}")
            print(f"Input: {error['input']}")
            # ``ctx`` is only present on some errors
            if 'ctx' in error:
                print(f"Context: {error['ctx']}")
            print("---")


analyze_validation_error()