SQLAlchemy
SQLAlchemy is known as "The Database Toolkit for Python" and is the oldest and most mature ORM library in Python. True to its name "SQL + Alchemy," it enables diverse database programming and automates SQL statement construction. Supporting major database engines like MySQL, PostgreSQL, and SQLite, it provides flexible development support through three levels of abstraction.
GitHub Overview
sqlalchemy/sqlalchemy
The Database Toolkit for Python
Topics
Star History
GitHub Star History
Library
SQLAlchemy
Overview
SQLAlchemy is known as "The Database Toolkit for Python" and is the oldest and most mature ORM library in Python. True to its name "SQL + Alchemy," it enables diverse database programming and automates SQL statement construction. Supporting major database engines like MySQL, PostgreSQL, and SQLite, it provides flexible development support through three levels of abstraction.
Details
SQLAlchemy 2.0 continues active development as of 2025 and is the de facto standard ORM in the Python ecosystem. As an "Object-Relational Mapping" tool, it provides the mechanism to "connect" Python classes/objects with database tables/rows, allowing developers to complete database operations using only Python code.
Key Features
- 3-Level Abstraction: Combinable ORM, SQL Expression Language, and low-level execute methods
- Extensive Database Support: MySQL, PostgreSQL, SQLite, Oracle, MS-SQL, and more
- Session Management: Advanced transaction management and object lifecycle
- Relationship Features: Complete support for One-to-One, One-to-Many, Many-to-Many
- Migration Support: Powerful schema management via Alembic integration
- Type Hints Support: Full integration with Python 3.6+ type hints
Pros and Cons
Pros
- Most established ORM in Python ecosystem (20+ years of development)
- High portability through extensive database engine support
- Flexible abstraction levels (raw SQL to full ORM) enable gradual adoption
- Powerful query building capabilities and performance optimization options
- Excellent compatibility with FastAPI for modern API development
- Comprehensive documentation and rich community resources
Cons
- Steep learning curve requiring significant time to master
- Complex configuration with high barrier for beginners
- Performance tuning requires deep understanding
- Tends to generate boilerplate code
- Developer experience may be inferior compared to newer ORMs
- Error messages can be unclear at times
References
Code Examples
Basic Setup
pip install sqlalchemy
pip install sqlalchemy[postgresql] # For PostgreSQL
pip install alembic # For migrations
Model Definition (2.0 Style)
from typing import List, Optional
from sqlalchemy import String, Integer, ForeignKey, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship, Session
class Base(DeclarativeBase):
pass
class User(Base):
__tablename__ = "user_account"
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str] = mapped_column(String(30))
fullname: Mapped[Optional[str]]
email: Mapped[str] = mapped_column(String(100), unique=True)
# One-to-Many relationship
addresses: Mapped[List["Address"]] = relationship(
back_populates="user",
cascade="all, delete-orphan"
)
def __repr__(self) -> str:
return f"User(id={self.id!r}, name={self.name!r}, fullname={self.fullname!r})"
class Address(Base):
__tablename__ = "address"
id: Mapped[int] = mapped_column(primary_key=True)
email_address: Mapped[str]
user_id: Mapped[int] = mapped_column(ForeignKey("user_account.id"))
# Many-to-One relationship
user: Mapped["User"] = relationship(back_populates="addresses")
def __repr__(self) -> str:
return f"Address(id={self.id!r}, email_address={self.email_address!r})"
Database Connection and Session Setup
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
# Database connection
engine = create_engine(
"postgresql://username:password@localhost/dbname",
echo=True, # Log SQL output
pool_size=10,
max_overflow=20
)
# Create tables
Base.metadata.create_all(engine)
# Create session factory
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Session usage pattern
def get_db():
db = SessionLocal()
try:
yield db
finally:
db.close()
Basic CRUD Operations
from sqlalchemy import select
# Create session
with Session(engine) as session:
# Create - User creation
new_user = User(
name="john_doe",
fullname="John Doe",
email="[email protected]",
addresses=[
Address(email_address="[email protected]"),
Address(email_address="[email protected]")
]
)
session.add(new_user)
session.commit()
# Read - Data retrieval
# Get all users
stmt = select(User)
users = session.execute(stmt).scalars().all()
# Conditional search
stmt = select(User).where(User.name == "john_doe")
user = session.execute(stmt).scalar_one_or_none()
# Search by primary key
user = session.get(User, 1)
# Update - Data modification
stmt = select(User).where(User.name == "john_doe")
user = session.execute(stmt).scalar_one()
user.fullname = "John F. Doe"
session.commit()
# Delete - Data removal
session.delete(user)
session.commit()
Advanced Query Operations
from sqlalchemy import and_, or_, func, desc
with Session(engine) as session:
# JOIN operations
stmt = (
select(User, Address)
.join(User.addresses)
.where(User.name == "john_doe")
)
results = session.execute(stmt).all()
# Complex conditions
stmt = select(User).where(
and_(
User.name.like("john%"),
or_(
User.email.contains("gmail"),
User.email.contains("yahoo")
)
)
)
users = session.execute(stmt).scalars().all()
# Aggregation queries
stmt = (
select(User.name, func.count(Address.id).label("address_count"))
.join(User.addresses)
.group_by(User.name)
.having(func.count(Address.id) > 1)
.order_by(desc("address_count"))
)
results = session.execute(stmt).all()
# Subqueries
subq = (
select(func.count(Address.id))
.where(Address.user_id == User.id)
).scalar_subquery()
stmt = select(User).where(subq > 2)
users = session.execute(stmt).scalars().all()
Relationships and Eager Loading
from sqlalchemy.orm import selectinload, joinedload
with Session(engine) as session:
# Lazy Loading (default)
user = session.get(User, 1)
addresses = user.addresses # SQL is executed at this point
# Eager Loading - selectinload
stmt = select(User).options(selectinload(User.addresses))
users = session.execute(stmt).scalars().all()
# Eager Loading - joinedload
stmt = select(User).options(joinedload(User.addresses))
users = session.execute(stmt).unique().scalars().all()
# Filtering through relationships
stmt = select(User).where(
User.addresses.any(Address.email_address.like("%@gmail.com"))
)
gmail_users = session.execute(stmt).scalars().all()
Transaction Management
# Using context manager
with Session(engine) as session:
try:
user = User(name="test_user", email="[email protected]")
session.add(user)
address = Address(email_address="[email protected]", user=user)
session.add(address)
session.commit()
print("Transaction successful")
except Exception as e:
session.rollback()
print(f"Transaction failed: {e}")
# Explicit transaction control
session = Session(engine)
transaction = session.begin()
try:
# Data operations
user = User(name="explicit_user", email="[email protected]")
session.add(user)
session.flush() # Execute INSERT (before commit)
# Some processing
user_id = user.id
transaction.commit()
except Exception:
transaction.rollback()
raise
finally:
session.close()
Many-to-Many Relationship Implementation
from sqlalchemy import Table, Column
# Association table
student_course_table = Table(
'student_course',
Base.metadata,
Column('student_id', Integer, ForeignKey('students.id'), primary_key=True),
Column('course_id', Integer, ForeignKey('courses.id'), primary_key=True)
)
class Student(Base):
__tablename__ = 'students'
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str]
# Many-to-Many relationship
courses: Mapped[List["Course"]] = relationship(
secondary=student_course_table,
back_populates="students"
)
class Course(Base):
__tablename__ = 'courses'
id: Mapped[int] = mapped_column(primary_key=True)
title: Mapped[str]
students: Mapped[List["Student"]] = relationship(
secondary=student_course_table,
back_populates="courses"
)
# Usage example
with Session(engine) as session:
student = Student(name="Alice")
course1 = Course(title="Python Programming")
course2 = Course(title="Data Science")
student.courses.extend([course1, course2])
session.add(student)
session.commit()
Raw SQL Execution
from sqlalchemy import text
with Session(engine) as session:
# Execute raw SQL
result = session.execute(
text("SELECT name, email FROM user_account WHERE name = :name"),
{"name": "john_doe"}
)
rows = result.fetchall()
# Execute DDL
session.execute(text("CREATE INDEX idx_user_email ON user_account(email)"))
session.commit()
# Execute stored procedure (PostgreSQL example)
result = session.execute(
text("CALL update_user_stats(:user_id)"),
{"user_id": 1}
)
Alembic Migrations
# Initialize Alembic
alembic init alembic
# Create migration
alembic revision --autogenerate -m "Create user and address tables"
# Run migrations
alembic upgrade head
# Migration history
alembic history
# Downgrade to specific version
alembic downgrade -1
# Example alembic/env.py configuration
from sqlalchemy import engine_from_config
from alembic import context
from myapp.models import Base
config = context.config
target_metadata = Base.metadata
def run_migrations_online():
connectable = engine_from_config(
config.get_section(config.config_ini_section),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata
)
with context.begin_transaction():
context.run_migrations()