PyTorch
Dynamic deep learning framework developed by Meta (formerly Facebook). Features dynamic computation graphs, a Python-first design, and intuitive APIs. It dominates research use and is the de facto standard for academic paper implementations.
GitHub Overview
pytorch/pytorch
Tensors and Dynamic neural networks in Python with strong GPU acceleration
Overview
PyTorch is a machine learning framework developed by Meta (formerly Facebook), featuring dynamic computation graphs.
Details
PyTorch is an open-source machine learning library released by Facebook (now Meta) in 2016, featuring dynamic neural networks and automatic differentiation. Its intuitive, Pythonic API and dynamic computation graphs (define-by-run) provide the flexibility needed for research and development. Key features include strong NumPy interoperability, GPU acceleration, and distributed training support. It is particularly popular in academia and research institutions, where it is used for everything from prototyping to full-scale research. With specialized libraries such as torchvision, torchaudio, and torchtext, production deployment via TorchScript, and high-level abstractions through PyTorch Lightning, the ecosystem covers the full path from research to practical applications. Thanks to its Pythonic API and ease of debugging, many developers choose it for machine learning education, experimentation, and research.
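A minimal sketch of the define-by-run behavior mentioned above: the computation graph is recorded as ordinary Python executes, so native control flow such as if/else participates directly in the graph.

import torch

# The graph is built on the fly as operations execute
x = torch.tensor(2.0, requires_grad=True)
y = x ** 2 + 3 * x          # y = x^2 + 3x
if y > 5:                   # ordinary Python control flow is allowed
    z = y * 2
else:
    z = y / 2

z.backward()                # backpropagate through the recorded graph
print(x.grad)               # dz/dx = 2 * (2x + 3) = 14 at x = 2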
Pros and Cons
Pros
- Intuitive API: Natural, Pythonic syntax that is easy to write and read
- Dynamic Computation Graph: Runtime graph construction enabling flexible model design
- Easy Debugging: Step-through execution possible with standard Python debuggers
- Research-Oriented: Well-suited for implementing experimental architectures
- NumPy Compatible: Tensors convert to and from NumPy arrays with minimal overhead, lowering the learning curve (see the sketch after this list)
- Rich Community: Widespread academic adoption means abundant reference implementations of cutting-edge research
- GPU Optimization: Efficient GPU utilization through CUDA integration
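A minimal sketch of the NumPy interoperability noted above; note that torch.from_numpy shares memory with the source array rather than copying it.

import numpy as np
import torch

a = np.array([1.0, 2.0, 3.0])
t = torch.from_numpy(a)       # shares memory with the NumPy array
t += 1                        # in-place change is visible on both sides
print(a)                      # [2. 3. 4.]
b = t.numpy()                 # back to NumPy (CPU tensors only)
print(b)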
Cons
- Production Deployment: More limited production tooling compared to TensorFlow
- Mobile Support: More restricted mobile device execution support than TensorFlow Lite
- Performance: Dynamic nature can lead to slower execution than static graphs
- Version Compatibility: APIs may change with new versions
- Learning Resources: Fewer beginner materials compared to TensorFlow
Key Links
- PyTorch Official Site
- PyTorch Official Documentation
- PyTorch GitHub Repository
- PyTorch Tutorials
- PyTorch Lightning
- PyTorch Hub
Code Examples
Hello World
import torch

# Check PyTorch version
print(f"PyTorch version: {torch.__version__}")

# Basic tensor operations
x = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
print("Tensor x:")
print(x)

# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Move tensor to GPU (if available)
if torch.cuda.is_available():
    x = x.to(device)
    print("Tensor moved to GPU")

# Basic operations
y = x * 2
z = torch.matmul(x, x.T)
print(f"x * 2:\n{y}")
print(f"Matrix multiplication:\n{z}")
Simple Neural Network
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Simple neural network definition
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Model initialization
model = SimpleNet(10, 64, 2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Sample data creation
X = torch.randn(1000, 10)
y = torch.randint(0, 2, (1000,))
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Training loop
model.train()
for epoch in range(100):
    total_loss = 0
    for batch_X, batch_y in dataloader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    if epoch % 20 == 0:
        print(f"Epoch {epoch}: Loss = {total_loss:.4f}")
CNN Image Classification
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# CIFAR-10 dataset loading
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True)

# CNN model definition
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 4 * 4, 512)  # 32x32 input halved three times -> 4x4
        self.fc2 = nn.Linear(512, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 64 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Model, loss function, optimizer setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
model.train()
for epoch in range(10):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 200 == 199:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 200:.3f}')
            running_loss = 0.0
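To check the result, a sketch that evaluates the trained CNN on the CIFAR-10 test split; the testset and testloader names are introduced here for illustration:

# Load the CIFAR-10 test split and measure accuracy
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)
testloader = DataLoader(testset, batch_size=32, shuffle=False)

model.eval()
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        preds = model(inputs).argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
print(f"Test accuracy: {correct / total:.2%}")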
Custom Dataset
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

class CustomDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Data extraction (example: first column as label, rest as features)
        label = self.data.iloc[idx, 0]
        features = self.data.iloc[idx, 1:].values.astype(np.float32)
        sample = {'features': features, 'label': label}
        if self.transform:
            sample = self.transform(sample)
        return sample

# Custom transforms
class ToTensor:
    def __call__(self, sample):
        features, label = sample['features'], sample['label']
        return {
            'features': torch.from_numpy(features),
            'label': torch.tensor(label, dtype=torch.long)
        }

class Normalize:
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, sample):
        features = sample['features']
        features = (features - self.mean) / self.std
        return {'features': features, 'label': sample['label']}

# Dataset and dataloader creation (Compose simply applies each transform in order)
transforms_list = [ToTensor(), Normalize(0.5, 0.5)]
composed = transforms.Compose(transforms_list)

# Custom dataset usage example
# dataset = CustomDataset('data.csv', transform=composed)
# dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
print("Custom dataset class defined")
Transfer Learning
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

# Load pre-trained model (the older pretrained=True flag is deprecated)
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# Freeze all layers (feature extraction)
for param in model.parameters():
    param.requires_grad = False

# Replace final layer (for new task)
num_classes = 10  # Number of classes for new task
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Newly created layers require gradients by default; make it explicit
for param in model.fc.parameters():
    param.requires_grad = True

# Move to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss function and optimizer (only the new head is optimized)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# Fine-tuning case (train all layers)
def enable_fine_tuning(model, lr=0.0001):
    for param in model.parameters():
        param.requires_grad = True
    # Layer-wise learning rates: earlier layers change less, the new head more
    optimizer = optim.Adam([
        {'params': model.conv1.parameters(), 'lr': lr * 0.1},
        {'params': model.layer1.parameters(), 'lr': lr * 0.1},
        {'params': model.layer2.parameters(), 'lr': lr * 0.5},
        {'params': model.layer3.parameters(), 'lr': lr * 0.5},
        {'params': model.layer4.parameters(), 'lr': lr},
        {'params': model.fc.parameters(), 'lr': lr * 10}
    ])
    return optimizer

print("Transfer learning model configured")
print(f"Model final layer: {model.fc}")
Model Saving and Loading
import torch
import torch.nn as nn

# Model definition
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 50)
        self.fc2 = nn.Linear(50, 10)
        self.fc3 = nn.Linear(10, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Model instantiation and training (simple example)
model = Net()
optimizer = torch.optim.Adam(model.parameters())

# Sample training
x = torch.randn(100, 10)
y = torch.randn(100, 1)
for epoch in range(100):
    optimizer.zero_grad()
    output = model(x)
    loss = nn.MSELoss()(output, y)
    loss.backward()
    optimizer.step()

# 1. Save/load entire model (requires the class definition at load time;
#    PyTorch >= 2.6 also needs weights_only=False to unpickle full models)
torch.save(model, 'complete_model.pth')
loaded_model = torch.load('complete_model.pth', weights_only=False)

# 2. Save/load weights only (recommended)
torch.save(model.state_dict(), 'model_weights.pth')
# For loading, redefine the model structure, then load the weights
new_model = Net()
new_model.load_state_dict(torch.load('model_weights.pth'))

# 3. Checkpoint (model + optimizer state)
checkpoint = {
    'epoch': 100,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss.item()
}
torch.save(checkpoint, 'checkpoint.pth')

# Load checkpoint
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

# Set to evaluation mode
model.eval()
print("Model saving and loading completed")
print(f"Final epoch: {epoch}, Final loss: {loss:.4f}")