PyTorch

A dynamic deep learning framework developed by Meta (formerly Facebook). It features dynamic computation graphs, a Python-first design, and intuitive APIs, and it is overwhelmingly the framework of choice in research, serving as the de facto standard for academic paper implementations.

Python, Machine Learning, Deep Learning, AI, Facebook, Dynamic Computation Graph

GitHub Overview

pytorch/pytorch

Tensors and Dynamic neural networks in Python with strong GPU acceleration

Stars: 84,567
Watchers: 2,345
Forks: 23,456
Created: August 13, 2016
Language: Python
License: BSD 3-Clause License

Topics

pytorch, machine-learning, deep-learning, neural-networks, python, gpu, research, ai

Star History

pytorch/pytorch Star History

Framework

PyTorch

Overview

PyTorch is an open-source machine learning framework developed by Facebook (now Meta), built around dynamic computation graphs.

Details

PyTorch is an open-source machine learning library released by Facebook (now Meta) in 2016, featuring dynamic neural networks and automatic differentiation. Its intuitive, Pythonic API and dynamic computation graphs (define-by-run) give it the flexibility that research and development demand. Key features include strong NumPy compatibility, GPU acceleration, and distributed training support. It is particularly popular in academia and at research institutions, where it is used for everything from prototyping to full-scale research. With specialized companion libraries such as torchvision, torchaudio, and torchtext, production deployment via TorchScript, and high-level abstractions through PyTorch Lightning, it covers the path from research to practical applications. Thanks to its Pythonic API and ease of debugging, many developers choose it for machine learning education, experimentation, and research.
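
The define-by-run behavior and automatic differentiation described above can be shown in a few lines; this is a minimal sketch using only core torch calls, with arbitrary values:

import torch

# The graph is built as operations run, so ordinary Python control flow
# (here a data-dependent while loop) becomes part of the model.
x = torch.randn(3, requires_grad=True)
y = x * 2
while y.norm() < 100:
    y = y * 2

loss = y.sum()
loss.backward()   # automatic differentiation through the dynamically built graph
print(x.grad)     # gradient of loss with respect to x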

Pros and Cons

Pros

  • Intuitive API: Natural, Pythonic syntax that is easy to write and read
  • Dynamic Computation Graph: Graphs are built at runtime, enabling flexible model design
  • Easy Debugging: Step-through execution with standard Python debuggers (see the sketch after this list)
  • Research-Oriented: Well suited to implementing experimental architectures
  • NumPy Compatible: Easy conversion to and from NumPy arrays, gentle learning curve
  • Rich Community: Broad academic adoption means abundant cutting-edge research implementations
  • GPU Optimization: Efficient GPU utilization through CUDA integration
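
The debugging and NumPy points above can be illustrated with a short, non-authoritative sketch; the tiny module and shapes below are invented for the example:

import numpy as np
import torch
import torch.nn as nn

class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        h = self.fc(x)
        # Execution is eager, so print(), logging, or breakpoint() works mid-forward
        print("hidden stats:", h.mean().item(), h.std().item())
        return torch.relu(h)

# NumPy interoperability: from_numpy shares memory with the source array (CPU only)
arr = np.random.rand(8, 4).astype(np.float32)
x = torch.from_numpy(arr)
out = TinyNet()(x)
print(out.detach().numpy().shape)  # convert back to NumPy for further analysis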

Cons

  • Production Deployment: Production tooling is more limited than TensorFlow's, though TorchScript export helps (see the sketch after this list)
  • Mobile Support: On-device execution is more restricted than with TensorFlow Lite
  • Performance: The dynamic execution model can run slower than optimized static graphs
  • Version Compatibility: APIs may change between releases
  • Learning Resources: Fewer beginner-oriented materials than TensorFlow
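
As a rough sketch of the TorchScript deployment route mentioned in the Details section (the placeholder model and file name are illustrative):

import torch
import torch.nn as nn

class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(10, 2)

    def forward(self, x):
        return self.fc(x)

model = Classifier().eval()

# torch.jit.script compiles the module into TorchScript, a serializable,
# Python-independent form that can also be loaded from C++ (libtorch).
scripted = torch.jit.script(model)
scripted.save("classifier.pt")

# The saved module can be reloaded without the original class definition.
restored = torch.jit.load("classifier.pt")
print(restored(torch.randn(1, 10)))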

Code Examples

Hello World

import torch
import torch.nn as nn
import torch.nn.functional as F

# Check PyTorch version
print(f"PyTorch version: {torch.__version__}")

# Basic tensor operations
x = torch.tensor([[1., 2., 3.], [4., 5., 6.]])
print("Tensor x:")
print(x)

# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Move tensor to GPU (if available)
if torch.cuda.is_available():
    x = x.to(device)
    print("Tensor moved to GPU")

# Basic operations
y = x * 2
z = torch.matmul(x, x.T)
print(f"x * 2:\n{y}")
print(f"Matrix multiplication:\n{z}")

Simple Neural Network

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Simple neural network definition
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(0.2)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Model initialization
model = SimpleNet(10, 64, 2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Sample data creation
X = torch.randn(1000, 10)
y = torch.randint(0, 2, (1000,))
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Training loop
model.train()
for epoch in range(100):
    total_loss = 0
    for batch_X, batch_y in dataloader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    if epoch % 20 == 0:
        print(f"Epoch {epoch}: Loss = {total_loss:.4f}")

CNN Image Classification

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# CIFAR-10 dataset loading
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True)

# CNN model definition
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)
        self.dropout = nn.Dropout(0.5)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 64 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Model, loss function, optimizer setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
model.train()
for epoch in range(10):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        if i % 200 == 199:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 200:.3f}')
            running_loss = 0.0

Custom Dataset

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import pandas as pd
import numpy as np

class CustomDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        # Data extraction (example: first column as label, rest as features)
        label = self.data.iloc[idx, 0]
        features = self.data.iloc[idx, 1:].values.astype(np.float32)
        
        sample = {'features': features, 'label': label}
        
        if self.transform:
            sample = self.transform(sample)
        
        return sample

# Custom transforms
class ToTensor:
    def __call__(self, sample):
        features, label = sample['features'], sample['label']
        return {
            'features': torch.from_numpy(features),
            'label': torch.tensor(label, dtype=torch.long)
        }

class Normalize:
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std
    
    def __call__(self, sample):
        features = sample['features']
        features = (features - self.mean) / self.std
        return {'features': features, 'label': sample['label']}

# Dataset and dataloader creation
transforms_list = [ToTensor(), Normalize(0.5, 0.5)]
composed = transforms.Compose(transforms_list)

# Custom dataset usage example
# dataset = CustomDataset('data.csv', transform=composed)
# dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

print("Custom dataset class defined")

Transfer Learning

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms

# Load pre-trained model (the older pretrained=True argument is deprecated in recent torchvision)
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# Make only final layer trainable (feature extraction)
for param in model.parameters():
    param.requires_grad = False

# Replace final layer (for new task)
num_classes = 10  # Number of classes for new task
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Make only final layer trainable
for param in model.fc.parameters():
    param.requires_grad = True

# Move to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# Fine-tuning case (train all layers)
def enable_fine_tuning(model, lr=0.0001):
    for param in model.parameters():
        param.requires_grad = True
    
    # Layer-wise learning rate setting
    optimizer = optim.Adam([
        {'params': model.conv1.parameters(), 'lr': lr * 0.1},
        {'params': model.layer1.parameters(), 'lr': lr * 0.1},
        {'params': model.layer2.parameters(), 'lr': lr * 0.5},
        {'params': model.layer3.parameters(), 'lr': lr * 0.5},
        {'params': model.layer4.parameters(), 'lr': lr},
        {'params': model.fc.parameters(), 'lr': lr * 10}
    ])
    
    return optimizer

print("Transfer learning model configured")
print(f"Model final layer: {model.fc}")

Model Saving and Loading

import torch
import torch.nn as nn

# Model definition
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(10, 50)
        self.fc2 = nn.Linear(50, 10)
        self.fc3 = nn.Linear(10, 1)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Model instantiation and training (simple example)
model = Net()
optimizer = torch.optim.Adam(model.parameters())

# Sample training
x = torch.randn(100, 10)
y = torch.randn(100, 1)

for epoch in range(100):
    optimizer.zero_grad()
    output = model(x)
    loss = nn.MSELoss()(output, y)
    loss.backward()
    optimizer.step()

# 1. Save/load entire model (pickles the full object; recent PyTorch versions
#    may require torch.load(..., weights_only=False) to restore it)
torch.save(model, 'complete_model.pth')
loaded_model = torch.load('complete_model.pth')

# 2. Save/load weights only (recommended)
torch.save(model.state_dict(), 'model_weights.pth')

# For loading, redefine model structure then load weights
new_model = Net()
new_model.load_state_dict(torch.load('model_weights.pth'))

# 3. Checkpoint (model + optimizer state)
checkpoint = {
    'epoch': 100,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss.item()
}
torch.save(checkpoint, 'checkpoint.pth')

# Load checkpoint
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

# Set to evaluation mode
model.eval()

print("Model saving and loading completed")
print(f"Final epoch: {epoch}, Final loss: {loss:.4f}")