Keras
A high-level deep learning API, integrated into TensorFlow 2.0+ with an intuitive, user-friendly interface. It supports everything from prototyping to production use and is widely used by beginners and experts alike.
GitHub Overview
keras-team/keras
Deep Learning for humans
Overview
Keras is a neural network library that functions as a high-level API for TensorFlow, PyTorch, and JAX.
Details
Keras was developed by François Chollet and first released in 2015 as a high-level neural network API for deep learning. It is integrated into TensorFlow and provided as tf.keras. Its goal is to let researchers and developers rapidly prototype deep learning models through a simple, intuitive API. Because it emphasizes user-friendly design, modularity, and extensibility, even beginners can build neural networks easily. It supports a wide range of architectures, including CNNs, RNNs, and Transformers, and offers three construction methods: the Sequential API, the Functional API, and the Subclassing API (sketched below). It is used for everything from prototyping to serious research and production deployment, and it is a popular choice for teaching deep learning. Rich pre-trained models, a large set of callbacks, and visualization tools round out efficient model development.
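As a quick illustration of the three construction methods, the sketch below builds the same small two-layer classifier each way; the layer sizes are arbitrary examples:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# 1. Sequential API: a linear stack of layers
sequential_model = keras.Sequential([
    layers.Dense(32, activation='relu', input_shape=(8,)),
    layers.Dense(2, activation='softmax')
])

# 2. Functional API: an explicit graph from inputs to outputs
inputs = keras.Input(shape=(8,))
x = layers.Dense(32, activation='relu')(inputs)
outputs = layers.Dense(2, activation='softmax')(x)
functional_model = keras.Model(inputs, outputs)

# 3. Subclassing API: full control via a keras.Model subclass
class SmallClassifier(keras.Model):
    def __init__(self):
        super().__init__()
        self.hidden = layers.Dense(32, activation='relu')
        self.out = layers.Dense(2, activation='softmax')

    def call(self, inputs):
        return self.out(self.hidden(inputs))

subclassed_model = SmallClassifier()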
Pros and Cons
Pros
- Intuitive API: Simple and understandable high-level interface
- Rapid Development: Build neural networks with minimal code
- TensorFlow Integration: Complete compatibility with TensorFlow ecosystem
- Rich Pre-trained Models: Many available including ImageNet trained models
- 3 Construction Methods: Flexibility with Sequential, Functional, and Subclassing APIs
- Education-Oriented: Optimal design for deep learning education
- Rich Documentation: Detailed guides and sample code
Cons
- Abstraction Level: Low-level control can be difficult in some cases
- TensorFlow Dependency: Subject to TensorFlow constraints
- Customization: Implementing advanced custom layers can be complex
- Debugging: Abstraction can make internal operations hard to follow (see the eager-mode sketch after this list)
- Latest Research: The newest architectures may take time to become available
- Performance: Can run slower than low-level APIs in some cases
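One common mitigation for the debugging drawback is eager execution: compiling with run_eagerly=True makes each training step run as plain Python, so breakpoints and print statements work inside layers. A minimal sketch with placeholder data:
import numpy as np
import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Dense(8, activation='relu', input_shape=(4,)),
    keras.layers.Dense(1, activation='sigmoid')
])
# run_eagerly=True disables graph compilation, so each step executes
# eagerly and can be inspected with a debugger or print statements
model.compile(optimizer='adam', loss='binary_crossentropy', run_eagerly=True)

x = np.random.random((16, 4)).astype('float32')
y = np.random.randint(2, size=(16, 1))
model.fit(x, y, epochs=1, verbose=0)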
Key Links
- Keras Official Site
- TensorFlow Keras Guide
- Keras Official Documentation
- Keras GitHub Repository
- Keras Applications
- Keras Tutorials
Code Examples
Hello World
import tensorflow as tf
from tensorflow import keras
import numpy as np
# Check Keras version
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")
# Simple Sequential model
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(10,)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])
# Display model summary
model.summary()
# Compile
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)
# Test with sample data
X_sample = np.random.random((100, 10))
y_sample = np.random.randint(2, size=(100, 1))
# Simple training
history = model.fit(X_sample, y_sample, epochs=5, verbose=1)
# Prediction
predictions = model.predict(X_sample[:5])
print(f"Predictions: {predictions.flatten()}")
Image Classification with CNN
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
# Data preprocessing
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
print(f"Training data shape: {x_train.shape}")
print(f"Test data shape: {x_test.shape}")
# Build CNN model
model = keras.Sequential([
    # First block
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.BatchNormalization(),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),
    # Second block
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),
    # Third block
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.25),
    # Classification layers
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])
# Compile model
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# Display model structure
model.summary()
# Configure callbacks
callbacks = [
    keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5, min_lr=1e-7),
    keras.callbacks.ModelCheckpoint('best_model.h5', save_best_only=True)
]
# Execute training
history = model.fit(
    x_train, y_train,
    batch_size=128,
    epochs=50,
    validation_data=(x_test, y_test),
    callbacks=callbacks,
    verbose=1
)
# Evaluation
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {test_acc:.4f}')
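matplotlib is imported above, so the training curves recorded in history can be plotted as well; a minimal sketch (the key names follow from metrics=['accuracy'] and the validation_data argument):
plt.figure(figsize=(8, 4))
plt.plot(history.history['accuracy'], label='train accuracy')
plt.plot(history.history['val_accuracy'], label='validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()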
Complex Models with Functional API
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Define input layer
input_layer = keras.Input(shape=(224, 224, 3), name='image_input')
# Backbone network (ResNet-style)
def residual_block(x, filters, kernel_size=3, stride=1):
    shortcut = x
    # Main path
    x = layers.Conv2D(filters, kernel_size, strides=stride, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.Conv2D(filters, kernel_size, padding='same')(x)
    x = layers.BatchNormalization()(x)
    # Shortcut connection (projection when the shape changes)
    if stride != 1 or shortcut.shape[-1] != filters:
        shortcut = layers.Conv2D(filters, 1, strides=stride, padding='same')(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)
    x = layers.Add()([x, shortcut])
    x = layers.ReLU()(x)
    return x
# Feature extraction
x = layers.Conv2D(64, 7, strides=2, padding='same')(input_layer)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
x = layers.MaxPooling2D(3, strides=2, padding='same')(x)
# Residual blocks
x = residual_block(x, 64)
x = residual_block(x, 64)
x = residual_block(x, 128, stride=2)
x = residual_block(x, 128)
x = residual_block(x, 256, stride=2)
x = residual_block(x, 256)
# Global average pooling
feature_vector = layers.GlobalAveragePooling2D(name='feature_vector')(x)
# Classification head
classification_output = layers.Dense(1000, activation='softmax', name='classification')(feature_vector)
# Regression head (e.g., age prediction)
regression_output = layers.Dense(1, activation='linear', name='regression')(feature_vector)
# Create model
model = keras.Model(
    inputs=input_layer,
    outputs=[classification_output, regression_output],
    name='multi_output_model'
)
# Compile model (multi-task)
model.compile(
    optimizer='adam',
    loss={
        'classification': 'categorical_crossentropy',
        'regression': 'mse'
    },
    loss_weights={
        'classification': 1.0,
        'regression': 0.5
    },
    metrics={
        'classification': ['accuracy'],
        'regression': ['mae']
    }
)
model.summary()
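Training a multi-output model takes one target per named output, passed as a dict keyed by the output layer names. A minimal sketch with random dummy data (shapes chosen only for illustration):
import numpy as np

dummy_images = np.random.random((8, 224, 224, 3)).astype('float32')
dummy_labels = keras.utils.to_categorical(np.random.randint(1000, size=(8,)), 1000)
dummy_ages = np.random.random((8, 1)).astype('float32')

# One target per named output
model.fit(
    dummy_images,
    {'classification': dummy_labels, 'regression': dummy_ages},
    epochs=1,
    batch_size=4,
    verbose=0
)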
Custom Layers and Models
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Define custom layer
class AttentionLayer(layers.Layer):
    def __init__(self, units, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        self.W = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='random_normal',
            trainable=True,
            name='attention_weights'
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
            name='attention_bias'
        )
        self.u = self.add_weight(
            shape=(self.units,),
            initializer='random_normal',
            trainable=True,
            name='attention_context'
        )
        super(AttentionLayer, self).build(input_shape)

    def call(self, inputs):
        # Attention mechanism: score each timestep, then take the weighted sum
        score = tf.nn.tanh(tf.tensordot(inputs, self.W, axes=1) + self.b)
        attention_weights = tf.nn.softmax(tf.tensordot(score, self.u, axes=1), axis=1)
        context_vector = tf.reduce_sum(inputs * tf.expand_dims(attention_weights, -1), axis=1)
        return context_vector, attention_weights

    def get_config(self):
        config = super(AttentionLayer, self).get_config()
        config.update({'units': self.units})
        return config
# Custom model class
class TextClassificationModel(keras.Model):
    def __init__(self, vocab_size, embedding_dim, max_length, num_classes):
        super(TextClassificationModel, self).__init__()
        self.embedding = layers.Embedding(vocab_size, embedding_dim, input_length=max_length)
        self.lstm = layers.LSTM(128, return_sequences=True)
        self.attention = AttentionLayer(64)
        self.dropout = layers.Dropout(0.5)
        self.classifier = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=None):
        x = self.embedding(inputs)
        x = self.lstm(x)
        context_vector, attention_weights = self.attention(x)
        x = self.dropout(context_vector, training=training)
        return self.classifier(x)
# Model instantiation
model = TextClassificationModel(
    vocab_size=10000,
    embedding_dim=100,
    max_length=500,
    num_classes=5
)
# Model compilation
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# Determine model shape with dummy data
dummy_input = tf.random.uniform((1, 500), maxval=10000, dtype=tf.int32)
_ = model(dummy_input)
model.summary()
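Because subclassed models do not serialize as easily as Sequential or Functional ones, the most robust way to persist this model is to save its weights and rebuild the architecture; the file name below is a hypothetical example:
# Save only the weights (TensorFlow checkpoint format works for subclassed models)
model.save_weights('text_model_ckpt')

# Rebuild an identical architecture and restore the weights
restored = TextClassificationModel(
    vocab_size=10000, embedding_dim=100, max_length=500, num_classes=5
)
_ = restored(dummy_input)  # create the variables before loading
restored.load_weights('text_model_ckpt')

# If the custom layer ever appears inside a saved model file, pass it
# to load_model via custom_objects, e.g.
# keras.models.load_model('file.keras', custom_objects={'AttentionLayer': AttentionLayer})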
Transfer Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import ResNet50
# Load pre-trained model
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3)
)
# Freeze base model weights
base_model.trainable = False
# Add custom classifier
model = keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.2),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(10, activation='softmax')  # 10-class classification
])
# Initial training (feature extraction)
model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
print("Phase 1: Feature extraction")
# history1 = model.fit(train_dataset, epochs=10, validation_data=val_dataset)
# Fine-tuning
base_model.trainable = True
# Make only last few layers trainable
for layer in base_model.layers[:-20]:
    layer.trainable = False
# Recompile with lower learning rate
model.compile(
    optimizer=keras.optimizers.Adam(1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
print("Phase 2: Fine-tuning")
# history2 = model.fit(train_dataset, epochs=10, validation_data=val_dataset)
model.summary()
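The commented-out fit calls assume train_dataset and val_dataset already exist. One common way to build them is the directory loader below; the directory paths are hypothetical:
train_dataset = keras.utils.image_dataset_from_directory(
    'data/train',             # hypothetical path
    image_size=(224, 224),
    batch_size=32,
    label_mode='categorical'  # matches the categorical_crossentropy loss
)
val_dataset = keras.utils.image_dataset_from_directory(
    'data/val',               # hypothetical path
    image_size=(224, 224),
    batch_size=32,
    label_mode='categorical'
)
# ResNet50 expects its own input preprocessing
preprocess = keras.applications.resnet50.preprocess_input
train_dataset = train_dataset.map(lambda x, y: (preprocess(x), y))
val_dataset = val_dataset.map(lambda x, y: (preprocess(x), y))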
Model Saving and Loading
import tensorflow as tf
from tensorflow import keras
# Build model
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(20,)),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# Train with sample data
import numpy as np
X_train = np.random.random((1000, 20))
y_train = keras.utils.to_categorical(np.random.randint(10, size=(1000, 1)), 10)
model.fit(X_train, y_train, epochs=10, verbose=0)
# 1. Save entire model (recommended)
model.save('complete_model.keras')
# 2. Save in HDF5 format
model.save('model.h5')
# 3. Save weights only
model.save_weights('model_weights.h5')
# 4. Save model architecture (JSON)
model_json = model.to_json()
with open('model_architecture.json', 'w') as f:
    f.write(model_json)
# Load models
# 1. Load complete model
loaded_model = keras.models.load_model('complete_model.keras')
# 2. Load from HDF5 format
loaded_model_h5 = keras.models.load_model('model.h5')
# 3. Load architecture and weights separately
with open('model_architecture.json', 'r') as f:
    model_json = f.read()
loaded_model_from_json = keras.models.model_from_json(model_json)
loaded_model_from_json.load_weights('model_weights.h5')
loaded_model_from_json.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# Save/load in SavedModel format
tf.saved_model.save(model, 'saved_model_dir')
loaded_saved_model = tf.saved_model.load('saved_model_dir')
# Verify predictions
test_input = np.random.random((5, 20))
original_pred = model.predict(test_input)
loaded_pred = loaded_model.predict(test_input)
print("Prediction consistency:", np.allclose(original_pred, loaded_pred))
print(f"Original model accuracy: {model.evaluate(X_train, y_train, verbose=0)[1]:.4f}")
print(f"Loaded model accuracy: {loaded_model.evaluate(X_train, y_train, verbose=0)[1]:.4f}")