Keras

A high-level deep learning API, integrated into TensorFlow 2.0+ and offering an intuitive, user-friendly interface. It covers everything from prototyping to production use and is widely adopted by beginners and experts alike.

Python, Deep Learning, Neural Networks, TensorFlow, High-Level API

GitHub Overview

keras-team/keras

Deep Learning for humans

Stars: 61,876
Watchers: 2,567
Forks: 19,456
Created: March 28, 2014
Language: Python
License: Apache License 2.0

Topics

keras, deep-learning, machine-learning, neural-networks, python, tensorflow, ai, api

Star History

[Star history chart for keras-team/keras]

Framework

Keras

Overview

Keras is a neural network library that functions as a high-level API on top of TensorFlow, PyTorch, and JAX.
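
With the standalone Keras 3 package, the backend is chosen through the KERAS_BACKEND environment variable before the first import. A minimal sketch (the choice of JAX here is arbitrary):

import os

# Must be set before keras is imported; valid values include
# "tensorflow", "jax", and "torch"
os.environ["KERAS_BACKEND"] = "jax"

import keras

# The same model definition now runs on the selected backend
model = keras.Sequential([
    keras.layers.Dense(8, activation='relu'),
    keras.layers.Dense(1)
])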

Details

Keras was developed by François Chollet in 2015 as a high-level neural network API for deep learning, and it is currently integrated into TensorFlow as tf.keras. Its goal is to let researchers and developers prototype deep learning models rapidly through a simple, intuitive API. Because it emphasizes user-friendly design, modularity, and extensibility, even beginners can build neural networks easily. It supports a variety of architectures, including CNNs, RNNs, and Transformers, and offers three construction methods: the Sequential API, the Functional API, and model subclassing. Used for everything from quick prototypes to serious research and production deployment, it is also a popular choice for teaching deep learning. Rich pre-trained models, a wide range of callbacks, and visualization tools all support efficient model development.
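
As a compact sketch of the three construction methods (layer sizes here are arbitrary), the following builds the same small classifier three ways:

from tensorflow import keras
from tensorflow.keras import layers

# 1. Sequential API: a linear stack of layers
sequential_model = keras.Sequential([
    keras.Input(shape=(8,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(2, activation='softmax')
])

# 2. Functional API: an explicit graph of layer calls
inputs = keras.Input(shape=(8,))
x = layers.Dense(32, activation='relu')(inputs)
outputs = layers.Dense(2, activation='softmax')(x)
functional_model = keras.Model(inputs, outputs)

# 3. Subclassing API: full control via a custom Model class
class SmallClassifier(keras.Model):
    def __init__(self):
        super().__init__()
        self.hidden = layers.Dense(32, activation='relu')
        self.out = layers.Dense(2, activation='softmax')

    def call(self, inputs):
        return self.out(self.hidden(inputs))

subclassed_model = SmallClassifier()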

Pros and Cons

Pros

  • Intuitive API: Simple, easy-to-understand high-level interface
  • Rapid Development: Build neural networks with minimal code
  • TensorFlow Integration: Full compatibility with the TensorFlow ecosystem
  • Rich Pre-trained Models: Many available, including ImageNet-trained models
  • Three Construction Methods: Flexibility via the Sequential, Functional, and subclassing APIs
  • Education-Oriented: Well suited to teaching deep learning
  • Rich Documentation: Detailed guides and sample code

Cons

  • Abstraction: Fine-grained low-level control can be difficult in some cases
  • TensorFlow Dependency: tf.keras is subject to TensorFlow's constraints
  • Customization: Advanced custom layers can be complex to implement
  • Debugging: The abstraction can obscure internal operations (a common mitigation is sketched after this list)
  • Latest Research: The newest architectures may take time to be implemented
  • Performance: Execution can be slower than with low-level APIs in some cases
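
For the debugging point above, one common mitigation is to compile with run_eagerly=True, which disables graph compilation so that fit() and predict() execute step by step and ordinary Python debugging tools work. A minimal sketch:

import numpy as np
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Dense(16, activation='relu', input_shape=(4,)),
    keras.layers.Dense(1, activation='sigmoid')
])

# run_eagerly=True trades speed for inspectability: breakpoints and
# print statements inside custom layers/losses behave normally
model.compile(optimizer='adam', loss='binary_crossentropy', run_eagerly=True)

X = np.random.random((32, 4))
y = np.random.randint(2, size=(32, 1))
model.fit(X, y, epochs=1, verbose=0)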

Code Examples

Hello World

import tensorflow as tf
from tensorflow import keras
import numpy as np

# Check Keras version
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")

# Simple Sequential model
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(10,)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

# Display model summary
model.summary()

# Compile
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Test with sample data
X_sample = np.random.random((100, 10))
y_sample = np.random.randint(2, size=(100, 1))

# Simple training
history = model.fit(X_sample, y_sample, epochs=5, verbose=1)

# Prediction
predictions = model.predict(X_sample[:5])
print(f"Predictions: {predictions.flatten()}")

Image Classification with CNN

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Data preprocessing
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

print(f"Training data shape: {x_train.shape}")
print(f"Test data shape: {x_test.shape}")

# Build CNN model
model = keras.Sequential([
    # First block
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.BatchNormalization(),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),
    
    # Second block
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),
    
    # Third block
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.25),
    
    # Classification layers
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])

# Compile model
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Display model structure
model.summary()

# Configure callbacks
callbacks = [
    keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5, min_lr=1e-7),
    keras.callbacks.ModelCheckpoint('best_model.keras', save_best_only=True)
]

# Execute training
history = model.fit(
    x_train, y_train,
    batch_size=128,
    epochs=50,
    validation_data=(x_test, y_test),
    callbacks=callbacks,
    verbose=1
)

# Evaluation
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {test_acc:.4f}')

Complex Models with Functional API

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Define input layer
input_layer = keras.Input(shape=(224, 224, 3), name='image_input')

# Backbone network (ResNet-style)
def residual_block(x, filters, kernel_size=3, stride=1):
    shortcut = x
    
    # Main path
    x = layers.Conv2D(filters, kernel_size, strides=stride, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    
    x = layers.Conv2D(filters, kernel_size, padding='same')(x)
    x = layers.BatchNormalization()(x)
    
    # Shortcut connection
    if stride != 1 or shortcut.shape[-1] != filters:
        shortcut = layers.Conv2D(filters, 1, strides=stride, padding='same')(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)
    
    x = layers.Add()([x, shortcut])
    x = layers.ReLU()(x)
    
    return x

# Feature extraction
x = layers.Conv2D(64, 7, strides=2, padding='same')(input_layer)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
x = layers.MaxPooling2D(3, strides=2, padding='same')(x)

# Residual blocks
x = residual_block(x, 64)
x = residual_block(x, 64)
x = residual_block(x, 128, stride=2)
x = residual_block(x, 128)
x = residual_block(x, 256, stride=2)
x = residual_block(x, 256)

# Global average pooling
feature_vector = layers.GlobalAveragePooling2D(name='feature_vector')(x)

# Classification head
classification_output = layers.Dense(1000, activation='softmax', name='classification')(feature_vector)

# Regression head (e.g., age prediction)
regression_output = layers.Dense(1, activation='linear', name='regression')(feature_vector)

# Create model
model = keras.Model(
    inputs=input_layer,
    outputs=[classification_output, regression_output],
    name='multi_output_model'
)

# Compile model (multi-task)
model.compile(
    optimizer='adam',
    loss={
        'classification': 'categorical_crossentropy',
        'regression': 'mse'
    },
    loss_weights={
        'classification': 1.0,
        'regression': 0.5
    },
    metrics={
        'classification': ['accuracy'],
        'regression': ['mae']
    }
)

model.summary()

Custom Layers and Models

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Define custom layer
class AttentionLayer(layers.Layer):
    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        
    def build(self, input_shape):
        self.W = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='random_normal',
            trainable=True,
            name='attention_weights'
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
            name='attention_bias'
        )
        self.u = self.add_weight(
            shape=(self.units,),
            initializer='random_normal',
            trainable=True,
            name='attention_context'
        )
        super().build(input_shape)
    
    def call(self, inputs):
        # Attention mechanism computation
        score = tf.nn.tanh(tf.tensordot(inputs, self.W, axes=1) + self.b)
        attention_weights = tf.nn.softmax(tf.tensordot(score, self.u, axes=1), axis=1)
        context_vector = tf.reduce_sum(inputs * tf.expand_dims(attention_weights, -1), axis=1)
        return context_vector, attention_weights
    
    def get_config(self):
        config = super().get_config()
        config.update({'units': self.units})
        return config

# Custom model class
class TextClassificationModel(keras.Model):
    def __init__(self, vocab_size, embedding_dim, max_length, num_classes):
        super().__init__()
        self.embedding = layers.Embedding(vocab_size, embedding_dim)  # input_length was removed in Keras 3
        self.lstm = layers.LSTM(128, return_sequences=True)
        self.attention = AttentionLayer(64)
        self.dropout = layers.Dropout(0.5)
        self.classifier = layers.Dense(num_classes, activation='softmax')
    
    def call(self, inputs, training=None):
        x = self.embedding(inputs)
        x = self.lstm(x)
        context_vector, attention_weights = self.attention(x)
        x = self.dropout(context_vector, training=training)
        return self.classifier(x)

# Model instantiation
model = TextClassificationModel(
    vocab_size=10000,
    embedding_dim=100,
    max_length=500,
    num_classes=5
)

# Model compilation
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Determine model shape with dummy data
dummy_input = tf.random.uniform((1, 500), maxval=10000, dtype=tf.int32)
_ = model(dummy_input)

model.summary()

Transfer Learning

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import ResNet50

# Load pre-trained model
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3)
)

# Freeze base model weights
base_model.trainable = False

# Add custom classifier
model = keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.2),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(10, activation='softmax')  # 10-class classification
])

# Initial training (feature extraction)
model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("Phase 1: Feature extraction")
# history1 = model.fit(train_dataset, epochs=10, validation_data=val_dataset)

# Fine-tuning
base_model.trainable = True

# Make only last few layers trainable
for layer in base_model.layers[:-20]:
    layer.trainable = False

# Recompile with lower learning rate
model.compile(
    optimizer=keras.optimizers.Adam(1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("Phase 2: Fine-tuning")
# history2 = model.fit(train_dataset, epochs=10, validation_data=val_dataset)

model.summary()

Model Saving and Loading

import tensorflow as tf
from tensorflow import keras

# Build model
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(20,)),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train with sample data
import numpy as np
X_train = np.random.random((1000, 20))
y_train = keras.utils.to_categorical(np.random.randint(10, size=(1000, 1)), 10)

model.fit(X_train, y_train, epochs=10, verbose=0)

# 1. Save entire model (recommended)
model.save('complete_model.keras')

# 2. Save in HDF5 format
model.save('model.h5')

# 3. Save weights only
model.save_weights('model.weights.h5')  # Keras 3 requires the .weights.h5 suffix

# 4. Save model architecture (JSON)
model_json = model.to_json()
with open('model_architecture.json', 'w') as f:
    f.write(model_json)

# Load models
# 1. Load complete model
loaded_model = keras.models.load_model('complete_model.keras')

# 2. Load from HDF5 format
loaded_model_h5 = keras.models.load_model('model.h5')

# 3. Load architecture and weights separately
with open('model_architecture.json', 'r') as f:
    model_json = f.read()

loaded_model_from_json = keras.models.model_from_json(model_json)
loaded_model_from_json.load_weights('model.weights.h5')
loaded_model_from_json.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Export/load in TensorFlow SavedModel format (for serving; the loaded
# object is an inference-only tf.Module, not a Keras model)
tf.saved_model.save(model, 'saved_model_dir')
loaded_saved_model = tf.saved_model.load('saved_model_dir')

# Verify predictions
test_input = np.random.random((5, 20))
original_pred = model.predict(test_input)
loaded_pred = loaded_model.predict(test_input)

print("Prediction consistency:", np.allclose(original_pred, loaded_pred))
print(f"Original model accuracy: {model.evaluate(X_train, y_train, verbose=0)[1]:.4f}")
print(f"Loaded model accuracy: {loaded_model.evaluate(X_train, y_train, verbose=0)[1]:.4f}")