Keras
A high-level deep learning API, integrated into TensorFlow 2.0+ with an intuitive, user-friendly interface. It supports everything from prototyping to production use and is widely used by beginners and experts alike.
GitHub Overview
keras-team/keras
Deep Learning for humans
Overview
Keras is a neural network library that functions as a high-level API for TensorFlow, PyTorch, and JAX.
Details
Keras was developed by François Chollet and first released in 2015 as a high-level neural network API for deep learning. It is integrated into TensorFlow and provided as tf.keras. Its goal is to let researchers and developers rapidly prototype deep learning models through a simple, intuitive API. Because it emphasizes user-friendly design, modularity, and extensibility, even beginners can build neural networks easily. It supports a wide range of architectures, including CNNs, RNNs, and Transformers, and offers three construction methods: the Sequential API, the Functional API, and the Subclassing API (sketched below). It is used for everything from prototyping to serious research and production deployment, and it is a popular choice for teaching deep learning. Rich pre-trained models, a large set of callbacks, and visualization tools round out efficient model development.
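As a quick illustration of the three construction methods, the sketch below builds the same small two-layer classifier each way; the layer sizes are arbitrary examples:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# 1. Sequential API: a linear stack of layers
sequential_model = keras.Sequential([
    layers.Dense(32, activation='relu', input_shape=(8,)),
    layers.Dense(2, activation='softmax')
])

# 2. Functional API: an explicit graph from inputs to outputs
inputs = keras.Input(shape=(8,))
x = layers.Dense(32, activation='relu')(inputs)
outputs = layers.Dense(2, activation='softmax')(x)
functional_model = keras.Model(inputs, outputs)

# 3. Subclassing API: full control via a keras.Model subclass
class SmallClassifier(keras.Model):
    def __init__(self):
        super().__init__()
        self.hidden = layers.Dense(32, activation='relu')
        self.out = layers.Dense(2, activation='softmax')

    def call(self, inputs):
        return self.out(self.hidden(inputs))

subclassed_model = SmallClassifier()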
Pros and Cons
Pros
- Intuitive API: Simple and understandable high-level interface
- Rapid Development: Build neural networks with minimal code
- TensorFlow Integration: Complete compatibility with TensorFlow ecosystem
- Rich Pre-trained Models: Many available including ImageNet trained models
- 3 Construction Methods: Flexibility with Sequential, Functional, and Subclassing APIs
- Education-Oriented: Optimal design for deep learning education
- Rich Documentation: Detailed guides and sample code
Cons
- Abstraction Level: Low-level control can be difficult in some cases
- TensorFlow Dependency: Subject to TensorFlow constraints
- Customization: Implementing advanced custom layers can be complex
- Debugging: Abstraction can make internal operations hard to follow (see the eager-mode sketch after this list)
- Latest Research: The newest architectures may take time to become available
- Performance: Can run slower than low-level APIs in some cases
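One common mitigation for the debugging drawback is eager execution: compiling with run_eagerly=True makes each training step run as plain Python, so breakpoints and print statements work inside layers. A minimal sketch with placeholder data:
import numpy as np
import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Dense(8, activation='relu', input_shape=(4,)),
    keras.layers.Dense(1, activation='sigmoid')
])
# run_eagerly=True disables graph compilation, so each step executes
# eagerly and can be inspected with a debugger or print statements
model.compile(optimizer='adam', loss='binary_crossentropy', run_eagerly=True)

x = np.random.random((16, 4)).astype('float32')
y = np.random.randint(2, size=(16, 1))
model.fit(x, y, epochs=1, verbose=0)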
Key Links
- Keras Official Site
- TensorFlow Keras Guide
- Keras Official Documentation
- Keras GitHub Repository
- Keras Applications
- Keras Tutorials
Code Examples
Hello World
import tensorflow as tf
from tensorflow import keras
import numpy as np
# Check Keras version
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")
# Simple Sequential model
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(10,)),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])
# Display model summary
model.summary()
# Compile
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)
# Test with sample data
X_sample = np.random.random((100, 10))
y_sample = np.random.randint(2, size=(100, 1))
# Simple training
history = model.fit(X_sample, y_sample, epochs=5, verbose=1)
# Prediction
predictions = model.predict(X_sample[:5])
print(f"Predictions: {predictions.flatten()}")
Image Classification with CNN
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
# Data preprocessing
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
print(f"Training data shape: {x_train.shape}")
print(f"Test data shape: {x_test.shape}")
# Build CNN model
model = keras.Sequential([
    # First block
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.BatchNormalization(),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),
    # Second block
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),
    # Third block
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.25),
    # Classification layers
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])
# Compile model
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# Display model structure
model.summary()
# Configure callbacks
callbacks = [
    keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5, min_lr=1e-7),
    keras.callbacks.ModelCheckpoint('best_model.h5', save_best_only=True)
]
# Execute training
history = model.fit(
    x_train, y_train,
    batch_size=128,
    epochs=50,
    validation_data=(x_test, y_test),
    callbacks=callbacks,
    verbose=1
)
# Evaluation
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(f'Test accuracy: {test_acc:.4f}')
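matplotlib is imported above, so the training curves recorded in history can be plotted as well; a minimal sketch (the key names follow from metrics=['accuracy'] and the validation_data argument):
plt.figure(figsize=(8, 4))
plt.plot(history.history['accuracy'], label='train accuracy')
plt.plot(history.history['val_accuracy'], label='validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()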
Complex Models with Functional API
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Define input layer
input_layer = keras.Input(shape=(224, 224, 3), name='image_input')
# Backbone network (ResNet-style)
def residual_block(x, filters, kernel_size=3, stride=1):
    shortcut = x
    # Main path
    x = layers.Conv2D(filters, kernel_size, strides=stride, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.Conv2D(filters, kernel_size, padding='same')(x)
    x = layers.BatchNormalization()(x)
    # Shortcut connection (projection when the shape changes)
    if stride != 1 or shortcut.shape[-1] != filters:
        shortcut = layers.Conv2D(filters, 1, strides=stride, padding='same')(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)
    x = layers.Add()([x, shortcut])
    x = layers.ReLU()(x)
    return x
# Feature extraction
x = layers.Conv2D(64, 7, strides=2, padding='same')(input_layer)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
x = layers.MaxPooling2D(3, strides=2, padding='same')(x)
# Residual blocks
x = residual_block(x, 64)
x = residual_block(x, 64)
x = residual_block(x, 128, stride=2)
x = residual_block(x, 128)
x = residual_block(x, 256, stride=2)
x = residual_block(x, 256)
# Global average pooling
feature_vector = layers.GlobalAveragePooling2D(name='feature_vector')(x)
# Classification head
classification_output = layers.Dense(1000, activation='softmax', name='classification')(feature_vector)
# Regression head (e.g., age prediction)
regression_output = layers.Dense(1, activation='linear', name='regression')(feature_vector)
# Create model
model = keras.Model(
    inputs=input_layer,
    outputs=[classification_output, regression_output],
    name='multi_output_model'
)
# Compile model (multi-task)
model.compile(
    optimizer='adam',
    loss={
        'classification': 'categorical_crossentropy',
        'regression': 'mse'
    },
    loss_weights={
        'classification': 1.0,
        'regression': 0.5
    },
    metrics={
        'classification': ['accuracy'],
        'regression': ['mae']
    }
)
model.summary()
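Training a multi-output model takes one target per named output, passed as a dict keyed by the output layer names. A minimal sketch with random dummy data (shapes chosen only for illustration):
import numpy as np

dummy_images = np.random.random((8, 224, 224, 3)).astype('float32')
dummy_labels = keras.utils.to_categorical(np.random.randint(1000, size=(8,)), 1000)
dummy_ages = np.random.random((8, 1)).astype('float32')

# One target per named output
model.fit(
    dummy_images,
    {'classification': dummy_labels, 'regression': dummy_ages},
    epochs=1,
    batch_size=4,
    verbose=0
)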
Custom Layers and Models
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Define custom layer
class AttentionLayer(layers.Layer):
    def __init__(self, units, **kwargs):
        super(AttentionLayer, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        self.W = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='random_normal',
            trainable=True,
            name='attention_weights'
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True,
            name='attention_bias'
        )
        self.u = self.add_weight(
            shape=(self.units,),
            initializer='random_normal',
            trainable=True,
            name='attention_context'
        )
        super(AttentionLayer, self).build(input_shape)

    def call(self, inputs):
        # Attention mechanism: score each timestep, then take the weighted sum
        score = tf.nn.tanh(tf.tensordot(inputs, self.W, axes=1) + self.b)
        attention_weights = tf.nn.softmax(tf.tensordot(score, self.u, axes=1), axis=1)
        context_vector = tf.reduce_sum(inputs * tf.expand_dims(attention_weights, -1), axis=1)
        return context_vector, attention_weights

    def get_config(self):
        config = super(AttentionLayer, self).get_config()
        config.update({'units': self.units})
        return config
# Custom model class
class TextClassificationModel(keras.Model):
    def __init__(self, vocab_size, embedding_dim, max_length, num_classes):
        super(TextClassificationModel, self).__init__()
        self.embedding = layers.Embedding(vocab_size, embedding_dim, input_length=max_length)
        self.lstm = layers.LSTM(128, return_sequences=True)
        self.attention = AttentionLayer(64)
        self.dropout = layers.Dropout(0.5)
        self.classifier = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=None):
        x = self.embedding(inputs)
        x = self.lstm(x)
        context_vector, attention_weights = self.attention(x)
        x = self.dropout(context_vector, training=training)
        return self.classifier(x)
# Model instantiation
model = TextClassificationModel(
    vocab_size=10000,
    embedding_dim=100,
    max_length=500,
    num_classes=5
)
# Model compilation
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# Determine model shape with dummy data
dummy_input = tf.random.uniform((1, 500), maxval=10000, dtype=tf.int32)
_ = model(dummy_input)
model.summary()
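Because subclassed models do not serialize as easily as Sequential or Functional ones, the most robust way to persist this model is to save its weights and rebuild the architecture; the file name below is a hypothetical example:
# Save only the weights (TensorFlow checkpoint format works for subclassed models)
model.save_weights('text_model_ckpt')

# Rebuild an identical architecture and restore the weights
restored = TextClassificationModel(
    vocab_size=10000, embedding_dim=100, max_length=500, num_classes=5
)
_ = restored(dummy_input)  # create the variables before loading
restored.load_weights('text_model_ckpt')

# If the custom layer ever appears inside a saved model file, pass it
# to load_model via custom_objects, e.g.
# keras.models.load_model('file.keras', custom_objects={'AttentionLayer': AttentionLayer})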
Transfer Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import ResNet50
# Load pre-trained model
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3)
)
# Freeze base model weights
base_model.trainable = False
# Add custom classifier
model = keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.2),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(10, activation='softmax')  # 10-class classification
])
# Initial training (feature extraction)
model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
print("Phase 1: Feature extraction")
# history1 = model.fit(train_dataset, epochs=10, validation_data=val_dataset)
# Fine-tuning
base_model.trainable = True
# Make only last few layers trainable
for layer in base_model.layers[:-20]:
    layer.trainable = False
# Recompile with lower learning rate
model.compile(
    optimizer=keras.optimizers.Adam(1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
print("Phase 2: Fine-tuning")
# history2 = model.fit(train_dataset, epochs=10, validation_data=val_dataset)
model.summary()
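The commented-out fit calls assume train_dataset and val_dataset already exist. One common way to build them is the directory loader below; the directory paths are hypothetical:
train_dataset = keras.utils.image_dataset_from_directory(
    'data/train',             # hypothetical path
    image_size=(224, 224),
    batch_size=32,
    label_mode='categorical'  # matches the categorical_crossentropy loss
)
val_dataset = keras.utils.image_dataset_from_directory(
    'data/val',               # hypothetical path
    image_size=(224, 224),
    batch_size=32,
    label_mode='categorical'
)
# ResNet50 expects its own input preprocessing
preprocess = keras.applications.resnet50.preprocess_input
train_dataset = train_dataset.map(lambda x, y: (preprocess(x), y))
val_dataset = val_dataset.map(lambda x, y: (preprocess(x), y))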
Model Saving and Loading
import tensorflow as tf
from tensorflow import keras
# Build model
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(20,)),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# Train with sample data
import numpy as np
X_train = np.random.random((1000, 20))
y_train = keras.utils.to_categorical(np.random.randint(10, size=(1000, 1)), 10)
model.fit(X_train, y_train, epochs=10, verbose=0)
# 1. Save entire model (recommended)
model.save('complete_model.keras')
# 2. Save in HDF5 format
model.save('model.h5')
# 3. Save weights only
model.save_weights('model_weights.h5')
# 4. Save model architecture (JSON)
model_json = model.to_json()
with open('model_architecture.json', 'w') as f:
    f.write(model_json)
# Load models
# 1. Load complete model
loaded_model = keras.models.load_model('complete_model.keras')
# 2. Load from HDF5 format
loaded_model_h5 = keras.models.load_model('model.h5')
# 3. Load architecture and weights separately
with open('model_architecture.json', 'r') as f:
    model_json = f.read()
loaded_model_from_json = keras.models.model_from_json(model_json)
loaded_model_from_json.load_weights('model_weights.h5')
loaded_model_from_json.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# Save/load in SavedModel format
tf.saved_model.save(model, 'saved_model_dir')
loaded_saved_model = tf.saved_model.load('saved_model_dir')
# Verify predictions
test_input = np.random.random((5, 20))
original_pred = model.predict(test_input)
loaded_pred = loaded_model.predict(test_input)
print("Prediction consistency:", np.allclose(original_pred, loaded_pred))
print(f"Original model accuracy: {model.evaluate(X_train, y_train, verbose=0)[1]:.4f}")
print(f"Loaded model accuracy: {loaded_model.evaluate(X_train, y_train, verbose=0)[1]:.4f}")