Database
Amazon DocumentDB
Overview
Amazon DocumentDB is a fully managed MongoDB-compatible document database service provided by AWS. It maintains high compatibility with MongoDB APIs while delivering high availability, durability, and scalability through AWS's proprietary distributed storage architecture.
Details
Amazon DocumentDB was released by AWS in 2019, designed to run MongoDB workloads in a cloud-native manner. It features the following characteristics:
Key Features
- MongoDB Compatibility: API-compatible with MongoDB 3.6, 4.0, and 5.0 (most, but not all, MongoDB features are supported)
- Distributed Architecture: Data is automatically replicated six ways across three Availability Zones
- Auto Scaling: Storage automatically scales up to 64TB as needed
- High Availability: 99.99% SLA with automatic failover
- Security: VPC isolation, encryption (at rest and in transit), IAM integration
- Automated Backup: Continuous backups with point-in-time recovery for up to 35 days of retention (see the CLI sketch after this list)
- Performance: High-speed I/O with SSD storage, read replica support
- Operational Simplification: Fully managed service with automatic patching
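Backup behavior is configured per cluster; a minimal AWS CLI sketch, assuming the cluster names are placeholders and the option values should be checked against current AWS documentation:
# Set the backup retention window (1-35 days)
aws docdb modify-db-cluster \
--db-cluster-identifier sample-cluster \
--backup-retention-period 35 \
--apply-immediately
# Restore a new cluster to a specific point in time
# (instances must still be created for the restored cluster afterwards)
aws docdb restore-db-cluster-to-point-in-time \
--db-cluster-identifier sample-cluster-restored \
--source-db-cluster-identifier sample-cluster \
--restore-to-time 2024-01-15T12:00:00Z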
Architecture
- Separated Compute and Storage: Independently scalable
- Cluster Configuration: Up to 16 instances per cluster (1 primary plus up to 15 read replicas; see the CLI sketch after this list)
- Distributed Storage: 6-way replication with auto-repair
- Multi-AZ Support: High availability and disaster recovery
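Because compute and storage are decoupled, read capacity is scaled by adding or removing replica instances without touching the storage layer; a minimal CLI sketch (cluster and instance identifiers are placeholders):
# Add a read replica to an existing cluster; storage is unaffected
aws docdb create-db-instance \
--db-instance-identifier sample-replica-1 \
--db-instance-class db.r5.large \
--engine docdb \
--db-cluster-identifier sample-cluster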
Supported Instance Types
- db.t3.medium through db.r5.24xlarge: Covers small development workloads up to large production workloads (see the CLI sketch after this list)
- Memory Optimized: r5, r6g families
- General Purpose: t3, m5 families
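The instance classes actually available differ by region and engine version; a hedged way to check with the AWS CLI:
# List orderable DocumentDB instance classes in the current region
aws docdb describe-orderable-db-instance-options \
--engine docdb \
--query 'OrderableDBInstanceOptions[].DBInstanceClass' \
--output text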
Advantages and Disadvantages
Advantages
- MongoDB Compatibility: Migrate existing MongoDB applications with little or no application change
- Reduced Operational Burden: Fully managed with no infrastructure management
- High Availability: 99.99% SLA with automatic failover
- Auto Scaling: Independent scaling of storage and compute
- Strong Security: Integration with AWS IAM, VPC isolation, and encryption at rest and in transit (see the CLI sketch after this list)
- Automated Backup: Continuous backup and point-in-time recovery
- Performance: High-speed SSD storage with read replicas
- AWS Ecosystem: Tight integration with other AWS services
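Encryption at rest is enabled when the cluster is created; a hedged CLI sketch in which the identifiers and the KMS key ARN are placeholders (omitting --kms-key-id uses the default AWS-managed key):
# Create a cluster with encryption at rest using a customer-managed KMS key
aws docdb create-db-cluster \
--db-cluster-identifier secure-cluster \
--engine docdb \
--master-username username \
--master-user-password password \
--storage-encrypted \
--kms-key-id arn:aws:kms:us-east-1:123456789012:key/example-key-id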
Disadvantages
- Vendor Lock-in: AWS-specific service, difficult to migrate to other clouds
- Cost: More expensive than self-managed MongoDB
- Feature Limitations: Some MongoDB features (Map-Reduce, etc.) not supported
- Region Limitations: Available in limited regions
- Customization Restrictions: Limited control over infrastructure and detailed settings
- Latency: Slight overhead compared to native MongoDB
Code Examples
Installation & Setup
# Configure AWS CLI credentials (the AWS CLI itself must already be installed)
aws configure
# Create DocumentDB cluster (AWS CLI)
aws docdb create-db-cluster \
--db-cluster-identifier sample-cluster \
--engine docdb \
--master-username username \
--master-user-password password \
--vpc-security-group-ids sg-12345678 \
--db-subnet-group-name sample-subnet-group
# Create instance
aws docdb create-db-instance \
--db-instance-identifier sample-instance \
--db-instance-class db.t3.medium \
--engine docdb \
--db-cluster-identifier sample-cluster
# Get connection string
aws docdb describe-db-clusters \
--db-cluster-identifier sample-cluster \
--query 'DBClusters[0].Endpoint'
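The driver examples below connect over TLS and need the AWS CA bundle on disk; a minimal sketch of downloading the current global bundle (URL per AWS's RDS/DocumentDB trust store):
# Download the CA bundle used for TLS connections
wget https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem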
Basic Operations (CRUD)
// Using Node.js MongoDB Driver
const MongoClient = require('mongodb').MongoClient;
// Connection configuration (SSL required)
const client = new MongoClient('mongodb://username:password@sample-cluster.cluster-123456789.us-east-1.docdb.amazonaws.com:27017/?tls=true&replicaSet=rs0&readPreference=secondaryPreferred', {
tlsCAFile: 'global-bundle.pem', // CA bundle downloaded from AWS (supersedes the older rds-ca-2019-root.pem)
retryWrites: false
});
async function basicOperations() {
try {
await client.connect();
const db = client.db('sampledb');
const collection = db.collection('employees');
// Insert document
const insertResult = await collection.insertOne({
name: 'John Doe',
email: '[email protected]',
department: 'Engineering',
salary: 75000,
joinDate: new Date(),
skills: ['JavaScript', 'Python', 'AWS']
});
console.log('Inserted document:', insertResult.insertedId);
// Insert multiple documents
await collection.insertMany([
{
name: 'Jane Smith',
email: '[email protected]',
department: 'Marketing',
salary: 65000,
skills: ['Marketing', 'Analytics']
},
{
name: 'Bob Johnson',
email: '[email protected]',
department: 'Engineering',
salary: 80000,
skills: ['Java', 'Kubernetes']
}
]);
// Find documents
const employees = await collection.find({
department: 'Engineering'
}).toArray();
console.log('Engineering employees:', employees);
// Update document
await collection.updateOne(
{ email: '[email protected]' },
{
$set: { salary: 82000 },
$push: { skills: 'Docker' }
}
);
// Delete document
await collection.deleteOne({ email: '[email protected]' });
} finally {
await client.close();
}
}
basicOperations().catch(console.error);
Data Modeling
// Complex document structure example
const { ObjectId } = require('mongodb');
const customerSchema = {
_id: new ObjectId(),
customerId: 'CUST-12345',
profile: {
firstName: 'John',
lastName: 'Doe',
email: '[email protected]',
phone: '+1-555-1234-5678',
address: {
zipCode: '10001',
state: 'New York',
city: 'New York',
street: '123 Main St'
}
},
orders: [
{
orderId: 'ORD-001',
date: new Date('2024-01-15'),
items: [
{
productId: 'PROD-A',
name: 'Laptop',
quantity: 1,
price: 1500
}
],
total: 1500,
status: 'shipped'
}
],
preferences: {
language: 'en',
currency: 'USD',
notifications: {
email: true,
sms: false
}
},
metadata: {
createdAt: new Date(),
updatedAt: new Date(),
source: 'web'
}
};
// Index creation examples
async function createIndexes(db) {
const collection = db.collection('customers');
// Single field index
await collection.createIndex({ 'profile.email': 1 });
// Compound index
await collection.createIndex({
'customerId': 1,
'orders.date': -1
});
// Text index
await collection.createIndex({
'profile.firstName': 'text',
'profile.lastName': 'text'
});
// Partial index
await collection.createIndex(
{ 'orders.status': 1 },
{ partialFilterExpression: { 'orders.status': { $ne: 'cancelled' } } }
);
}
Indexing & Optimization
// Index strategy
async function indexStrategy(db) {
const collection = db.collection('products');
// Indexes for efficient queries
await collection.createIndex({ category: 1, price: -1 });
await collection.createIndex({ 'tags': 1 });
await collection.createIndex({ 'location.coordinates': '2dsphere' });
// Check index usage
const stats = await collection.aggregate([
{ $indexStats: {} }
]).toArray();
console.log('Index usage stats:', stats);
// Query plan analysis
const explainResult = await collection.find({
category: 'electronics',
price: { $gte: 100 }
}).explain('executionStats');
console.log('Query execution plan:', explainResult);
}
// Aggregation pipeline (Analytics)
async function analyticsQueries(db) {
const orders = db.collection('orders');
// Sales report
const salesReport = await orders.aggregate([
{
$match: {
orderDate: {
$gte: new Date('2024-01-01'),
$lt: new Date('2024-02-01')
}
}
},
{
$group: {
_id: '$customerId',
totalAmount: { $sum: '$total' },
orderCount: { $sum: 1 },
avgOrderValue: { $avg: '$total' }
}
},
{
$sort: { totalAmount: -1 }
},
{
$limit: 100
}
]).toArray();
return salesReport;
}
Practical Examples
// Production environment configuration
const productionConfig = {
// Connection pool settings
maxPoolSize: 50,
minPoolSize: 5,
connectTimeoutMS: 30000,
socketTimeoutMS: 30000,
// Read settings
readPreference: 'secondaryPreferred',
readConcern: { level: 'majority' },
// Write settings
writeConcern: { w: 'majority', j: true },
// SSL/TLS settings
tls: true,
tlsCAFile: './global-bundle.pem',
tlsAllowInvalidHostnames: false,
// Retry settings
retryWrites: false, // Disabled for DocumentDB
retryReads: true
};
// Error handling and retry logic
async function robustOperation(collection, operation) {
const maxRetries = 3;
let retries = 0;
while (retries < maxRetries) {
try {
return await operation(collection);
} catch (error) {
retries++;
if (retries >= maxRetries) {
throw error;
}
// Retry only transient errors (connection/network problems);
// duplicate key errors and other client-side errors are not retried
if (error.message.includes('connection') ||
error.message.includes('network') ||
error.message.includes('timeout')) {
await new Promise(resolve => setTimeout(resolve, 1000 * retries));
continue;
}
throw error; // Don't retry
}
}
}
// Connection health check (complements CloudWatch metrics monitoring on the AWS side)
async function healthCheck() {
try {
await client.db('admin').command({ ismaster: 1 });
return { status: 'healthy', timestamp: new Date() };
} catch (error) {
return {
status: 'unhealthy',
error: error.message,
timestamp: new Date()
};
}
}
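The application-level health check above pairs with server-side metrics; a hedged sketch of pulling one DocumentDB metric from CloudWatch (instance identifier and time window are placeholders):
# Fetch average CPU utilization for a DocumentDB instance
aws cloudwatch get-metric-statistics \
--namespace AWS/DocDB \
--metric-name CPUUtilization \
--dimensions Name=DBInstanceIdentifier,Value=sample-instance \
--start-time 2024-01-15T00:00:00Z \
--end-time 2024-01-15T01:00:00Z \
--period 300 \
--statistics Average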
Best Practices
// Security best practices
const securityConfig = {
// Use IAM authentication (recommended)
authMechanism: 'MONGODB-AWS',
authSource: '$external',
// TLS configuration (the server CA bundle is required for all connections)
tls: true,
tlsCAFile: 'global-bundle.pem'
};
// Performance optimization
class DocumentDBOptimizer {
constructor(client) {
this.client = client;
}
// Batch processing
async batchInsert(collection, documents, batchSize = 1000) {
const batches = [];
for (let i = 0; i < documents.length; i += batchSize) {
batches.push(documents.slice(i, i + batchSize));
}
const results = [];
for (const batch of batches) {
const result = await collection.insertMany(batch, { ordered: false });
results.push(result);
}
return results;
}
// Efficient pagination
async paginateWithCursor(collection, query, limit = 20, lastId = null) {
const pipeline = [
{ $match: query }
];
if (lastId) {
pipeline[0].$match._id = { $gt: new ObjectId(lastId) }; // ObjectId comes from require('mongodb')
}
pipeline.push(
{ $sort: { _id: 1 } },
{ $limit: limit + 1 }
);
const results = await collection.aggregate(pipeline).toArray();
const hasMore = results.length > limit;
if (hasMore) {
results.pop();
}
return {
data: results,
hasMore,
nextCursor: hasMore ? results[results.length - 1]._id : null
};
}
}
// Monitoring and logging setup
// Note: command events ('commandStarted', etc.) are only emitted when the client
// is created with { monitorCommands: true }
function setupLogging(client) {
// Connection events
client.on('serverOpening', () => {
console.log('Connected to DocumentDB');
});
client.on('serverClosed', () => {
console.log('Disconnected from DocumentDB');
});
// Performance monitoring
client.on('commandStarted', (event) => {
console.log('Command started:', event.commandName);
});
client.on('commandSucceeded', (event) => {
console.log(`Command ${event.commandName} succeeded in ${event.duration}ms`);
});
client.on('commandFailed', (event) => {
console.error(`Command ${event.commandName} failed:`, event.failure);
});
}
setupLogging(client);