HashiCorp Nomad
DevOps Tool
Overview
HashiCorp Nomad is a simple, flexible workload orchestrator that schedules containers, VMs, and standalone binaries through a single interface, offering a lighter-weight alternative to Kubernetes.
Details
HashiCorp Nomad is a workload orchestration platform developed by HashiCorp and first released in 2015 as a "simple and flexible workload orchestrator." It schedules diverse workloads through a single control plane: containers (Docker, Podman), virtual machines (QEMU), standalone binaries, and Java applications. It ships as a single binary, involves far fewer concepts and less configuration than Kubernetes, and integrates tightly with the rest of the HashiCorp stack (Vault, Consul, Terraform). Beyond the basics, it provides a high-performance scheduler, multi-region and multi-cloud federation, zero-downtime deployments, rolling updates, and canary deployments. These traits make it well suited to edge computing, IoT, small clusters, and mixed-workload environments, and it is popular with organizations that want to avoid the complexity of Kubernetes. Adoption continues to grow in finance, manufacturing, energy, and other industries among enterprises seeking simpler infrastructure and deeper HashiCorp ecosystem integration.
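A job specification makes the structure concrete: a job contains groups, and groups contain tasks bound to a driver. The sketch below is illustrative (the names and the hashicorp/http-echo image are placeholders) and can be run against a dev-mode agent started with nomad agent -dev:
# hello.nomad -- minimal illustrative job
job "hello" {
  datacenters = ["dc1"]
  type        = "service"

  group "app" {
    count = 1

    task "echo" {
      driver = "docker"

      config {
        image = "hashicorp/http-echo"
        args  = ["-text", "hello from nomad"]
      }

      resources {
        cpu    = 50
        memory = 64
      }
    }
  }
}
Submit it with nomad job run hello.nomad; the larger examples later in this page build on exactly this job/group/task shape.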
Advantages and Disadvantages
Advantages
- Simplicity: Fewer concepts than Kubernetes, making it easier to learn and operate
- Multi-workload: Unified management of containers, VMs, and standalone binaries
- Lightweight: Runs with minimal resources; deployed as a single binary
- HashiCorp stack integration: Tight integration with Vault, Consul, and Terraform
- Edge-ready: Well suited to small-scale and distributed environments
- Operability: Intuitive web UI and simple configuration
- Flexibility: Integrates easily with existing infrastructure
- Multi-region: Federated management across multiple data centers
Disadvantages
- Ecosystem: Smaller tool ecosystem than Kubernetes
- Feature limitations: Lacks some of Kubernetes' more advanced orchestration features
- Learning resources: Less documentation and a smaller community than Kubernetes
- Third-party support: Many Kubernetes-oriented tools offer little or no Nomad support
- Networking: Constraints on complex network topologies
- Storage: More limited persistent volume management
- Enterprise support: Fewer enterprise support options
- Talent: Kubernetes experience is far more common in the hiring market
Key Links
- HashiCorp Nomad Official Website (https://www.nomadproject.io/)
- Nomad Official Documentation (https://developer.hashicorp.com/nomad/docs)
- Nomad GitHub Repository (https://github.com/hashicorp/nomad)
- Nomad Tutorials on HashiCorp Developer (https://developer.hashicorp.com/nomad/tutorials)
- HashiCorp (https://www.hashicorp.com/)
Code Examples
Nomad Cluster Setup
# Download and install Nomad binary
wget https://releases.hashicorp.com/nomad/1.6.0/nomad_1.6.0_linux_amd64.zip
unzip nomad_1.6.0_linux_amd64.zip
sudo mv nomad /usr/local/bin/
# Create configuration file (server mode)
sudo mkdir -p /etc/nomad.d
sudo tee /etc/nomad.d/nomad.hcl > /dev/null << 'EOF'
datacenter = "dc1"
data_dir   = "/opt/nomad/data"
bind_addr  = "0.0.0.0"

server {
  enabled          = true
  bootstrap_expect = 3

  server_join {
    retry_join = ["10.0.1.10", "10.0.1.11", "10.0.1.12"]
  }
}

# This example runs server and client agents on the same hosts;
# for dedicated workers, see the client-only example below.
client {
  enabled = true
  servers = ["10.0.1.10:4647", "10.0.1.11:4647", "10.0.1.12:4647"]
}

ui {
  enabled = true
}

consul {
  address = "127.0.0.1:8500"
}

vault {
  enabled = true
  address = "http://vault.example.com:8200"
}
EOF
# systemd service configuration ('EOF' is quoted so the shell does not expand $MAINPID)
sudo tee /etc/systemd/system/nomad.service > /dev/null << 'EOF'
[Unit]
Description=Nomad
Documentation=https://www.nomadproject.io/
Requires=network-online.target
After=network-online.target

[Service]
Type=notify
ExecStart=/usr/local/bin/nomad agent -config=/etc/nomad.d/
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable nomad
sudo systemctl start nomad
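The configuration above is for combined server/client nodes, which suits a small cluster. For dedicated worker nodes, a client-only agent configuration is simpler; a minimal sketch, reusing the illustrative server IPs from above:
# /etc/nomad.d/client.hcl -- client-only agent (sketch)
datacenter = "dc1"
data_dir   = "/opt/nomad/data"
bind_addr  = "0.0.0.0"

client {
  enabled = true
  servers = ["10.0.1.10:4647", "10.0.1.11:4647", "10.0.1.12:4647"]
}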
Job Definition (Docker Container)
# web-app.nomad
job "web-app" {
datacenters = ["dc1"]
type = "service"
group "web" {
count = 3
network {
port "http" {
static = 8080
to = 80
}
}
service {
name = "web-app"
port = "http"
tags = [
"web",
"nginx",
"frontend"
]
check {
type = "http"
path = "/health"
interval = "30s"
timeout = "2s"
}
}
task "nginx" {
driver = "docker"
config {
image = "nginx:alpine"
ports = ["http"]
mount {
type = "bind"
source = "local/nginx.conf"
target = "/etc/nginx/nginx.conf"
}
}
template {
data = <<EOF
events {
worker_connections 1024;
}
http {
upstream app {
server {{ range service "app-backend" }}{{ .Address }}:{{ .Port }};{{ end }}
}
server {
listen 80;
location / {
proxy_pass http://app;
}
location /health {
return 200 "OK";
}
}
}
EOF
destination = "local/nginx.conf"
}
resources {
cpu = 100
memory = 128
}
env {
ENV = "production"
}
}
}
update {
max_parallel = 1
min_healthy_time = "10s"
healthy_deadline = "3m"
progress_deadline = "10m"
auto_revert = true
canary = 1
}
}
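With canary = 1, each deployment pauses until the canary is promoted manually with nomad deployment promote (see the CLI section below). If hands-off promotion is preferred, the update stanza also supports auto_promote; a sketch:
update {
  max_parallel = 1
  canary       = 1
  auto_promote = true   # promote automatically once canary allocations are healthy
  auto_revert  = true
}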
Job Definition (Binary Application)
# go-app.nomad
job "go-app" {
datacenters = ["dc1"]
type = "service"
group "app" {
count = 2
network {
port "api" {
to = 8080
}
}
service {
name = "go-api"
port = "api"
check {
type = "tcp"
interval = "10s"
timeout = "2s"
}
}
task "api-server" {
driver = "exec"
artifact {
source = "https://releases.example.com/myapp/v1.2.3/myapp-linux-amd64"
destination = "local/"
mode = "file"
}
config {
command = "local/myapp-linux-amd64"
args = ["--port", "${NOMAD_PORT_api}"]
}
env {
DATABASE_URL = "postgresql://user:${vault_secret}@db.example.com:5432/myapp"
}
vault {
policies = ["database-read"]
}
resources {
cpu = 200
memory = 256
}
logs {
max_files = 3
max_file_size = 10
}
}
}
}
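How Nomad reacts when a task like this crashes is governed by the restart stanza, set at the group level and optionally overridden per task; a sketch with illustrative values:
restart {
  attempts = 2        # restarts allowed within the interval window
  interval = "30m"
  delay    = "15s"    # pause between restart attempts
  mode     = "fail"   # mark the allocation failed once attempts are exhausted
}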
Batch Job (Data Processing)
# batch-job.nomad
job "data-processing" {
datacenters = ["dc1"]
type = "batch"
periodic {
cron = "0 2 * * *" # Daily at 2 AM
prohibit_overlap = true
}
group "process" {
count = 1
task "etl" {
driver = "docker"
config {
image = "python:3.9"
command = "python"
args = ["process_data.py"]
mount {
type = "bind"
source = "local/script"
target = "/app"
}
}
artifact {
source = "git::https://github.com/company/data-scripts.git"
destination = "local/script"
}
env {
AWS_ACCESS_KEY_ID = "${aws_access_key}"
AWS_SECRET_ACCESS_KEY = "${aws_secret_key}"
DATA_BUCKET = "my-data-bucket"
}
vault {
policies = ["aws-s3-access"]
}
resources {
cpu = 500
memory = 1024
}
}
}
}
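For on-demand runs instead of a fixed schedule, the same workload can be written as a parameterized job and triggered per invocation. The sketch below is illustrative: the input_path meta key is hypothetical, and the artifact/mount stanzas from the periodic job above are elided.
# dispatch-job.nomad -- parameterized variant (sketch)
job "data-processing-dispatch" {
  datacenters = ["dc1"]
  type        = "batch"

  parameterized {
    payload       = "optional"
    meta_required = ["input_path"]
  }

  group "process" {
    task "etl" {
      driver = "docker"

      config {
        image   = "python:3.9"
        command = "python"
        args    = ["/app/process_data.py", "--input", "${NOMAD_META_input_path}"]
      }

      resources {
        cpu    = 500
        memory = 1024
      }
    }
  }
}
Register it once with nomad job run dispatch-job.nomad, then trigger runs with nomad job dispatch -meta input_path=/data/input.csv data-processing-dispatch.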
System Job (Log Collection)
# log-collector.nomad
job "log-collector" {
datacenters = ["dc1"]
type = "system"
group "collector" {
task "fluentd" {
driver = "docker"
config {
image = "fluentd:latest"
mount {
type = "bind"
source = "/var/log"
target = "/var/log"
readonly = true
}
mount {
type = "bind"
source = "local/fluent.conf"
target = "/fluentd/etc/fluent.conf"
}
}
template {
data = <<EOF
<source>
@type tail
path /var/log/nomad/nomad.log
pos_file /var/log/fluentd/nomad.log.pos
tag nomad.log
format json
</source>
<match nomad.**>
@type elasticsearch
host {{ range service "elasticsearch" }}{{ .Address }}{{ end }}
port {{ range service "elasticsearch" }}{{ .Port }}{{ end }}
index_name nomad-logs
</match>
EOF
destination = "local/fluent.conf"
}
resources {
cpu = 100
memory = 128
}
}
}
}
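System jobs land on every eligible client. To restrict a collector like this to a subset of nodes, a constraint stanza can be added at the job or group level; the sketch below assumes a hypothetical node_class = "logging" set in those clients' configuration:
constraint {
  attribute = "${node.class}"
  value     = "logging"   # hypothetical node_class from client configuration
}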
CSI Storage Plugin
# csi-plugin.nomad
job "csi-plugin" {
datacenters = ["dc1"]
type = "system"
group "csi-node" {
task "csi-plugin" {
driver = "docker"
config {
image = "my-registry/csi-driver:latest"
args = [
"--endpoint=${CSI_ENDPOINT}",
"--nodeid=${node.unique.id}"
]
privileged = true
}
csi_plugin {
id = "ebs"
type = "node"
mount_dir = "/csi"
}
resources {
cpu = 100
memory = 128
}
}
}
}
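Once the node plugin reports healthy, storage is used in two steps: register a volume against the plugin, then claim it from a job. A sketch, where the volume names and the external EBS volume ID are placeholders:
# volume.hcl -- register with: nomad volume register volume.hcl
id          = "app-data"
name        = "app-data"
type        = "csi"
plugin_id   = "ebs"
external_id = "vol-0123456789abcdef0"   # placeholder EBS volume ID

capability {
  access_mode     = "single-node-writer"
  attachment_mode = "file-system"
}
A job then claims the volume with a group-level volume block (source = "app-data", matching access_mode and attachment_mode) and mounts it into a task with a volume_mount stanza.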
Nomad CLI Operations
# Job management
nomad job run web-app.nomad
nomad job status web-app
nomad job stop web-app
nomad job restart web-app
# Scaling
nomad job scale web-app 5
# Deployment verification
nomad deployment list
nomad deployment status <deployment-id>
nomad deployment promote <deployment-id>
# Allocation management
nomad alloc status <alloc-id>
nomad alloc logs <alloc-id>
nomad alloc exec <alloc-id> /bin/sh
# Node management
nomad node status
nomad node drain -enable <node-id>
nomad node eligibility -disable <node-id>
# System verification
nomad server members
nomad operator autopilot get-config
nomad system gc
# Metrics and monitoring
nomad agent-info
curl -s http://localhost:4646/v1/metrics
nomad monitor -log-level=DEBUG
High Availability Configuration
# nomad.hcl (HA configuration)
server {
  enabled          = true
  bootstrap_expect = 3

  server_join {
    retry_join = ["provider=aws tag_key=Environment tag_value=production"]
  }

  # Gossip encryption key; generate one with: nomad operator gossip keyring generate
  encrypt = "<base64-encoded-gossip-key>"
}

# Autopilot is configured as a top-level stanza
autopilot {
  cleanup_dead_servers      = true
  last_contact_threshold    = "200ms"
  max_trailing_logs         = 250
  server_stabilization_time = "10s"
}

acl {
  enabled = true
}

tls {
  http = true
  rpc  = true

  ca_file   = "/etc/nomad.d/ca.pem"
  cert_file = "/etc/nomad.d/server.pem"
  key_file  = "/etc/nomad.d/server-key.pem"
}