HashiCorp Nomad


Overview

HashiCorp Nomad is a simple and flexible workload orchestrator that provides unified scheduling for containers, VMs, and binary applications, offering a lighter and simpler alternative to Kubernetes.

Details

HashiCorp Nomad is a workload orchestration platform developed by HashiCorp and first released in 2015. It was designed as a "simple and flexible workload orchestrator" and can schedule diverse workloads through a single system: containers (Docker, Podman), virtual machines (QEMU), standalone binaries, and Java applications. It ships as a single binary, requires fewer concepts and less configuration than Kubernetes, and integrates tightly with the rest of the HashiCorp stack (Vault, Consul, Terraform). It also offers a high-performance scheduler, multi-region and multi-cloud support, zero-downtime deployments, rolling updates, and canary deployments. Nomad is particularly well suited to edge computing, IoT, small clusters, and mixed-workload environments, and it is popular with organizations that want to avoid the operational complexity of Kubernetes. Adoption is growing in finance, manufacturing, energy, and other industries among enterprises looking to simplify infrastructure and integrate with the HashiCorp ecosystem.
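
To illustrate the "fewer concepts" point: a complete Nomad job specification needs only a job, a group, and a task. The minimal sketch below (job name and image are illustrative) runs a Redis container with the Docker driver.

# minimal.nomad -- one job, one group, one task
job "redis-cache" {
  datacenters = ["dc1"]

  group "cache" {
    count = 1

    task "redis" {
      driver = "docker"

      config {
        image = "redis:7-alpine"
      }

      resources {
        cpu    = 100
        memory = 64
      }
    }
  }
}

Submitting it is a single command: nomad job run minimal.nomad.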

Advantages and Disadvantages

Advantages

  • Simplicity: Fewer concepts and easier to understand than Kubernetes
  • Multi-workload: Unified management of containers, VMs, and standalone binaries
  • Lightweight: Runs with minimal resources and deploys as a single binary
  • HashiCorp stack integration: Tight integration with Vault, Consul, and Terraform
  • Edge-ready: Well suited to small-scale and distributed environments
  • Operability: Intuitive web UI and simple configuration
  • Flexibility: Integrates easily with existing infrastructure
  • Multi-region: Unified management across multiple data centers

Disadvantages

  • Ecosystem: Tooling ecosystem is not as rich as Kubernetes'
  • Feature limitations: Lacks some advanced orchestration features found in Kubernetes
  • Learning resources: Less documentation and a smaller community than Kubernetes
  • Third-party support: Many Kubernetes-oriented tools offer limited or no Nomad support
  • Networking: Constraints on complex network configurations
  • Storage: Persistent volume management is more limited
  • Enterprise support: Fewer enterprise support options
  • Talent: Engineers with Kubernetes experience are easier to hire than Nomad specialists

Key Links

  • Official site: https://www.nomadproject.io/
  • Documentation: https://developer.hashicorp.com/nomad/docs
  • Source code: https://github.com/hashicorp/nomad

Code Examples

Nomad Cluster Setup

# Download and install Nomad binary
wget https://releases.hashicorp.com/nomad/1.6.0/nomad_1.6.0_linux_amd64.zip
unzip nomad_1.6.0_linux_amd64.zip
sudo mv nomad /usr/local/bin/

# Create the agent configuration file (this example runs server and client on the same nodes)
sudo mkdir -p /etc/nomad.d
sudo tee /etc/nomad.d/nomad.hcl > /dev/null << 'EOF'
datacenter = "dc1"
data_dir   = "/opt/nomad/data"
bind_addr  = "0.0.0.0"

server {
  enabled          = true
  bootstrap_expect = 3
  server_join {
    retry_join = ["10.0.1.10", "10.0.1.11", "10.0.1.12"]
  }
}

client {
  enabled = true
  servers = ["10.0.1.10:4647", "10.0.1.11:4647", "10.0.1.12:4647"]
}

ui_config {
  enabled = true
}

consul {
  address = "127.0.0.1:8500"
}

vault {
  enabled = true
  address = "http://vault.example.com:8200"
}
EOF

# systemd service configuration (quoted heredoc keeps $MAINPID literal)
sudo tee /etc/systemd/system/nomad.service > /dev/null << 'EOF'
[Unit]
Description=Nomad
Documentation=https://www.nomadproject.io/
Requires=network-online.target
After=network-online.target

[Service]
Type=notify
ExecStart=/usr/local/bin/nomad agent -config=/etc/nomad.d/
ExecReload=/bin/kill -HUP $MAINPID
KillMode=process
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF

sudo systemctl enable nomad
sudo systemctl start nomad
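
Once the service is running on each node, cluster membership can be verified from any of them (assuming the agent's default ports):

# Confirm the agent started and the cluster formed
sudo systemctl status nomad
nomad server members   # all three servers should report "alive"
nomad node status      # each client should be listed as "ready"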

Job Definition (Docker Container)

# web-app.nomad
job "web-app" {
  datacenters = ["dc1"]
  type        = "service"

  group "web" {
    count = 3

    network {
      port "http" {
        static = 8080
        to     = 80
      }
    }

    service {
      name = "web-app"
      port = "http"
      
      tags = [
        "web",
        "nginx",
        "frontend"
      ]

      check {
        type     = "http"
        path     = "/health"
        interval = "30s"
        timeout  = "2s"
      }
    }

    task "nginx" {
      driver = "docker"

      config {
        image = "nginx:alpine"
        ports = ["http"]
        
        mount {
          type   = "bind"
          source = "local/nginx.conf"
          target = "/etc/nginx/nginx.conf"
        }
      }

      template {
        data = <<EOF
events {
    worker_connections 1024;
}
http {
    upstream app {
        {{ range service "app-backend" }}server {{ .Address }}:{{ .Port }};
        {{ end }}
    }
    server {
        listen 80;
        location / {
            proxy_pass http://app;
        }
        location /health {
            return 200 "OK";
        }
    }
}
EOF
        destination = "local/nginx.conf"
      }

      resources {
        cpu    = 100
        memory = 128
      }

      env {
        ENV = "production"
      }
    }
  }

  update {
    max_parallel      = 1
    min_healthy_time  = "10s"
    healthy_deadline  = "3m"
    progress_deadline = "10m"
    auto_revert       = true
    canary            = 1
  }
}
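
Because the update stanza sets canary = 1, a new job version is deployed alongside the running allocations and must be promoted explicitly. A typical workflow:

# Preview the scheduler's plan, submit the job, then promote the canary once it is healthy
nomad job plan web-app.nomad
nomad job run web-app.nomad
nomad job status web-app
nomad deployment promote <deployment-id>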

Job Definition (Binary Application)

# go-app.nomad
job "go-app" {
  datacenters = ["dc1"]
  type        = "service"

  group "app" {
    count = 2

    network {
      port "api" {
        to = 8080
      }
    }

    service {
      name = "go-api"
      port = "api"
      
      check {
        type     = "tcp"
        interval = "10s"
        timeout  = "2s"
      }
    }

    task "api-server" {
      driver = "exec"

      artifact {
        source      = "https://releases.example.com/myapp/v1.2.3/myapp-linux-amd64"
        destination = "local/"
        mode        = "file"
      }

      config {
        command = "local/myapp-linux-amd64"
        args    = ["--port", "${NOMAD_PORT_api}"]
      }

      # Render database credentials from Vault into environment variables.
      # The secret path "database/creds/myapp" is an example and must exist in Vault.
      template {
        data = <<EOF
{{ with secret "database/creds/myapp" }}
DATABASE_URL=postgresql://{{ .Data.username }}:{{ .Data.password }}@db.example.com:5432/myapp
{{ end }}
EOF
        destination = "secrets/db.env"
        env         = true
      }

      vault {
        policies = ["database-read"]
      }

      resources {
        cpu    = 200
        memory = 256
      }

      logs {
        max_files     = 3
        max_file_size = 10
      }
    }
  }
}
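
Because the service block registers go-api in Consul, other workloads and operators can discover it through Consul's standard interfaces (assuming a local Consul agent on the default ports 8600/8500):

# Resolve the service via Consul DNS
dig @127.0.0.1 -p 8600 go-api.service.consul SRV

# Or query the Consul HTTP API for healthy instances
curl http://127.0.0.1:8500/v1/health/service/go-api?passing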

Batch Job (Data Processing)

# batch-job.nomad
job "data-processing" {
  datacenters = ["dc1"]
  type        = "batch"

  periodic {
    cron             = "0 2 * * *"  # Daily at 2 AM
    prohibit_overlap = true
  }

  group "process" {
    count = 1

    task "etl" {
      driver = "docker"

      config {
        image = "python:3.9"
        command = "python"
        args = ["process_data.py"]
        
        mount {
          type   = "bind"
          source = "local/script"
          target = "/app"
        }
      }

      artifact {
        source      = "git::https://github.com/company/data-scripts.git"
        destination = "local/script"
      }

      # Render short-lived AWS credentials from Vault's AWS secrets engine.
      # The secret path "aws/creds/s3-access" is an example.
      template {
        data = <<EOF
{{ with secret "aws/creds/s3-access" }}
AWS_ACCESS_KEY_ID={{ .Data.access_key }}
AWS_SECRET_ACCESS_KEY={{ .Data.secret_key }}
{{ end }}
EOF
        destination = "secrets/aws.env"
        env         = true
      }

      env {
        DATA_BUCKET = "my-data-bucket"
      }

      vault {
        policies = ["aws-s3-access"]
      }

      resources {
        cpu    = 500
        memory = 1024
      }
    }
  }
}
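
Periodic jobs can be triggered ahead of schedule, which is useful for testing the ETL pipeline without waiting for the 2 AM cron window:

# Force an immediate run and inspect the child job it creates
nomad job periodic force data-processing
nomad job status data-processing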

System Job (Log Collection)

# log-collector.nomad
job "log-collector" {
  datacenters = ["dc1"]
  type        = "system"

  group "collector" {
    task "fluentd" {
      driver = "docker"

      config {
        image = "fluentd:latest"
        
        mount {
          type   = "bind"
          source = "/var/log"
          target = "/var/log"
          readonly = true
        }
        
        mount {
          type   = "bind"
          source = "local/fluent.conf"
          target = "/fluentd/etc/fluent.conf"
        }
      }

      template {
        data = <<EOF
<source>
  @type tail
  path /var/log/nomad/nomad.log
  pos_file /var/log/fluentd/nomad.log.pos
  tag nomad.log
  format json
</source>

<match nomad.**>
  @type elasticsearch
  host {{ range service "elasticsearch" }}{{ .Address }}{{ end }}
  port {{ range service "elasticsearch" }}{{ .Port }}{{ end }}
  index_name nomad-logs
</match>
EOF
        destination = "local/fluent.conf"
      }

      resources {
        cpu    = 100
        memory = 128
      }
    }
  }
}
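
A system job places one allocation on every eligible client. If the collector should only run on a subset of nodes, a constraint block narrows placement; the node_class value "logging" below is an illustrative client setting, not something Nomad defines.

# Add inside the "log-collector" job block to restrict placement
constraint {
  attribute = "${node.class}"
  value     = "logging"
}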

CSI Storage Plugin

# csi-plugin.nomad
job "csi-plugin" {
  datacenters = ["dc1"]
  type        = "system"

  group "csi-node" {
    task "csi-plugin" {
      driver = "docker"

      config {
        image = "my-registry/csi-driver:latest"
        
        args = [
          "--endpoint=${CSI_ENDPOINT}",
          "--nodeid=${node.unique.id}"
        ]

        privileged = true
      }

      csi_plugin {
        id        = "ebs"
        type      = "node"
        mount_dir = "/csi"
      }

      resources {
        cpu    = 100
        memory = 128
      }
    }
  }
}
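
With the node plugin (and a matching controller plugin) healthy, volumes can be created against it and mounted by jobs. The volume ID, capacities, and capability values below are illustrative:

# volume.hcl
id        = "app-data"
name      = "app-data"
type      = "csi"
plugin_id = "ebs"

capacity_min = "10GiB"
capacity_max = "20GiB"

capability {
  access_mode     = "single-node-writer"
  attachment_mode = "file-system"
}

# Create the volume and check that it is schedulable
nomad volume create volume.hcl
nomad volume status app-data

Jobs then consume the volume through a group-level volume block and a task-level volume_mount block.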

Nomad CLI Operations

# Job management
nomad job run web-app.nomad
nomad job status web-app
nomad job stop web-app
nomad job restart web-app

# Scaling
nomad job scale web-app 5

# Deployment verification
nomad deployment list
nomad deployment status <deployment-id>
nomad deployment promote <deployment-id>

# Allocation management
nomad alloc status <alloc-id>
nomad alloc logs <alloc-id>
nomad alloc exec <alloc-id> /bin/sh

# Node management
nomad node status
nomad node drain -enable <node-id>
nomad node eligibility -disable <node-id>

# System verification
nomad server members
nomad operator autopilot get-config
nomad system gc

# Metrics and monitoring
nomad operator metrics
nomad monitor -log-level DEBUG

High Availability Configuration

# nomad.hcl (HA configuration)
server {
  enabled          = true
  bootstrap_expect = 3
  
  server_join {
    retry_join = ["provider=aws tag_key=Environment tag_value=production"]
  }
  
  encrypt = "base64-encrypted-gossip-key"
  
  autopilot {
    cleanup_dead_servers      = true
    last_contact_threshold    = "200ms"
    max_trailing_logs         = 250
    server_stabilization_time = "10s"
  }
}

acl {
  enabled = true
}

tls {
  http = true
  rpc  = true
  
  ca_file   = "/etc/nomad.d/ca.pem"
  cert_file = "/etc/nomad.d/server.pem"
  key_file  = "/etc/nomad.d/server-key.pem"
}
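
The secrets referenced above can be generated with Nomad's own tooling (commands assume Nomad 1.4 or later, which ships the gossip keyring and tls subcommands):

# Generate a base64-encoded gossip encryption key for the encrypt setting
nomad operator gossip keyring generate

# Create a CA and a server certificate/key pair for the tls block
nomad tls ca create
nomad tls cert create -server -region global

# Once the servers are up with ACLs enabled, bootstrap the ACL system
nomad acl bootstrap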