#!/bin/bash
# vault-bootstrap.sh — Vault HA cluster bootstrap (Shamir seal, Docker secret)
# Node-agnostic: uses docker exec for local replicas, falls back to the overlay
# network via docker run when no local replica is found on this Swarm manager.
#
# Environment:
#   SKIP_DEPLOY=true   skip the 'docker stack deploy' step (default: false)

# -E (errtrace) lets the ERR trap defined below also fire for failures that
# happen inside functions, not only at the top level of the script.
set -Eeuo pipefail

# ─── Configuration ───────────────────────────────────────────────────
STACK_NAME="iklimco"
STACK_FILE="$(cd "$(dirname "$0")" && pwd)/docker-stack-vault.yml"
OUT_DIR="/tmp/vault-bootstrap"
SKIP_DEPLOY="${SKIP_DEPLOY:-false}"
# ─────────────────────────────────────────────────────────────────────

# The init output stored under OUT_DIR holds the unseal key and the root token,
# so the directory and every file this script creates must be owner-only.
umask 077
mkdir -p "$OUT_DIR"
chmod 700 "$OUT_DIR"
MAIN_INIT_FILE="$OUT_DIR/main-vault-init.txt"

# ─── Logging ─────────────────────────────────────────────────────────
# step: print a timestamped banner for a new bootstrap phase.
step() {
  echo
  echo "════════════════════════════════════════════════"
  echo " [$(date '+%H:%M:%S')] $*"
  echo "════════════════════════════════════════════════"
}
# ok / info: one-line progress messages on stdout.
ok()   { echo " [OK] $*"; }
info() { echo " --> $*"; }
# fail: print an error on stderr and terminate the script with status 1.
fail() { echo; echo " [ERROR] $*" >&2; exit 1; }
trap 'echo; echo " [ERROR] Script terminated unexpectedly at line $LINENO" >&2' ERR
# ─────────────────────────────────────────────────────────────────────

# ─── Helpers ─────────────────────────────────────────────────────────
#######################################
# Poll a Swarm service until enough of its tasks report "Running".
# Arguments:
#   $1 - service name
#   $2 - minimum number of running tasks required
#   $3 - timeout in seconds (default: 180); polled every 5 seconds
# Outputs:   progress lines on stdout
# Returns:   0 once the service is ready; calls fail (exit 1) on timeout
#######################################
wait_service_running() {
  local svc="$1" expected="$2" timeout="${3:-180}" elapsed=0
  local running   # kept local so the counter does not leak into the script's globals
  info "Waiting for: $svc ($expected running task)..."
  while [ "$elapsed" -lt "$timeout" ]; do
    # grep -c prints 0 and exits non-zero when nothing matches (or docker
    # fails); '|| true' keeps that from aborting the script under set -e.
    running=$(docker service ps "$svc" \
      --filter "desired-state=running" \
      --format '{{.CurrentState}}' 2>/dev/null \
      | grep -c "^Running" || true)
    if [ "$running" -ge "$expected" ]; then
      ok "$svc ready: $running/$expected"
      return 0
    fi
    sleep 5
    elapsed=$((elapsed + 5))
    echo " ${elapsed}s/${timeout}s — running: $running/$expected"
  done
  fail "$svc did not become ready within $timeout seconds"
}

# Run a vault CLI command — uses docker exec if a vault replica is on this node,
# otherwise falls back to the overlay network via docker run.
# Token used for authenticated vault CLI calls; empty means "send no token".
VAULT_TOKEN=""

# Image, overlay network and shared service alias used when a command has to be
# run from a throw-away container. Overridable from the environment; the
# defaults are the values that were previously hard-coded.
VAULT_IMAGE="${VAULT_IMAGE:-hashicorp/vault:2.0.1}"
VAULT_NETWORK="${VAULT_NETWORK:-iklimco-net}"
VAULT_ALIAS="${VAULT_ALIAS:-vault.iklim.co}"

#######################################
# Invoke 'docker <subcommand> ...', forwarding VAULT_TOKEN when it is set.
# The token is handed over with '-e VAULT_TOKEN' (value taken from the docker
# client's environment) instead of being spliced into the 'sh -c' string, so
# it is not re-parsed by a shell and does not show up in the command line of
# the process started inside the container.
# Globals:   VAULT_TOKEN (read)
# Arguments: $1 - docker subcommand ("exec" or "run"); rest - its arguments
#######################################
_vault_docker() {
  local subcommand="$1"
  shift
  if [ -n "$VAULT_TOKEN" ]; then
    VAULT_TOKEN="$VAULT_TOKEN" docker "$subcommand" -e VAULT_TOKEN "$@"
  else
    docker "$subcommand" "$@"
  fi
}

#######################################
# Run a vault CLI command against the cluster.
# Uses docker exec when a vault task container of this stack runs on the local
# node (talking to 127.0.0.1); otherwise runs the command from a temporary
# container on the overlay network against the shared alias.
# Globals:   STACK_NAME, VAULT_ALIAS (read)
# Arguments: the command line, joined with spaces and evaluated by 'sh -c'
#            inside the container (so redirections such as 2>/dev/null work)
# Returns:   exit status of the vault command
#######################################
run_vault() {
  local cmd="$*"
  local cid
  # 'name=' is a regex filter; the escaped dot matches Swarm task containers
  # named "<stack>_vault.<slot>.<id>". Only the first match is used.
  cid=$(docker ps -q -f "name=${STACK_NAME}_vault\." | head -1 || true)
  if [ -n "$cid" ]; then
    _vault_docker exec -i "$cid" \
      sh -c "VAULT_ADDR=https://127.0.0.1:8200 VAULT_SKIP_VERIFY=true $cmd"
  else
    run_vault_on "$VAULT_ALIAS" "$cmd"
  fi
}

#######################################
# Run a vault CLI command targeting a specific node by its node_id (= STABLE_ID =
# the api_addr hostname set inside the container). Used for direct per-peer unseal.
# Globals:   VAULT_NETWORK, VAULT_IMAGE (read)
# Arguments: $1 - hostname to address; rest - the command line for 'sh -c'
# Returns:   exit status of the vault command
#######################################
run_vault_on() {
  local node_id="$1"
  shift
  local cmd="$*"
  _vault_docker run --rm -i --network "$VAULT_NETWORK" "$VAULT_IMAGE" \
    sh -c "VAULT_ADDR=https://${node_id}:8200 VAULT_SKIP_VERIFY=true $cmd"
}

# Send N requests to the shared alias; returns 0 only when ALL return Sealed: false.
# Runs everything inside a single docker container to avoid 9 separate startups.
#######################################
# Probe the shared Vault alias N times from one temporary container.
# A probe counts as healthy only when 'vault status' literally reports
# "Sealed false". A sealed node, an unreachable node, or unparsable output all
# count as not healthy — the script inside the container runs without
# pipefail, so a failed 'vault status' simply yields an empty value and must
# not be mistaken for "unsealed".
# Arguments: $1 - number of probes (default: 9), one second apart
# Returns:   0 when every probe was healthy; otherwise the number of
#            unhealthy probes (the container's exit status)
#######################################
check_cluster_unsealed() {
  local n="${1:-9}"
  # The probe count and hostname are passed as positional parameters so the
  # inner script can stay single-quoted (no escaping of every '$').
  docker run --rm --network "${VAULT_NETWORK:-iklimco-net}" \
    "${VAULT_IMAGE:-hashicorp/vault:2.0.1}" sh -c '
      n="$1"; host="$2"; unhealthy=0; i=0
      while [ "$i" -lt "$n" ]; do
        s=$(VAULT_ADDR="https://${host}:8200" VAULT_SKIP_VERIFY=true \
              vault status 2>/dev/null | awk "/^Sealed/{print \$2}")
        [ "$s" = "false" ] || unhealthy=$((unhealthy+1))
        i=$((i+1))
        if [ "$i" -lt "$n" ]; then sleep 1; fi
      done
      exit "$unhealthy"
    ' sh "$n" "${VAULT_ALIAS:-vault.iklim.co}"
}
# ─────────────────────────────────────────────────────────────────────

# ━━━ STEP 0 — Prerequisites ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
step "STEP 0 — Checking prerequisites"
# 'docker node ls' only succeeds on a Swarm manager.
docker node ls &>/dev/null || fail "Swarm manager node is required"
[ -f "$STACK_FILE" ] || fail "Stack file not found: $STACK_FILE"
ok "Prerequisites completed"

# ━━━ Cluster Health Check ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Check if the cluster is already healthy to avoid unnecessary restarts.
# Any non-zero status from the health script means "continue with bootstrap".
if bash "$(dirname "$0")/vault-check-health.sh"; then
  echo
  echo "════════════════════════════════════════════════"
  echo " CLUSTER ALREADY HEALTHY — Skipping Bootstrap"
  echo "════════════════════════════════════════════════"
  exit 0
fi

# ━━━ STEP 1 — Placeholder secret ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
step "STEP 1 — Checking vault_unseal_key"
# Match the secret name exactly: a prefix match would treat e.g.
# "vault_unseal_key_old" as the real secret and skip creating the placeholder.
EXISTING_SECRETS=$(docker secret ls --format '{{.Name}}') \
  || fail "Could not list Docker secrets"
if grep -qxF 'vault_unseal_key' <<<"$EXISTING_SECRETS"; then
  info "vault_unseal_key exists, skipping"
else
  echo "bootstrap" | docker secret create vault_unseal_key - >/dev/null
  ok "vault_unseal_key (placeholder) created"
fi

# ━━━ STEP 2 — Stack deploy ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
step "STEP 2 — Stack deploy"
if [ "$SKIP_DEPLOY" = "true" ]; then
  info "SKIP_DEPLOY=true — skipping"
else
  docker stack deploy --with-registry-auth -c "$STACK_FILE" "$STACK_NAME"
  ok "Stack deployed"
fi

# ━━━ STEP 3 — Waiting for Vault cluster ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
step "STEP 3 — Waiting for Vault cluster"
wait_service_running "${STACK_NAME}_vault" 3 300
# Short grace period after the tasks report Running.
sleep 10

# ━━━ STEP 4 — Vault status check (early exit) ━━━━━━━━━━━━━━━━━━━
# Early-exit
# requires the ENTIRE cluster to be unsealed. We fire N requests to
# the shared alias (load-balanced) and all must return Sealed: false. A single
# healthy node is not sufficient evidence that all 3 nodes are unsealed.
step "STEP 4 — Vault status check"
# 'vault status' exits non-zero for a sealed or unreachable Vault; '|| true'
# keeps whatever output there was so it can be parsed below.
VAULT_STATUS_OUT=$(run_vault "vault status 2>/dev/null" || true)
VAULT_INITIALIZED=$(echo "$VAULT_STATUS_OUT" | awk '/^Initialized/{print $2}')
VAULT_SEALED=$(echo "$VAULT_STATUS_OUT" | awk '/^Sealed/{print $2}')
info "Initialized: ${VAULT_INITIALIZED:-unknown}, Sealed: ${VAULT_SEALED:-unknown}"
if [ "$VAULT_INITIALIZED" = "true" ] && [ "$VAULT_SEALED" = "false" ]; then
  info "At least 1 node healthy — checking cluster-wide (9 requests)..."
  if check_cluster_unsealed 9; then
    ok "Vault cluster fully unsealed and healthy"
    echo
    echo "════════════════════════════════════════════════"
    echo " BOOTSTRAP COMPLETED (Vault healthy)"
    echo "════════════════════════════════════════════════"
    exit 0
  else
    info "Some nodes are still sealed — bootstrap continuing..."
  fi
fi

# ━━━ STEP 5 — Vault initialize (if needed) ━━━━━━━━━━━━━━━━━━━━━━━━━
step "STEP 5 — Initializing Vault / preparing unseal key"
if [ "$VAULT_INITIALIZED" = "true" ]; then
  # Vault is sealed but initialized. This happens when the vault_unseal_key Docker secret
  # contains the wrong value (e.g., placeholder was never replaced). Provide the init file
  # so the real key can be extracted and pushed to the secret.
  info "Vault is sealed but initialized — using existing init file"
  if ! { [ -f "$MAIN_INIT_FILE" ] && grep -q "Unseal Key 1" "$MAIN_INIT_FILE"; }; then
    # Real newlines: fail prints with plain echo, which does not expand "\n".
    fail "Init file missing: $MAIN_INIT_FILE"$'\n'"Manually add the Unseal Key to the file in this format:"$'\n'" Unseal Key 1: "
  fi
  ok "Init file exists"
else
  info "Initializing Vault..."
  # Write the init output to a private temp file first and move it into place
  # only after it has been validated. Piping straight into the init file would
  # truncate an existing copy of the unseal key whenever the init command
  # fails (for example when the status probe above got no answer from a Vault
  # that is in fact already initialized).
  INIT_TMP=$(mktemp "${MAIN_INIT_FILE}.XXXXXX")
  if ! run_vault "vault operator init -key-shares=1 -key-threshold=1" | tee "$INIT_TMP"; then
    rm -f -- "$INIT_TMP"
    fail "vault operator init failed — any existing init file was left untouched: $MAIN_INIT_FILE"
  fi
  if ! grep -q "Unseal Key 1" "$INIT_TMP"; then
    rm -f -- "$INIT_TMP"
    fail "vault operator init produced no unseal key — any existing init file was left untouched: $MAIN_INIT_FILE"
  fi
  mv -f -- "$INIT_TMP" "$MAIN_INIT_FILE"
  ok "Vault init completed: $MAIN_INIT_FILE"
fi

# ━━━ STEP 6 — Update vault_unseal_key Docker secret ━━━━━━━━━━━━━━
# Two-step update (delete + recreate with the same name) keeps the secret name
# consistent with the stack file so future 'docker stack deploy' runs do not
# trigger a service restart or revert to the placeholder.
step "STEP 6 — Updating vault_unseal_key Docker secret"
UNSEAL_KEY=$(awk '/Unseal Key 1:/{print $NF}' "$MAIN_INIT_FILE")
[ -n "$UNSEAL_KEY" ] || fail "Unseal key not found in '$MAIN_INIT_FILE' file"

info "Removing old secret from service (rolling restart 1/2)..."
docker service update --secret-rm vault_unseal_key "${STACK_NAME}_vault" >/dev/null
sleep 5
# Best-effort: if removal fails, the 'secret create' below fails and aborts the script.
docker secret rm vault_unseal_key || true

info "Recreating secret with real unseal key (rolling restart 2/2)..."
echo "$UNSEAL_KEY" | docker secret create vault_unseal_key - >/dev/null
docker service update --secret-add vault_unseal_key "${STACK_NAME}_vault" >/dev/null
ok "vault_unseal_key updated with real value"

# ━━━ STEP 6b — Leader unseal and peer nodes ━━━━━━━━━━━━━━━━━━━━━━
# After rolling restart:
# - The node that ran 'vault operator init' has Raft data; its entrypoint retry
#   loop will unseal it and it becomes the Raft leader.
# - Peer nodes start with EMPTY Raft storage. They cannot unseal until they join
#   the Raft cluster (chicken-and-egg). The entrypoint retry loop keeps trying
#   every 2s; once they join Raft they become Initialized=true and the next
#   unseal attempt succeeds.
# - We also try to unseal peers explicitly by node_id (= STABLE_ID = api_addr
#   hostname). This requires the node_id to be resolvable on the overlay network.
#   If it is not, the explicit attempt is silently skipped and the entrypoint
#   retry loop handles it instead (worst case: ~60s extra wait).
step "STEP 6b — Waiting for Raft leader and unsealing peer nodes"
info "Waiting for Raft leader unseal after rolling restart (max 3 minutes)..."
LEADER_UP=0
for ((i = 1; i <= 36; i++)); do
  # A sealed Vault makes 'vault status' exit non-zero, which (with pipefail)
  # fails the whole pipeline even though awk already printed the value. Use
  # '|| true' and default an empty result afterwards, rather than echoing a
  # second "true" that would be appended to awk's output.
  STATUS=$(run_vault "vault status 2>/dev/null" | awk '/^Sealed/{print $2}' || true)
  STATUS="${STATUS:-true}"
  if [ "$STATUS" = "false" ]; then
    ok "Raft leader unsealed"
    LEADER_UP=1
    break
  fi
  echo " ${i}/36 — Sealed: ${STATUS}, waiting 5s..."
  sleep 5
done
[ "$LEADER_UP" -eq 1 ] || fail "Raft leader did not unseal within 3 minutes"

# From here on vault CLI calls are authenticated with the root token taken
# from the init output.
ROOT_TOKEN=$(awk '/^Initial Root Token:/{print $NF}' "$MAIN_INIT_FILE")
[ -n "$ROOT_TOKEN" ] || fail "Root token not found in '$MAIN_INIT_FILE' file"
VAULT_TOKEN="$ROOT_TOKEN"

# Wait for all peers to join the Raft cluster (retry_join retries every ~30s).
info "Waiting for Raft cluster formation (3 peers, max 3 minutes)..."
ALL_JOINED=0
for ((i = 1; i <= 36; i++)); do
  # Skip the two header lines of 'raft list-peers' and count the remaining
  # non-empty rows; awk prints 0 when there is no usable output.
  PEER_COUNT=$(run_vault "vault operator raft list-peers 2>/dev/null" \
    | awk 'NR>2 && /[a-zA-Z0-9]/{c++} END{print c+0}' || true)
  if [ "${PEER_COUNT:-0}" -ge 3 ]; then
    ok "Raft cluster complete: ${PEER_COUNT}/3 peers"
    ALL_JOINED=1
    break
  fi
  echo " ${i}/36 — Raft peers: ${PEER_COUNT:-0}/3, waiting 5s..."
  sleep 5
done
[ "$ALL_JOINED" -eq 1 ] || fail "Raft cluster did not form within 3 minutes"

# Explicitly unseal each non-leader peer via its node_id on the overlay network.
# node_id equals STABLE_ID (the api_addr hostname configured in vault-template-v2.json).
# Best-effort: if the hostname is not resolvable, the entrypoint retry loop handles it.
info "Unsealing peer nodes individually (best-effort)..."
# Print the first column (node id) of every 'raft list-peers' row that is not
# the leader, one per line. Prints nothing when the listing fails.
_follower_peer_hosts() {
  run_vault "vault operator raft list-peers 2>/dev/null" \
    | awk 'NR>2 && /[a-zA-Z0-9]/ && !/leader/{print $1}' || true
}

PEER_HOSTS=$(_follower_peer_hosts)
# Unquoted on purpose: the list is split on whitespace, one hostname per word.
for follower in $PEER_HOSTS; do
  info " Unsealing peer: $follower"
  if ! run_vault_on "$follower" "vault operator unseal $UNSEAL_KEY" > /dev/null 2>&1; then
    info " $follower: direct unseal failed (overlay DNS could not be resolved — entrypoint loop continuing)"
    continue
  fi
  ok " $follower: unseal command sent"
done

# ━━━ STEP 7 — Are all nodes unsealed? ━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Fire 9 requests to the shared alias with 1s sleep between each. With 3 nodes
# and any reasonable load-balancing the probability of hitting all 3 is very high.
# All 9 must return Sealed: false. We retry for up to 4 minutes to give the
# entrypoint retry loop time to finish for nodes that joined Raft late.
step "STEP 7 — Verifying full Vault cluster unseal"
info "Waiting for entrypoint retry loop completion (max 4 minutes)..."
UNSEALED=0
attempt=1
while [ "$attempt" -le 24 ]; do
  if check_cluster_unsealed 9; then
    ok "Vault cluster fully unsealed (9/9 checks successful)"
    UNSEALED=1
    break
  fi
  echo " ${attempt}/24 — Cluster not fully healthy yet, waiting 10s..."
  # Re-attempt explicit peer unseal on every iteration in case hostname became
  # resolvable after Raft catch-up (containers may still be starting up).
  PEER_HOSTS=$(_follower_peer_hosts)
  for follower in $PEER_HOSTS; do
    run_vault_on "$follower" "vault operator unseal $UNSEAL_KEY" > /dev/null 2>&1 || true
  done
  sleep 10
  attempt=$((attempt + 1))
done

if [ "$UNSEALED" -ne 1 ]; then
  fail "Vault cluster did not unseal — check logs with 'docker service logs ${STACK_NAME}_vault'"
fi

# Final banner: remind the operator that the init output must not stay here.
printf '\n'
printf '%s\n' \
  "════════════════════════════════════════════════" \
  " BOOTSTRAP COMPLETED" \
  " Init output: $MAIN_INIT_FILE" \
  " IMPORTANT: Back up this file to a safe place and" \
  " delete it from the production environment!" \
  "════════════════════════════════════════════════"