fix(vault): Stable Raft cluster formation and reliable multi-node unseal on Docker Swarm

Root cause: Docker Swarm assigns a new random container ID as $HOSTNAME on every
task restart, making node_id, api_addr, and cluster_addr change with each restart.
Vault could not recognize its own Raft data → cluster never reformed after restart.

Fixes:
- docker-stack-vault.yml: add hostname: "vault-{{.Task.Slot}}.iklim.co" so each
  replica gets a stable, slot-based hostname covered by the *.iklim.co wildcard cert.
  Replace STABLE_ID/NODE_ID_PLACEHOLDER logic with a single HOSTNAME_PLACEHOLDER sed.
  Replace single unseal attempt with a retry loop (90×2s) so peer nodes unseal as
  soon as they join Raft, without needing external intervention.
- vault-bootstrap.sh: add ADIM 6b — after rolling restart, wait for Raft leader to
  unseal, wait for all peers to join Raft (vault operator raft list-peers), then
  attempt explicit per-peer unseal via overlay network (best-effort).
  ADIM 4 early-exit now fires N requests to the shared alias; all must return
  Sealed: false before declaring the cluster healthy.
  ADIM 7 polls up to 4 minutes via check_cluster_unsealed (9 shared-alias requests)
  and retries peer unseal on each iteration.
- deploy-prod.yml: health check now fires 9 requests to the shared alias; all must
  return Sealed: false (single-node check was masking partially-sealed clusters).
This commit is contained in:
Murat ÖZDEMİR 2026-06-10 18:17:59 +03:00
parent 2ec208b7a2
commit 392a015b8d
3 changed files with 143 additions and 27 deletions

View File

@ -38,12 +38,19 @@ jobs:
- name: Verify Vault Cluster Health - name: Verify Vault Cluster Health
run: | run: |
SEALED=$(docker run --rm --network iklimco-net hashicorp/vault:2.0.1 \ # Fire 9 requests to the shared alias (load-balanced across all 3 nodes).
sh -c "VAULT_ADDR=https://vault.iklim.co:8200 VAULT_SKIP_VERIFY=true vault status 2>/dev/null" \ # Every request must return Sealed: false — one healthy node is not enough.
| awk '/^Sealed/{print $2}' || echo "true") SEALED_COUNT=0
if [ "$SEALED" = "false" ]; then for i in $(seq 1 9); do
echo "Vault cluster is unsealed and healthy" SEALED=$(docker run --rm --network iklimco-net hashicorp/vault:2.0.1 \
sh -c "VAULT_ADDR=https://vault.iklim.co:8200 VAULT_SKIP_VERIFY=true vault status 2>/dev/null" \
| awk '/^Sealed/{print $2}' || echo "true")
[ "$SEALED" = "true" ] && SEALED_COUNT=$((SEALED_COUNT+1))
sleep 1
done
if [ "$SEALED_COUNT" -eq 0 ]; then
echo "Vault cluster is fully unsealed and healthy (9/9 checks passed)"
else else
echo "ERROR: Vault cluster is sealed or unreachable" echo "ERROR: $SEALED_COUNT/9 checks returned sealed or unreachable"
exit 1 exit 1
fi fi

View File

@ -11,13 +11,18 @@ services:
image: hashicorp/vault:2.0.1 image: hashicorp/vault:2.0.1
cap_add: cap_add:
- IPC_LOCK - IPC_LOCK
# Overriding the default entrypoint to manipulate configuration strictly in RAM # hostname uses the service slot number (stable across restarts) so that node_id,
# api_addr, and cluster_addr remain consistent after every container restart.
# vault-N.iklim.co is covered by the *.iklim.co wildcard cert (TLS works).
hostname: "vault-{{.Task.Slot}}.iklim.co"
entrypoint: ["sh", "-c"] entrypoint: ["sh", "-c"]
# 1. Resolves HOSTNAME_PLACEHOLDER via sed entirely in RAM (/dev/shm) — no secret touches disk # 1. Substitutes HOSTNAME_PLACEHOLDER with $HOSTNAME (vault-N.iklim.co) in RAM (/dev/shm)
# 2. Starts vault server in background # 2. Starts vault server in background
# 3. Registers SIGTERM/SIGINT trap for graceful shutdown # 3. Registers SIGTERM/SIGINT trap for graceful shutdown
# 4. Polls vault status; exit code 1 = not yet ready, 0 or 2 = vault is responding # 4. Polls vault status; exit code 1 = not yet ready, 0 or 2 = vault is responding
# 5. Auto-unseals using vault_unseal_key Docker secret (no-op if key is wrong or file missing) # 5. Retry-unseal loop: attempts unseal every 2s for up to 3 min.
# On initial bootstrap peers have empty Raft storage and cannot unseal until they
# join the cluster; the loop keeps retrying so they unseal as soon as Raft join succeeds.
# 6. Waits for vault to exit and propagates exit code to Docker # 6. Waits for vault to exit and propagates exit code to Docker
command: > command: >
"cat /vault/config/vault.json | sed \"s/HOSTNAME_PLACEHOLDER/$$HOSTNAME/g\" > /dev/shm/vault.json; "cat /vault/config/vault.json | sed \"s/HOSTNAME_PLACEHOLDER/$$HOSTNAME/g\" > /dev/shm/vault.json;
@ -26,7 +31,7 @@ services:
trap 'kill -TERM $$VAULT_PID; wait $$VAULT_PID' TERM INT; trap 'kill -TERM $$VAULT_PID; wait $$VAULT_PID' TERM INT;
export VAULT_ADDR='https://127.0.0.1:8200' VAULT_SKIP_VERIFY='true'; export VAULT_ADDR='https://127.0.0.1:8200' VAULT_SKIP_VERIFY='true';
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do vault status > /dev/null 2>&1; [ $$? -ne 1 ] && break; sleep 2; done; for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do vault status > /dev/null 2>&1; [ $$? -ne 1 ] && break; sleep 2; done;
vault operator unseal $$(cat /run/secrets/vault_unseal_key 2>/dev/null) > /dev/null 2>&1 || true; i=0; while [ $$i -lt 90 ]; do vault status > /dev/null 2>&1 && break; vault operator unseal $$(cat /run/secrets/vault_unseal_key 2>/dev/null) > /dev/null 2>&1 || true; sleep 2; i=$$(($$i+1)); done;
wait $$VAULT_PID" wait $$VAULT_PID"
networks: networks:
iklimco-net: iklimco-net:

View File

@ -56,6 +56,31 @@ run_vault() {
sh -c "VAULT_ADDR=https://vault.iklim.co:8200 VAULT_SKIP_VERIFY=true $cmd" sh -c "VAULT_ADDR=https://vault.iklim.co:8200 VAULT_SKIP_VERIFY=true $cmd"
fi fi
} }
# Run a vault CLI command against ONE specific node instead of the shared alias.
# Used for direct per-peer unseal.
#
# Arguments:
#   $1   - node_id of the target node; it is used verbatim as the host name in
#          https://<node_id>:8200, so it must be resolvable on iklimco-net.
#   $2.. - the command line to run; joined with spaces and executed by `sh -c`
#          inside a throw-away hashicorp/vault container.
# Globals:
#   VAULT_TOKEN (read, optional) - forwarded to the container when non-empty.
# Returns:
#   the exit status of `docker run` (i.e. of the command inside the container).
#
# VAULT_ADDR / VAULT_SKIP_VERIFY / VAULT_TOKEN are handed over with `docker -e`
# rather than being pasted into the `sh -c` string. The token is passed by NAME
# only (`-e VAULT_TOKEN`), so its value never appears in the docker command line
# and is never re-parsed by a shell.
run_vault_on() {
  local node_id="$1"; shift
  local cmd="$*"
  # ${VAULT_TOKEN:-} keeps this safe when the caller runs with `set -u`.
  if [ -n "${VAULT_TOKEN:-}" ]; then
    # The prefix assignment exports the token for this one docker invocation,
    # which is what `-e VAULT_TOKEN` (no value) reads from.
    VAULT_TOKEN="$VAULT_TOKEN" docker run --rm -i --network iklimco-net \
      -e VAULT_TOKEN \
      -e "VAULT_ADDR=https://${node_id}:8200" \
      -e VAULT_SKIP_VERIFY=true \
      hashicorp/vault:2.0.1 \
      sh -c "$cmd"
  else
    docker run --rm -i --network iklimco-net \
      -e "VAULT_ADDR=https://${node_id}:8200" \
      -e VAULT_SKIP_VERIFY=true \
      hashicorp/vault:2.0.1 \
      sh -c "$cmd"
  fi
}
# Send N `vault status` requests to the shared alias (vault.iklim.co) and report
# whether the whole cluster looks unsealed.
# All requests run inside a single docker container to avoid N separate startups.
#
# Arguments:
#   $1 - number of requests to send (default 9); must be a non-negative integer.
# Returns:
#   0   when EVERY request answered exactly "Sealed false";
#   k   (1..255) = number of requests that did NOT answer "Sealed false"
#       (sealed, unreachable, or unparsable output), capped at 255;
#   2   without contacting docker when $1 is not a non-negative integer.
#
# A request only counts as healthy when the Sealed field is literally "false".
# `vault status | awk ... || echo true` cannot detect an unreachable node: the
# pipeline's status is awk's (always 0), so the fallback never runs and the
# result is an empty string. Treating "anything but false" as a failure closes
# that hole.
check_cluster_unsealed() {
  local n="${1:-9}"
  # $n is interpolated into the script below; refuse anything that is not a
  # plain number so a bad argument cannot break the loop test (which would make
  # the container exit 0 = "healthy") or inject shell code.
  case "$n" in
    ''|*[!0-9]*)
      echo "check_cluster_unsealed: request count must be a non-negative integer, got '$n'" >&2
      return 2
      ;;
  esac
  docker run --rm --network iklimco-net hashicorp/vault:2.0.1 sh -c "
    sealed=0; i=0
    while [ \$i -lt $n ]; do
      s=\$(VAULT_ADDR=https://vault.iklim.co:8200 VAULT_SKIP_VERIFY=true vault status 2>/dev/null | awk '/^Sealed/{print \$2}')
      [ \"\$s\" = 'false' ] || sealed=\$((sealed+1))
      i=\$((i+1)); [ \$i -lt $n ] && sleep 1
    done
    [ \$sealed -gt 255 ] && sealed=255
    exit \$sealed
  "
}
# ───────────────────────────────────────────────────────────────────── # ─────────────────────────────────────────────────────────────────────
# ━━━ ADIM 0 — On kosullar ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ # ━━━ ADIM 0 — On kosullar ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
@ -87,7 +112,10 @@ step "ADIM 3 — Vault cluster bekleniyor"
wait_service_running "${STACK_NAME}_vault" 3 300 wait_service_running "${STACK_NAME}_vault" 3 300
sleep 10 sleep 10
# ━━━ ADIM 4 — Vault durum kontrolu ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ # ━━━ ADIM 4 — Vault durum kontrolu (erken cikis) ━━━━━━━━━━━━━━━━━━━
# Early-exit requires the ENTIRE cluster to be unsealed. We fire N requests to
# the shared alias (load-balanced) and all must return Sealed: false. A single
# healthy node is not sufficient evidence that all 3 nodes are unsealed.
step "ADIM 4 — Vault durum kontrolu" step "ADIM 4 — Vault durum kontrolu"
VAULT_STATUS_OUT=$(run_vault "vault status 2>/dev/null" || true) VAULT_STATUS_OUT=$(run_vault "vault status 2>/dev/null" || true)
VAULT_INITIALIZED=$(echo "$VAULT_STATUS_OUT" | awk '/^Initialized/{print $2}') VAULT_INITIALIZED=$(echo "$VAULT_STATUS_OUT" | awk '/^Initialized/{print $2}')
@ -95,12 +123,17 @@ VAULT_SEALED=$(echo "$VAULT_STATUS_OUT" | awk '/^Sealed/{print $2}')
info "Initialized: ${VAULT_INITIALIZED:-unknown}, Sealed: ${VAULT_SEALED:-unknown}" info "Initialized: ${VAULT_INITIALIZED:-unknown}, Sealed: ${VAULT_SEALED:-unknown}"
if [ "$VAULT_INITIALIZED" = "true" ] && [ "$VAULT_SEALED" = "false" ]; then if [ "$VAULT_INITIALIZED" = "true" ] && [ "$VAULT_SEALED" = "false" ]; then
ok "Vault zaten initialize edilmis ve unsealed" info "En az 1 node saglikli — cluster geneli kontrol ediliyor (9 istek)..."
echo if check_cluster_unsealed 9; then
echo "════════════════════════════════════════════════" ok "Vault cluster tamamen unsealed ve saglikli"
echo " BOOTSTRAP TAMAMLANDI (Vault saglıklı)" echo
echo "════════════════════════════════════════════════" echo "════════════════════════════════════════════════"
exit 0 echo " BOOTSTRAP TAMAMLANDI (Vault saglıklı)"
echo "════════════════════════════════════════════════"
exit 0
else
info "Bazi node'lar hala sealed — bootstrap devam ediyor..."
fi
fi fi
# ━━━ ADIM 5 — Vault initialize (gerekirse) ━━━━━━━━━━━━━━━━━━━━━━━━━ # ━━━ ADIM 5 — Vault initialize (gerekirse) ━━━━━━━━━━━━━━━━━━━━━━━━━
@ -137,22 +170,93 @@ echo "$UNSEAL_KEY" | docker secret create vault_unseal_key - >/dev/null
docker service update --secret-add vault_unseal_key "${STACK_NAME}_vault" >/dev/null docker service update --secret-add vault_unseal_key "${STACK_NAME}_vault" >/dev/null
ok "vault_unseal_key gercek degerle guncellendi" ok "vault_unseal_key gercek degerle guncellendi"
# ━━━ ADIM 7 — Unseal dogrula ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ # ━━━ ADIM 6b — Leader unseal ve peer node'lar ━━━━━━━━━━━━━━━━━━━━━━
step "ADIM 7 — Vault unseal dogrulaniyor" # After rolling restart:
info "Rolling restart tamamlanmasi ve unseal bekleniyor (30s)..." # - The node that ran 'vault operator init' has Raft data; its entrypoint retry
sleep 30 # loop will unseal it and it becomes the Raft leader.
# - Peer nodes start with EMPTY Raft storage. They cannot unseal until they join
# the Raft cluster (chicken-and-egg). The entrypoint retry loop keeps trying
# every 2s; once they join Raft they become Initialized=true and the next
# unseal attempt succeeds.
# - We also try to unseal peers explicitly by node_id, which is expected to be
# the stable slot-based hostname (vault-N.iklim.co) that is also used for
# api_addr. This requires the node_id to be resolvable on the overlay network.
# If it is not, the explicit attempt fails (logged, non-fatal) and the entrypoint
# retry loop handles it instead (worst case: ~60s extra wait).
step "ADIM 6b — Raft leader bekleniyor ve peer node'lar unsealing"
info "Rolling restart sonrasi Raft leader unseal bekleniyor (max 3 dakika)..."
UNSEALED=0 LEADER_UP=0
for i in $(seq 1 12); do for i in $(seq 1 36); do
STATUS=$(run_vault "vault status 2>/dev/null" | awk '/^Sealed/{print $2}' || echo "true") STATUS=$(run_vault "vault status 2>/dev/null" | awk '/^Sealed/{print $2}' || echo "true")
if [ "$STATUS" = "false" ]; then if [ "$STATUS" = "false" ]; then
ok "Vault cluster unsealed" ok "Raft leader unsealed"
LEADER_UP=1
break
fi
echo " ${i}/36 — Sealed: ${STATUS}, 5s bekleniyor..."
sleep 5
done
[ "$LEADER_UP" -eq 1 ] || fail "Raft leader 3 dakika icinde unseal olmadi"
# Load the root token: take the last field of the "Initial Root Token:" line in
# $MAIN_INIT_FILE and abort via fail() if nothing was found.
ROOT_TOKEN=$(awk '/^Initial Root Token:/{print $NF}' "$MAIN_INIT_FILE")
[ -n "$ROOT_TOKEN" ] || fail "Root token '$MAIN_INIT_FILE' dosyasinda bulunamadi"
# From here on VAULT_TOKEN is set for the helper functions; run_vault_on forwards
# it when non-empty. NOTE(review): presumably run_vault does the same, which
# `raft list-peers` below would need — confirm against run_vault.
VAULT_TOKEN="$ROOT_TOKEN"
# Wait for all peers to join the Raft cluster (retry_join retries every ~30s).
# Polls up to 36 times with a 5s pause (= 3 minutes) until at least 3 peers are listed.
info "Raft cluster olusmasi bekleniyor (3 peer, max 3 dakika)..."
ALL_JOINED=0
for i in $(seq 1 36); do
# Count peers: awk skips the first two output lines (presumably the column header
# and its dashed separator) and counts every remaining line that contains an
# alphanumeric character; `c+0` prints 0 when there were none. `|| true` keeps a
# failing command from aborting the script, and ${PEER_COUNT:-0} below covers an
# empty result.
PEER_COUNT=$(run_vault "vault operator raft list-peers 2>/dev/null" \
| awk 'NR>2 && /[a-zA-Z0-9]/{c++} END{print c+0}' || true)
if [ "${PEER_COUNT:-0}" -ge 3 ]; then
ok "Raft cluster tam: ${PEER_COUNT}/3 peer"
ALL_JOINED=1
break
fi
echo " ${i}/36 — Raft peers: ${PEER_COUNT:-0}/3, 5s bekleniyor..."
sleep 5
done
# Give up if the loop ran out without ever seeing 3 peers.
[ "$ALL_JOINED" -eq 1 ] || fail "Raft cluster 3 dakika icinde tam olusmaadi"
# Explicitly unseal each non-leader peer via its node_id on the overlay network.
# The node_id reported by `raft list-peers` is used directly as the host name
# (run_vault_on builds https://<node_id>:8200 from it), so it is expected to be
# the node's stable hostname.
# Best-effort: if the hostname is not resolvable, the entrypoint retry loop handles it.
info "Peer node'lar individually unsealing (best-effort)..."
# Collect the first column of every peer row that does not contain "leader"
# (the first two output lines are skipped, as in the peer count above).
PEER_HOSTS=$(run_vault "vault operator raft list-peers 2>/dev/null" \
| awk 'NR>2 && /[a-zA-Z0-9]/ && !/leader/{print $1}' || true)
# $PEER_HOSTS is intentionally unquoted: word-splitting yields one host per iteration.
for peer_host in $PEER_HOSTS; do
info " Unsealing peer: $peer_host"
# NOTE(review): $UNSEAL_KEY is interpolated into the command string, so the key
# is part of the docker command line — consider a channel that keeps it out of argv.
# Any non-zero status lands in the else branch; its message blames overlay DNS,
# but other failures (TLS, wrong key, node down) end up there as well.
if run_vault_on "$peer_host" "vault operator unseal $UNSEAL_KEY" > /dev/null 2>&1; then
ok " $peer_host: unseal komutu gonderildi"
else
info " $peer_host: direct unseal basarisiz (overlay DNS resolve edilemedi — entrypoint loop devam ediyor)"
fi
done
# ━━━ ADIM 7 — Tum node'lar unsealed mi? ━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Fire 9 requests to the shared alias with 1s sleep between each. With 3 nodes
# and any reasonable load-balancing the probability of hitting all 3 is very high.
# All 9 must return Sealed: false. We retry for up to 4 minutes to give the
# entrypoint retry loop time to finish for nodes that joined Raft late.
step "ADIM 7 — Vault cluster tam unseal dogrulaniyor"
info "Entrypoint retry loop tamamlanmasi bekleniyor (max 4 dakika)..."
UNSEALED=0
for i in $(seq 1 24); do
if check_cluster_unsealed 9; then
ok "Vault cluster tamamen unsealed (9/9 kontrol basarili)"
UNSEALED=1 UNSEALED=1
break break
fi fi
[ "$i" -eq 12 ] && break echo " ${i}/24 — Cluster henuz tam saglikli degil, 10s bekleniyor..."
echo " ${i}/12 — Sealed: $STATUS, retrying in 5s..." # Re-attempt explicit peer unseal on every iteration in case hostname became
sleep 5 # resolvable after Raft catch-up (containers may still be starting up).
PEER_HOSTS=$(run_vault "vault operator raft list-peers 2>/dev/null" \
| awk 'NR>2 && /[a-zA-Z0-9]/ && !/leader/{print $1}' || true)
for peer_host in $PEER_HOSTS; do
run_vault_on "$peer_host" "vault operator unseal $UNSEAL_KEY" > /dev/null 2>&1 || true
done
sleep 10
done done
[ "$UNSEALED" -eq 1 ] || fail "Vault cluster unseal olmadi — 'docker service logs ${STACK_NAME}_vault' ile loglari kontrol edin" [ "$UNSEALED" -eq 1 ] || fail "Vault cluster unseal olmadi — 'docker service logs ${STACK_NAME}_vault' ile loglari kontrol edin"