fix(vault): Stable Raft cluster formation and reliable multi-node unseal on Docker Swarm
Root cause: Docker Swarm assigns a new random container ID as $HOSTNAME on every
task restart, making node_id, api_addr, and cluster_addr change with each restart.
Vault could not recognize its own Raft data → the cluster never re-formed after a restart.
Fixes:
- docker-stack-vault.yml: add hostname: "vault-{{.Task.Slot}}.iklim.co" so each
replica gets a stable, slot-based hostname covered by the *.iklim.co wildcard cert.
Replace STABLE_ID/NODE_ID_PLACEHOLDER logic with a single HOSTNAME_PLACEHOLDER sed.
Replace single unseal attempt with a retry loop (90×2s) so peer nodes unseal as
soon as they join Raft, without needing external intervention.
- vault-bootstrap.sh: add ADIM 6b — after rolling restart, wait for Raft leader to
unseal, wait for all peers to join Raft (vault operator raft list-peers), then
attempt explicit per-peer unseal via overlay network (best-effort).
ADIM 4 early-exit now fires N requests to the shared alias; all must return
Sealed: false before declaring the cluster healthy.
ADIM 7 polls up to 4 minutes via check_cluster_unsealed (9 shared-alias requests)
and retries peer unseal on each iteration.
- deploy-prod.yml: health check now fires 9 requests to the shared alias; all must
return Sealed: false (single-node check was masking partially-sealed clusters).
This commit is contained in:
parent
2ec208b7a2
commit
392a015b8d
@ -38,12 +38,19 @@ jobs:
|
||||
|
||||
- name: Verify Vault Cluster Health
|
||||
run: |
|
||||
SEALED=$(docker run --rm --network iklimco-net hashicorp/vault:2.0.1 \
|
||||
sh -c "VAULT_ADDR=https://vault.iklim.co:8200 VAULT_SKIP_VERIFY=true vault status 2>/dev/null" \
|
||||
| awk '/^Sealed/{print $2}' || echo "true")
|
||||
if [ "$SEALED" = "false" ]; then
|
||||
echo "Vault cluster is unsealed and healthy"
|
||||
# Fire 9 requests to the shared alias (load-balanced across all 3 nodes).
|
||||
# Every request must return Sealed: false — one healthy node is not enough.
|
||||
SEALED_COUNT=0
for i in $(seq 1 9); do
  # awk exits 0 even when no line matches, so an unreachable Vault produces an
  # empty string rather than a failing pipeline. The trailing `|| true` only
  # keeps the assignment from aborting the step when the shell runs with
  # `-e`/`pipefail` (vault status exits 2 while sealed).
  SEALED=$(docker run --rm --network iklimco-net hashicorp/vault:2.0.1 \
    sh -c "VAULT_ADDR=https://vault.iklim.co:8200 VAULT_SKIP_VERIFY=true vault status 2>/dev/null" \
    | awk '/^Sealed/{print $2}' || true)
  # Only the literal "false" is a pass; "true", empty output (unreachable) or
  # anything unexpected is counted as a failed check.
  [ "$SEALED" = "false" ] || SEALED_COUNT=$((SEALED_COUNT+1))
  sleep 1
done
|
||||
if [ "$SEALED_COUNT" -eq 0 ]; then
|
||||
echo "Vault cluster is fully unsealed and healthy (9/9 checks passed)"
|
||||
else
|
||||
echo "ERROR: Vault cluster is sealed or unreachable"
|
||||
echo "ERROR: $SEALED_COUNT/9 checks returned sealed or unreachable"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@ -11,13 +11,18 @@ services:
|
||||
image: hashicorp/vault:2.0.1
|
||||
cap_add:
|
||||
- IPC_LOCK
|
||||
# Overriding the default entrypoint to manipulate configuration strictly in RAM
|
||||
# hostname uses the service slot number (stable across restarts) so that node_id,
|
||||
# api_addr, and cluster_addr remain consistent after every container restart.
|
||||
# vault-N.iklim.co is covered by the *.iklim.co wildcard cert (TLS works).
|
||||
hostname: "vault-{{.Task.Slot}}.iklim.co"
|
||||
entrypoint: ["sh", "-c"]
|
||||
# 1. Resolves HOSTNAME_PLACEHOLDER via sed entirely in RAM (/dev/shm) — no secret touches disk
|
||||
# 1. Substitutes HOSTNAME_PLACEHOLDER with $HOSTNAME (vault-N.iklim.co) in RAM (/dev/shm)
|
||||
# 2. Starts vault server in background
|
||||
# 3. Registers SIGTERM/SIGINT trap for graceful shutdown
|
||||
# 4. Polls vault status; exit code 1 = not yet ready, 0 or 2 = vault is responding
|
||||
# 5. Auto-unseals using vault_unseal_key Docker secret (no-op if key is wrong or file missing)
|
||||
# 5. Retry-unseal loop: attempts unseal every 2s for up to 3 min.
|
||||
# On initial bootstrap peers have empty Raft storage and cannot unseal until they
|
||||
# join the cluster; the loop keeps retrying so they unseal as soon as Raft join succeeds.
|
||||
# 6. Waits for vault to exit and propagates exit code to Docker
|
||||
command: >
|
||||
"cat /vault/config/vault.json | sed \"s/HOSTNAME_PLACEHOLDER/$$HOSTNAME/g\" > /dev/shm/vault.json;
|
||||
@ -26,7 +31,7 @@ services:
|
||||
trap 'kill -TERM $$VAULT_PID; wait $$VAULT_PID' TERM INT;
|
||||
export VAULT_ADDR='https://127.0.0.1:8200' VAULT_SKIP_VERIFY='true';
|
||||
for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do vault status > /dev/null 2>&1; [ $$? -ne 1 ] && break; sleep 2; done;
|
||||
vault operator unseal $$(cat /run/secrets/vault_unseal_key 2>/dev/null) > /dev/null 2>&1 || true;
|
||||
i=0; while [ $$i -lt 90 ]; do vault status > /dev/null 2>&1 && break; vault operator unseal $$(cat /run/secrets/vault_unseal_key 2>/dev/null) > /dev/null 2>&1 || true; sleep 2; i=$$(($$i+1)); done;
|
||||
wait $$VAULT_PID"
|
||||
networks:
|
||||
iklimco-net:
|
||||
|
||||
@ -56,6 +56,31 @@ run_vault() {
|
||||
sh -c "VAULT_ADDR=https://vault.iklim.co:8200 VAULT_SKIP_VERIFY=true $cmd"
|
||||
fi
|
||||
}
|
||||
|
||||
# Run a vault CLI command against one specific node instead of the shared alias.
# Used for direct per-peer unseal.
#
# Arguments: $1   - host name of the target node; it is used verbatim as the
#                   host part of VAULT_ADDR (https://<node_id>:8200).
#                   NOTE(review): callers pass the first column of
#                   `vault operator raft list-peers`, i.e. the Raft node_id;
#                   this presumably equals the container hostname
#                   (vault-N.iklim.co) — confirm against the vault config.
#            $2.. - the vault command line to execute inside the container.
# Globals:   VAULT_TOKEN (read) - prepended to the command when non-empty.
# Returns:   2 when no node_id was given, otherwise the exit status of
#            `docker run`.
run_vault_on() {
  local node_id="${1:-}"
  if [ -z "$node_id" ]; then
    # Without this guard the command would target "https://:8200".
    printf 'run_vault_on: node_id is required\n' >&2
    return 2
  fi
  shift

  local cmd="$*"
  # ${VAULT_TOKEN:-} keeps the function usable before a token exists and
  # under `set -u`.
  if [ -n "${VAULT_TOKEN:-}" ]; then
    cmd="VAULT_TOKEN=$VAULT_TOKEN $cmd"
  fi

  docker run --rm -i --network iklimco-net hashicorp/vault:2.0.1 \
    sh -c "VAULT_ADDR=https://${node_id}:8200 VAULT_SKIP_VERIFY=true $cmd"
}
|
||||
|
||||
# Send N `vault status` requests to the shared alias (vault.iklim.co) and
# succeed only when EVERY one of them reports "Sealed false".
# All requests run inside a single docker container to avoid N separate
# container startups.
#
# Arguments: $1 - number of requests to send (positive integer, default 9).
# Returns:   0 when all requests reported Sealed: false;
#            otherwise the number of requests that did not (sealed,
#            unreachable, or unparsable output);
#            64 when $1 is not a positive integer.
check_cluster_unsealed() {
  local n="${1:-9}"

  # n is spliced into the script below, so accept digits only; n=0 would
  # "pass" without performing a single check.
  case "$n" in
    ''|*[!0-9]*)
      printf 'check_cluster_unsealed: request count must be a positive integer\n' >&2
      return 64
      ;;
  esac
  if [ "$n" -lt 1 ]; then
    printf 'check_cluster_unsealed: request count must be a positive integer\n' >&2
    return 64
  fi

  # awk exits 0 even when nothing matches, so an unreachable node yields an
  # empty status. Anything other than the literal "false" is therefore counted
  # as a failure; comparing against "true" would let outages pass as healthy.
  docker run --rm --network iklimco-net hashicorp/vault:2.0.1 sh -c "
    failed=0; i=0
    while [ \$i -lt $n ]; do
      s=\$(VAULT_ADDR=https://vault.iklim.co:8200 VAULT_SKIP_VERIFY=true vault status 2>/dev/null | awk '/^Sealed/{print \$2}')
      [ \"\$s\" = 'false' ] || failed=\$((failed+1))
      i=\$((i+1)); [ \$i -lt $n ] && sleep 1
    done
    exit \$failed
  "
}
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
# ━━━ ADIM 0 — On kosullar ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
@ -87,7 +112,10 @@ step "ADIM 3 — Vault cluster bekleniyor"
|
||||
wait_service_running "${STACK_NAME}_vault" 3 300
|
||||
sleep 10
|
||||
|
||||
# ━━━ ADIM 4 — Vault durum kontrolu ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
# ━━━ ADIM 4 — Vault durum kontrolu (erken cikis) ━━━━━━━━━━━━━━━━━━━
|
||||
# Early-exit requires the ENTIRE cluster to be unsealed. We fire N requests to
|
||||
# the shared alias (load-balanced) and all must return Sealed: false. A single
|
||||
# healthy node is not sufficient evidence that all 3 nodes are unsealed.
|
||||
step "ADIM 4 — Vault durum kontrolu"
|
||||
VAULT_STATUS_OUT=$(run_vault "vault status 2>/dev/null" || true)
|
||||
VAULT_INITIALIZED=$(echo "$VAULT_STATUS_OUT" | awk '/^Initialized/{print $2}')
|
||||
@ -95,12 +123,17 @@ VAULT_SEALED=$(echo "$VAULT_STATUS_OUT" | awk '/^Sealed/{print $2}')
|
||||
info "Initialized: ${VAULT_INITIALIZED:-unknown}, Sealed: ${VAULT_SEALED:-unknown}"
|
||||
|
||||
if [ "$VAULT_INITIALIZED" = "true" ] && [ "$VAULT_SEALED" = "false" ]; then
|
||||
ok "Vault zaten initialize edilmis ve unsealed"
|
||||
echo
|
||||
echo "════════════════════════════════════════════════"
|
||||
echo " BOOTSTRAP TAMAMLANDI (Vault saglıklı)"
|
||||
echo "════════════════════════════════════════════════"
|
||||
exit 0
|
||||
info "En az 1 node saglikli — cluster geneli kontrol ediliyor (9 istek)..."
|
||||
if check_cluster_unsealed 9; then
|
||||
ok "Vault cluster tamamen unsealed ve saglikli"
|
||||
echo
|
||||
echo "════════════════════════════════════════════════"
|
||||
echo " BOOTSTRAP TAMAMLANDI (Vault saglıklı)"
|
||||
echo "════════════════════════════════════════════════"
|
||||
exit 0
|
||||
else
|
||||
info "Bazi node'lar hala sealed — bootstrap devam ediyor..."
|
||||
fi
|
||||
fi
|
||||
|
||||
# ━━━ ADIM 5 — Vault initialize (gerekirse) ━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
@ -137,22 +170,93 @@ echo "$UNSEAL_KEY" | docker secret create vault_unseal_key - >/dev/null
|
||||
docker service update --secret-add vault_unseal_key "${STACK_NAME}_vault" >/dev/null
|
||||
ok "vault_unseal_key gercek degerle guncellendi"
|
||||
|
||||
# ━━━ ADIM 7 — Unseal dogrula ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
step "ADIM 7 — Vault unseal dogrulaniyor"
|
||||
info "Rolling restart tamamlanmasi ve unseal bekleniyor (30s)..."
|
||||
sleep 30
|
||||
# ━━━ ADIM 6b — Leader unseal ve peer node'lar ━━━━━━━━━━━━━━━━━━━━━━
|
||||
# After rolling restart:
|
||||
# - The node that ran 'vault operator init' has Raft data; its entrypoint retry
|
||||
# loop will unseal it and it becomes the Raft leader.
|
||||
# - Peer nodes start with EMPTY Raft storage. They cannot unseal until they join
|
||||
# the Raft cluster (chicken-and-egg). The entrypoint retry loop keeps trying
|
||||
# every 2s; once they join Raft they become Initialized=true and the next
|
||||
# unseal attempt succeeds.
|
||||
# - We also try to unseal peers explicitly by node_id (= the stable container hostname vault-N.iklim.co = api_addr
|
||||
# hostname). This requires the node_id to be resolvable on the overlay network.
|
||||
# If it is not, the explicit attempt is silently skipped and the entrypoint
|
||||
# retry loop handles it instead (worst case: ~60s extra wait).
|
||||
step "ADIM 6b — Raft leader bekleniyor ve peer node'lar unsealing"
|
||||
info "Rolling restart sonrasi Raft leader unseal bekleniyor (max 3 dakika)..."
|
||||
|
||||
UNSEALED=0
|
||||
for i in $(seq 1 12); do
|
||||
LEADER_UP=0
|
||||
for i in $(seq 1 36); do
|
||||
STATUS=$(run_vault "vault status 2>/dev/null" | awk '/^Sealed/{print $2}' || echo "true")
|
||||
if [ "$STATUS" = "false" ]; then
|
||||
ok "Vault cluster unsealed"
|
||||
ok "Raft leader unsealed"
|
||||
LEADER_UP=1
|
||||
break
|
||||
fi
|
||||
echo " ${i}/36 — Sealed: ${STATUS}, 5s bekleniyor..."
|
||||
sleep 5
|
||||
done
|
||||
[ "$LEADER_UP" -eq 1 ] || fail "Raft leader 3 dakika icinde unseal olmadi"
|
||||
|
||||
ROOT_TOKEN=$(awk '/^Initial Root Token:/{print $NF}' "$MAIN_INIT_FILE")
|
||||
[ -n "$ROOT_TOKEN" ] || fail "Root token '$MAIN_INIT_FILE' dosyasinda bulunamadi"
|
||||
VAULT_TOKEN="$ROOT_TOKEN"
|
||||
|
||||
# Wait for all peers to join the Raft cluster (retry_join retries every ~30s).
|
||||
info "Raft cluster olusmasi bekleniyor (3 peer, max 3 dakika)..."
|
||||
ALL_JOINED=0
|
||||
for i in $(seq 1 36); do
|
||||
PEER_COUNT=$(run_vault "vault operator raft list-peers 2>/dev/null" \
|
||||
| awk 'NR>2 && /[a-zA-Z0-9]/{c++} END{print c+0}' || true)
|
||||
if [ "${PEER_COUNT:-0}" -ge 3 ]; then
|
||||
ok "Raft cluster tam: ${PEER_COUNT}/3 peer"
|
||||
ALL_JOINED=1
|
||||
break
|
||||
fi
|
||||
echo " ${i}/36 — Raft peers: ${PEER_COUNT:-0}/3, 5s bekleniyor..."
|
||||
sleep 5
|
||||
done
|
||||
[ "$ALL_JOINED" -eq 1 ] || fail "Raft cluster 3 dakika icinde tam olusmaadi"
|
||||
|
||||
# Explicitly unseal each non-leader peer via its node_id on the overlay network.
|
||||
# node_id equals the stable container hostname vault-N.iklim.co (the api_addr hostname configured in vault-template-v2.json).
|
||||
# Best-effort: if the hostname is not resolvable, the entrypoint retry loop handles it.
|
||||
info "Peer node'lar individually unsealing (best-effort)..."
|
||||
PEER_HOSTS=$(run_vault "vault operator raft list-peers 2>/dev/null" \
|
||||
| awk 'NR>2 && /[a-zA-Z0-9]/ && !/leader/{print $1}' || true)
|
||||
for peer_host in $PEER_HOSTS; do
|
||||
info " Unsealing peer: $peer_host"
|
||||
if run_vault_on "$peer_host" "vault operator unseal $UNSEAL_KEY" > /dev/null 2>&1; then
|
||||
ok " $peer_host: unseal komutu gonderildi"
|
||||
else
|
||||
info " $peer_host: direct unseal basarisiz (overlay DNS resolve edilemedi — entrypoint loop devam ediyor)"
|
||||
fi
|
||||
done
|
||||
|
||||
# ━━━ ADIM 7 — Tum node'lar unsealed mi? ━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
# Fire 9 requests to the shared alias with 1s sleep between each. With 3 nodes
|
||||
# and any reasonable load-balancing the probability of hitting all 3 is very high.
|
||||
# All 9 must return Sealed: false. We retry for up to 4 minutes to give the
|
||||
# entrypoint retry loop time to finish for nodes that joined Raft late.
|
||||
step "ADIM 7 — Vault cluster tam unseal dogrulaniyor"
|
||||
info "Entrypoint retry loop tamamlanmasi bekleniyor (max 4 dakika)..."
|
||||
|
||||
UNSEALED=0
|
||||
for i in $(seq 1 24); do
|
||||
if check_cluster_unsealed 9; then
|
||||
ok "Vault cluster tamamen unsealed (9/9 kontrol basarili)"
|
||||
UNSEALED=1
|
||||
break
|
||||
fi
|
||||
[ "$i" -eq 12 ] && break
|
||||
echo " ${i}/12 — Sealed: $STATUS, retrying in 5s..."
|
||||
sleep 5
|
||||
echo " ${i}/24 — Cluster henuz tam saglikli degil, 10s bekleniyor..."
|
||||
# Re-attempt explicit peer unseal on every iteration in case hostname became
|
||||
# resolvable after Raft catch-up (containers may still be starting up).
|
||||
PEER_HOSTS=$(run_vault "vault operator raft list-peers 2>/dev/null" \
|
||||
| awk 'NR>2 && /[a-zA-Z0-9]/ && !/leader/{print $1}' || true)
|
||||
for peer_host in $PEER_HOSTS; do
|
||||
run_vault_on "$peer_host" "vault operator unseal $UNSEAL_KEY" > /dev/null 2>&1 || true
|
||||
done
|
||||
sleep 10
|
||||
done
|
||||
|
||||
[ "$UNSEALED" -eq 1 ] || fail "Vault cluster unseal olmadi — 'docker service logs ${STACK_NAME}_vault' ile loglari kontrol edin"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user