feat(vault): Add cluster health check to skip bootstrap

Integrates `vault-check-health.sh` into `vault-bootstrap.sh` to perform a network-based health check. If all Vault nodes are found to be initialized and unsealed, the bootstrap process is skipped, preventing unnecessary restarts or re-initialization.

Renames `failover_scenarios.md` to `vault_failover_scenarios.md` for improved clarity and consistency.
This commit is contained in:
Murat ÖZDEMİR 2026-06-12 09:42:10 +03:00
parent 483bd40cc4
commit 99af68deb2
3 changed files with 44 additions and 0 deletions

View File

@ -89,6 +89,16 @@ docker node ls &>/dev/null || fail "Swarm manager node is required"
[ -f "$STACK_FILE" ] || fail "Stack file not found: $STACK_FILE" [ -f "$STACK_FILE" ] || fail "Stack file not found: $STACK_FILE"
ok "Prerequisites completed" ok "Prerequisites completed"
# ━━━ NEW: Cluster Health Check ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Run the health-check script that sits next to this script. When it exits 0,
# print a banner and stop with success instead of continuing with the steps
# below; any non-zero exit falls through to the normal bootstrap.
health_check_script="$(dirname "$0")/vault-check-health.sh"
if bash "$health_check_script"; then
  printf '\n%s\n%s\n%s\n' \
    "════════════════════════════════════════════════" \
    " CLUSTER ALREADY HEALTHY — Skipping Bootstrap" \
    "════════════════════════════════════════════════"
  exit 0
fi
# ━━━ STEP 1 — Placeholder secret ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ # ━━━ STEP 1 — Placeholder secret ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
step "STEP 1 — Checking vault_unseal_key" step "STEP 1 — Checking vault_unseal_key"
if docker secret ls --format '{{.Name}}' | grep -q '^vault_unseal_key'; then if docker secret ls --format '{{.Name}}' | grep -q '^vault_unseal_key'; then

34
vault-check-health.sh Executable file
View File

@ -0,0 +1,34 @@
#!/bin/bash
# vault-check-health.sh — Verifies cluster health over the network (no token required).
#
# Queries /v1/sys/health on every node in VAULT_NODES from a throwaway
# alpine/curl container attached to the iklimco-net Docker network.
#
# Exit status:
#   0  every listed node answered 200 or 429 (initialized and unsealed)
#   1  at least one node answered anything else or did not respond
VAULT_NODES=("vault-1.iklim.co" "vault-2.iklim.co" "vault-3.iklim.co")
# Derive the expected count from the list so that adding or removing a node
# cannot leave a stale hard-coded threshold behind.
TOTAL_NODES=${#VAULT_NODES[@]}
HEALTHY_COUNT=0

echo " --> Starting cluster health check (network-based)..."

for node in "${VAULT_NODES[@]}"; do
  # Check the /v1/sys/health endpoint for each node.
  #   200: Initialized, unsealed, active
  #   429: Initialized, unsealed, standby
  #   501: Not initialized
  #   503: Sealed
  # NOTE(review): -k disables TLS certificate verification — confirm this is
  # intentional (e.g. self-signed node certificates).
  # A failing probe is an expected outcome here, so the exit status of
  # docker/curl is deliberately ignored; only the captured output matters.
  status_code=$(docker run --rm --network iklimco-net alpine/curl -s -o /dev/null -w "%{http_code}" \
    --max-time 3 -k "https://${node}:8200/v1/sys/health") || true

  # curl already prints "000" itself when the transfer fails, so appending a
  # fallback with `|| echo` would yield "000000". Normalise instead: anything
  # that is not exactly three digits (e.g. empty output because docker could
  # not start the container) is reported as "000".
  if [[ ! "$status_code" =~ ^[0-9]{3}$ ]]; then
    status_code="000"
  fi

  if [ "$status_code" = "200" ] || [ "$status_code" = "429" ]; then
    echo " [✓] $node: Healthy (Status: $status_code)"
    HEALTHY_COUNT=$((HEALTHY_COUNT + 1))
  else
    echo " [!] $node: Problematic or Not Responding (Status: $status_code)"
  fi
done

# An empty node list must never count as "all healthy".
if [ "$TOTAL_NODES" -gt 0 ] && [ "$HEALTHY_COUNT" -eq "$TOTAL_NODES" ]; then
  echo " --> Result: All nodes ($HEALTHY_COUNT/$TOTAL_NODES) are healthy."
  exit 0
else
  echo " --> Result: Cluster is not fully healthy ($HEALTHY_COUNT/$TOTAL_NODES)."
  exit 1
fi