feat(health-agent): add CI/CD pipeline, Uptime Kuma setup, and runtime configuration
Some checks failed
Deploy Environment Monitoring to Production Environment / deploy (push) Failing after 10s
Some checks failed
Deploy Environment Monitoring to Production Environment / deploy (push) Failing after 10s
Deploy workflows:
- Integrate health-agent build (test) and image promotion (prod) into monitoring stack workflows
- Add storagebox download of health-agent runtime (.env.monitoring.health-agent-runtime → health-agent/.env) and setup (.env.monitoring.health-agent-setup → health-agent/.env.setup) env files
- Add "Run Uptime Kuma Setup" step: runs setup_uptime_kuma.py inside the built image only when uk_tokens.yml is missing, writes tokens to HEALTH_AGENT_CONFIG_GENERATED_DIR (/mnt/storagebox/monitoring/uk_generated)
- Add health-agent/** and health-agent/deploy/prod.env path triggers to test and prod workflows respectively
- Add HARBOR_CI_TOKEN login and HARBOR_PULL_TOKEN login before stack deploy in both workflows
- Source health-agent/.env before docker stack deploy to expose HEALTH_AGENT_CONFIG_GENERATED_DIR

Dockerfile:
- Copy config/ and scripts/ into image so setup_uptime_kuma.py can run inside the container

setup_uptime_kuma.py:
- Load .env and .env.setup automatically via python-dotenv (no manual export needed)
- Write uk_tokens.yml to config/generated/ (aligned with container volume mount)

Health checks:
- PATRONI_HOSTS and VAULT_HOSTS are now configurable via env vars (comma-separated host:port); no code change needed when node count changes
- REDIS_SENTINEL_HOSTS now correctly parses host:port format; default updated to redis-sentinel:26379
- Fix NameError in check_patroni_cluster() caused by leftover node variable after loop refactor
- Remove verify_ssl=False from Vault check; vault.iklim.co has a valid certificate

Ops:
- Add ops/build-and-push-health-agent.sh for manual bypass of CI pipeline
- Add health-agent/deploy/prod.env template for prod image promotion manifest

Project structure:
- Move .env.example and .env.setup.example to health-agent/env-example/ (root .gitignore excludes health-agent/.env*)
- Add root .gitignore: excludes uk_tokens.yml, __pycache__, .venv, and env files
- Remove health-agent/.gitignore (superseded by root .gitignore)
This commit is contained in:
parent
062d3ff90d
commit
58d5c24f41
@ -6,6 +6,7 @@ on:
|
||||
- prod-env
|
||||
paths:
|
||||
- 'docker-stack-monitoring.yml'
|
||||
- 'health-agent/deploy/prod.env'
|
||||
- 'swag/**'
|
||||
- '.gitea/workflows/deploy-monitoring-prod.yml'
|
||||
|
||||
@ -47,17 +48,73 @@ jobs:
|
||||
run: |
|
||||
source ./common-functions-base.sh
|
||||
export SPRING_PROFILES_ACTIVE=PROD
|
||||
rm -f .env .env.secrets.swag
|
||||
rm -f .env .env.secrets.swag health-agent/.env health-agent/.env.setup
|
||||
scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:prod/secrets/iklim.co/.env ./.env
|
||||
scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:prod/secrets/iklim.co/.env.secrets.swag ./.env.secrets.swag
|
||||
scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:prod/secrets/iklim.co/.env.monitoring.health-agent-runtime ./health-agent/.env
|
||||
scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:prod/secrets/iklim.co/.env.monitoring.health-agent-setup ./health-agent/.env.setup
|
||||
require_env_file ./.env "Main env file"
|
||||
require_env_file ./.env.secrets.swag "SWAG secrets"
|
||||
require_env_file ./health-agent/.env "Health-agent runtime env"
|
||||
require_env_file ./health-agent/.env.setup "Health-agent setup env"
|
||||
|
||||
- name: Promote Health Agent Image
  # Re-tags the image digest pinned in health-agent/deploy/prod.env with the
  # production tag, pushes it, and records the result in .env and $GITHUB_ENV.
  env:
    # Handed over through the environment so the secret value is never
    # spliced into the script text by expression interpolation.
    HARBOR_CI_TOKEN: ${{ secrets.HARBOR_CI_TOKEN }}
  run: |
    source ./common-functions-base.sh
    export SPRING_PROFILES_ACTIVE=PROD
    source_env_file ./health-agent/deploy/prod.env

    # Both manifest values are required; otherwise there is nothing to promote.
    # NOTE(review): on this path HEALTH_AGENT_IMAGE is not written to
    # $GITHUB_ENV, yet the "Run Uptime Kuma Setup" step reads it — confirm
    # that skipping is really intended when uk_tokens.yml does not exist yet.
    if [ -z "${SOURCE_IMAGE_DIGEST:-}" ] || [ -z "${PROD_IMAGE_TAG:-}" ]; then
      log_message "INFO" "health-agent/deploy/prod.env is empty — skipping health-agent promotion"
      exit 0
    fi

    # Only a digest-pinned reference from the health-agent repository is accepted.
    case "$SOURCE_IMAGE_DIGEST" in
      registry.tarla.io/iklimco/health-agent@sha256:*) ;;
      *) log_message "ERROR" "SOURCE_IMAGE_DIGEST must be registry.tarla.io/iklimco/health-agent@sha256:<digest>"; exit 1 ;;
    esac

    # Release-candidate tags must never be published as a prod tag.
    case "$PROD_IMAGE_TAG" in
      *-rc*) log_message "ERROR" "PROD_IMAGE_TAG must not contain -rc"; exit 1 ;;
    esac

    PROD_IMAGE="registry.tarla.io/iklimco/health-agent:${PROD_IMAGE_TAG}"

    printf '%s\n' "${HARBOR_CI_TOKEN}" | \
      docker login registry.tarla.io -u robot-ci-push-iklimco --password-stdin
    docker pull "${SOURCE_IMAGE_DIGEST}"
    docker tag "${SOURCE_IMAGE_DIGEST}" "${PROD_IMAGE}"
    docker push "${PROD_IMAGE}"

    # Update IMAGE_HEALTH_AGENT in .env in place, or append it if absent.
    if grep -q "^IMAGE_HEALTH_AGENT=" .env; then
      sed -i "s|^IMAGE_HEALTH_AGENT=.*$|IMAGE_HEALTH_AGENT=health-agent:${PROD_IMAGE_TAG}|" .env
    else
      echo "IMAGE_HEALTH_AGENT=health-agent:${PROD_IMAGE_TAG}" >> .env
    fi

    # Expose the full image reference to the following steps.
    echo "HEALTH_AGENT_IMAGE=${PROD_IMAGE}" >> "$GITHUB_ENV"
    log_message "SUCCESS" "Promoted: ${PROD_IMAGE}"
|
||||
|
||||
- name: Run Uptime Kuma Setup
  # Runs scripts/setup_uptime_kuma.py inside the health-agent image, but only
  # when uk_tokens.yml is not yet present in the generated-config directory.
  run: |
    source ./common-functions-base.sh
    export SPRING_PROFILES_ACTIVE=PROD
    source_env_file ./health-agent/.env

    # Fail with a clear message instead of letting `mkdir -p ""` abort the step.
    if [ -z "${HEALTH_AGENT_CONFIG_GENERATED_DIR:-}" ]; then
      log_message "ERROR" "HEALTH_AGENT_CONFIG_GENERATED_DIR is not set in health-agent/.env"
      exit 1
    fi
    mkdir -p "${HEALTH_AGENT_CONFIG_GENERATED_DIR}"

    if [ ! -f "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then
      # HEALTH_AGENT_IMAGE comes from $GITHUB_ENV and is missing when the
      # promotion step skipped; docker would otherwise fail on an empty
      # image reference.
      if [ -z "${HEALTH_AGENT_IMAGE:-}" ]; then
        log_message "ERROR" "HEALTH_AGENT_IMAGE is not set — cannot run Uptime Kuma setup"
        exit 1
      fi
      docker run --rm \
        -v "${HEALTH_AGENT_CONFIG_GENERATED_DIR}:/app/config/generated" \
        --env-file "$(pwd)/health-agent/.env" \
        --env-file "$(pwd)/health-agent/.env.setup" \
        "${HEALTH_AGENT_IMAGE}" \
        python scripts/setup_uptime_kuma.py
      log_message "SUCCESS" "Uptime Kuma setup complete, tokens written to ${HEALTH_AGENT_CONFIG_GENERATED_DIR}"
    else
      log_message "INFO" "uk_tokens.yml already exists, skipping Uptime Kuma setup"
    fi
|
||||
|
||||
- name: Deploy Monitoring Stack
|
||||
run: |
|
||||
source ./common-functions-base.sh
|
||||
export SPRING_PROFILES_ACTIVE=PROD
|
||||
source_env_file ./.env
|
||||
source_env_file ./health-agent/.env
|
||||
export HEALTH_AGENT_ENV_FILE="$(pwd)/health-agent/.env"
|
||||
echo "${{ secrets.HARBOR_PULL_TOKEN }}" | \
|
||||
docker login registry.tarla.io -u robot-swarm-pull-iklimco --password-stdin
|
||||
|
||||
# Remove leftover dozzle_users Docker secret from previous setup
|
||||
docker secret rm dozzle_users 2>/dev/null || true
|
||||
|
||||
@ -6,6 +6,7 @@ on:
|
||||
- test
|
||||
paths:
|
||||
- 'docker-stack-monitoring.yml'
|
||||
- 'health-agent/**'
|
||||
- 'swag/**'
|
||||
- '.gitea/workflows/deploy-monitoring-test.yml'
|
||||
|
||||
@ -43,18 +44,67 @@ jobs:
|
||||
run: |
|
||||
source ./common-functions-base.sh
|
||||
export SPRING_PROFILES_ACTIVE=TEST
|
||||
rm -f .env .env.secrets.swag
|
||||
rm -f .env .env.secrets.swag health-agent/.env health-agent/.env.setup
|
||||
scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:test/secrets/iklim.co/.env ./.env
|
||||
scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:test/secrets/iklim.co/.env.secrets.swag ./.env.secrets.swag
|
||||
scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:test/secrets/iklim.co/.env.monitoring.health-agent-runtime ./health-agent/.env
|
||||
scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:test/secrets/iklim.co/.env.monitoring.health-agent-setup ./health-agent/.env.setup
|
||||
require_env_file ./.env "Main env file"
|
||||
require_env_file ./.env.secrets.swag "SWAG secrets"
|
||||
require_env_file ./health-agent/.env "Health-agent runtime env"
|
||||
require_env_file ./health-agent/.env.setup "Health-agent setup env"
|
||||
|
||||
- name: Build and Push Health Agent
  # Builds the health-agent image as <version>-rc, pushes it, records the tag
  # in .env / $GITHUB_ENV and prints the manifest needed for prod promotion.
  env:
    # Handed over through the environment so the secret value is never
    # spliced into the script text by expression interpolation.
    HARBOR_CI_TOKEN: ${{ secrets.HARBOR_CI_TOKEN }}
  run: |
    source ./common-functions-base.sh
    export SPRING_PROFILES_ACTIVE=TEST

    # The version is taken from the `version = "..."` line of pyproject.toml.
    VERSION=$(sed -n 's/^version = "\(.*\)"/\1/p' health-agent/pyproject.toml)
    # An empty version would yield the invalid tag "health-agent:-rc".
    if [ -z "${VERSION}" ]; then
      log_message "ERROR" "Could not determine version from health-agent/pyproject.toml"
      exit 1
    fi
    IMAGE_TAG="health-agent:${VERSION}-rc"
    IMAGE_FULL="registry.tarla.io/iklimco/${IMAGE_TAG}"

    printf '%s\n' "${HARBOR_CI_TOKEN}" | \
      docker login registry.tarla.io -u robot-ci-push-iklimco --password-stdin
    docker build -t "${IMAGE_FULL}" health-agent/
    docker push "${IMAGE_FULL}"

    # Pull back after the push, then read the repo digest from the local image.
    docker pull -q "${IMAGE_FULL}"
    DIGEST=$(docker image inspect "${IMAGE_FULL}" --format '{{index .RepoDigests 0}}')

    # Update IMAGE_HEALTH_AGENT in .env in place, or append it if absent.
    if grep -q "^IMAGE_HEALTH_AGENT=" .env; then
      sed -i "s|^IMAGE_HEALTH_AGENT=.*$|IMAGE_HEALTH_AGENT=${IMAGE_TAG}|" .env
    else
      echo "IMAGE_HEALTH_AGENT=${IMAGE_TAG}" >> .env
    fi

    # Expose the full image reference to the following steps.
    echo "HEALTH_AGENT_IMAGE=${IMAGE_FULL}" >> "$GITHUB_ENV"
    log_message "SUCCESS" "Pushed: ${IMAGE_FULL}"
    log_message "INFO" "Promotion manifest — write to health-agent/deploy/prod.env on prod-env branch:"
    echo " SOURCE_IMAGE_DIGEST=${DIGEST}"
    echo " PROD_IMAGE_TAG=${VERSION}"
|
||||
|
||||
- name: Run Uptime Kuma Setup
  # Runs scripts/setup_uptime_kuma.py inside the freshly built health-agent
  # image, but only when uk_tokens.yml is not yet present in the
  # generated-config directory.
  run: |
    source ./common-functions-base.sh
    export SPRING_PROFILES_ACTIVE=TEST
    source_env_file ./health-agent/.env

    # Fail with a clear message instead of letting `mkdir -p ""` abort the step.
    if [ -z "${HEALTH_AGENT_CONFIG_GENERATED_DIR:-}" ]; then
      log_message "ERROR" "HEALTH_AGENT_CONFIG_GENERATED_DIR is not set in health-agent/.env"
      exit 1
    fi
    mkdir -p "${HEALTH_AGENT_CONFIG_GENERATED_DIR}"

    if [ ! -f "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then
      # HEALTH_AGENT_IMAGE is read from $GITHUB_ENV; docker would fail on an
      # empty image reference with an unhelpful error.
      if [ -z "${HEALTH_AGENT_IMAGE:-}" ]; then
        log_message "ERROR" "HEALTH_AGENT_IMAGE is not set — cannot run Uptime Kuma setup"
        exit 1
      fi
      docker run --rm \
        -v "${HEALTH_AGENT_CONFIG_GENERATED_DIR}:/app/config/generated" \
        --env-file "$(pwd)/health-agent/.env" \
        --env-file "$(pwd)/health-agent/.env.setup" \
        "${HEALTH_AGENT_IMAGE}" \
        python scripts/setup_uptime_kuma.py
      log_message "SUCCESS" "Uptime Kuma setup complete, tokens written to ${HEALTH_AGENT_CONFIG_GENERATED_DIR}"
    else
      log_message "INFO" "uk_tokens.yml already exists, skipping Uptime Kuma setup"
    fi
|
||||
|
||||
- name: Deploy Monitoring Stack
|
||||
run: |
|
||||
source ./common-functions-base.sh
|
||||
export SPRING_PROFILES_ACTIVE=TEST
|
||||
source_env_file ./.env
|
||||
|
||||
source_env_file ./health-agent/.env
|
||||
export HEALTH_AGENT_ENV_FILE="$(pwd)/health-agent/.env"
|
||||
echo "${{ secrets.HARBOR_PULL_TOKEN }}" | \
|
||||
docker login registry.tarla.io -u robot-swarm-pull-iklimco --password-stdin
|
||||
docker stack deploy \
|
||||
--with-registry-auth \
|
||||
--resolve-image changed \
|
||||
|
||||
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
# Push tokens written by scripts/setup_uptime_kuma.py.
health-agent/config/generated/uk_tokens.yml
|
||||
# Python bytecode caches.
*__pycache__*
|
||||
# Contents of the local virtualenv.
health-agent/.venv/*
|
||||
# NOTE(review): presumably per-environment working directories — confirm.
health-agent/prod-env/
|
||||
health-agent/test-env/
|
||||
# Every health-agent env file (.env, .env.setup, ...); the example templates
# live under health-agent/env-example/ so they are not caught by this pattern.
health-agent/.env*
|
||||
@ -67,9 +67,8 @@ lookup_env_value() {
|
||||
}
|
||||
|
||||
# Computes env values that are derived from other env values by arithmetic.
# Currently: converts the JWT access-token lifetime from milliseconds to
# whole seconds (integer division).
# Globals:
#   JWT_ACCESS_TOKEN_EXPIRATION      (read)    lifetime in milliseconds
#   JWT_ACCESS_TOKEN_EXPIRATION_SEC  (written) lifetime in seconds, exported
refresh_calculated_env_vars() {
  # Default to 0 when the source value is unset/empty: this is what the
  # arithmetic yields without `set -u`, and it avoids an "unbound variable"
  # abort when the caller runs with `set -u`.
  local expiration_ms="${JWT_ACCESS_TOKEN_EXPIRATION:-0}"
  export JWT_ACCESS_TOKEN_EXPIRATION_SEC=$(( expiration_ms / 1000 ))
}
|
||||
|
||||
# Tüm çevre dosyalarını (ana env, ortak sırlar ve servis sırları) tazeleyerek yükler.
|
||||
|
||||
3
health-agent/.gitignore
vendored
3
health-agent/.gitignore
vendored
@ -1,3 +0,0 @@
|
||||
config/generated/uk_tokens.yml
|
||||
.env
|
||||
.env.setup
|
||||
@ -6,6 +6,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf
|
||||
|
||||
COPY pyproject.toml ./
|
||||
COPY src/ ./src/
|
||||
COPY config/ ./config/
|
||||
COPY scripts/ ./scripts/
|
||||
RUN pip install --no-cache-dir .
|
||||
ENV PYTHONPATH=/app/src
|
||||
|
||||
|
||||
@ -52,7 +52,7 @@ Environment_Monitoring/health-agent/
|
||||
├── Dockerfile
|
||||
├── pyproject.toml
|
||||
├── .env.example # health-agent runtime değişkenleri (credentials, ENV, CLUSTER_SIZE_*)
|
||||
└── .env.setup.example # setup script değişkenleri (UK_API_KEY, Slack webhook'ları)
|
||||
└── .env.setup.example # setup script değişkenleri (UK_URL, UK_USER, UK_PASS, Slack webhook'ları)
|
||||
```
|
||||
|
||||
---
|
||||
@ -76,6 +76,8 @@ Environment_Monitoring/health-agent/
|
||||
| `EXTERNAL_DOMAIN` | Base domain — `iklim.co` in both environments |
|
||||
| `EXTERNAL_SUBDOMAIN_SUFFIX` | Subdomain suffix — empty for prod, `-test` for test → `api-test.iklim.co` |
|
||||
| `SLACK_WEBHOOK_IKLIM_{ENV}_OPS` | Direct Slack webhook for container crash/OOM events — e.g. `SLACK_WEBHOOK_IKLIM_PROD_OPS` |
|
||||
| `PATRONI_HOSTS` | Patroni node list (comma-separated `host:port`) — e.g. `patroni-01:8008,patroni-02:8008` |
|
||||
| `VAULT_HOSTS` | Vault node subdomain list (comma-separated) — e.g. `vault-1,vault-2,vault-3` |
|
||||
| `RABBITMQ_USER` / `RABBITMQ_PASS` | RabbitMQ management credentials |
|
||||
| `MONGO_URI` | MongoDB connection URI |
|
||||
| `REDIS_PASSWORD` | Redis / Sentinel password |
|
||||
@ -104,13 +106,19 @@ Push token'ları `config/generated/uk_tokens.yml`'den otomatik okunur — bu dos
|
||||
|
||||
Health-agent deploy edilmeden önce kurulum script'i çalıştırılır. Script, `monitors.yml`'i okuyarak tüm monitor, tag, group ve status page'leri Uptime Kuma'da oluşturur; push token'larını `config/generated/uk_tokens.yml`'e yazar.
|
||||
|
||||
Script [`uptime-kuma-api`](https://pypi.org/project/uptime-kuma-api/) kütüphanesini kullanır. API key authentication desteği implementasyon öncesi doğrulanmalıdır; desteklenmiyorsa `requests` + `Authorization: Bearer <api_key>` ile REST API doğrudan çağrılır.
|
||||
Script `uptime-kuma-api-v2` kütüphanesini kullanır; Socket.IO üzerinden username/password ile bağlanır.
|
||||
|
||||
```bash
|
||||
cd Environment_Monitoring/health-agent
|
||||
|
||||
# setup değişkenlerini doldur
|
||||
cp .env.setup.example .env.setup
|
||||
# Python 3.12 venv oluştur ve aktive et
|
||||
python3.12 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -e ".[dev]"
|
||||
|
||||
# runtime ve setup değişkenlerini doldur
|
||||
cp .env.example .env # ENV, EXTERNAL_DOMAIN vb.
|
||||
cp .env.setup.example .env.setup # UK_URL, UK_USER, UK_PASS, Slack webhook'ları
|
||||
|
||||
# önce dry-run ile ne yapılacağını gör
|
||||
python scripts/setup_uptime_kuma.py --dry-run
|
||||
@ -118,10 +126,8 @@ python scripts/setup_uptime_kuma.py --dry-run
|
||||
# tüm kaynakları oluştur
|
||||
python scripts/setup_uptime_kuma.py
|
||||
|
||||
# sadece belirli bileşenleri güncelle
|
||||
python scripts/setup_uptime_kuma.py --only monitors
|
||||
python scripts/setup_uptime_kuma.py --only notifications
|
||||
python scripts/setup_uptime_kuma.py --only status-page
|
||||
# sadece belirli bir monitörü işle (monitor adıyla)
|
||||
python scripts/setup_uptime_kuma.py --only SWARM-CLUSTER
|
||||
```
|
||||
|
||||
Script idempotent çalışır — CI/CD pipeline'ında her deploy'da güvenle tetiklenebilir.
|
||||
@ -141,6 +147,10 @@ Planlı bakım/deploy sırasında etkilenecek group için Uptime Kuma'da Mainten
|
||||
```bash
|
||||
cd Environment_Monitoring/health-agent
|
||||
|
||||
# Python 3.12 venv oluştur ve aktive et
|
||||
python3.12 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
|
||||
# bağımlılıkları kur
|
||||
pip install -e ".[dev]"
|
||||
|
||||
|
||||
2
health-agent/deploy/prod.env
Normal file
2
health-agent/deploy/prod.env
Normal file
@ -0,0 +1,2 @@
|
||||
SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:fadf229d4423075d2871f9dc4a5a0afdf6dfe7c5fcd04d866b2d6d6fe8942b56
|
||||
PROD_IMAGE_TAG=0.1.0
|
||||
@ -9,6 +9,8 @@ EXTERNAL_DOMAIN=iklim.co
|
||||
EXTERNAL_SUBDOMAIN_SUFFIX=
|
||||
UK_PUSH_URL_BASE=https://uptime.tarla.io/api/push
|
||||
SLACK_WEBHOOK_IKLIM_PROD_OPS=
|
||||
PATRONI_HOSTS=patroni-01:8008,patroni-02:8008,patroni-03:8008
|
||||
VAULT_HOSTS=vault
|
||||
RABBITMQ_USER=
|
||||
RABBITMQ_PASS=
|
||||
MONGO_URI=
|
||||
@ -17,3 +19,6 @@ REDIS_MASTER_NAME=
|
||||
REDIS_SENTINEL_HOSTS=
|
||||
STORAGEBOX_PATH=
|
||||
APISIX_ADMIN_KEY=
|
||||
# NOTE(review): PATRONI_HOSTS and VAULT_HOSTS are also assigned non-empty
# values in the previous hunk; if both hunks belong to the same file, these
# empty entries come last and may override them — confirm and keep one pair.
PATRONI_HOSTS=
|
||||
VAULT_HOSTS=
|
||||
HEALTH_AGENT_CONFIG_GENERATED_DIR=
|
||||
@ -2,11 +2,16 @@ import os
|
||||
import argparse
|
||||
import yaml
|
||||
import logging
|
||||
from dotenv import load_dotenv
|
||||
from uptime_kuma_api import UptimeKumaApi, MonitorType
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger("uk-setup")
|
||||
|
||||
_root = os.path.join(os.path.dirname(__file__), "..")
|
||||
load_dotenv(os.path.join(_root, ".env"))
|
||||
load_dotenv(os.path.join(_root, ".env.setup"))
|
||||
|
||||
def format_str(text, env_name, project):
|
||||
if not isinstance(text, str):
|
||||
return text
|
||||
@ -118,7 +123,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
||||
logger.warning(f"Status page ops failed: {e}")
|
||||
|
||||
# 4. Write tokens to uk_tokens.yml
|
||||
token_file = os.path.join(os.path.dirname(__file__), "..", "config", "uk_tokens.yml")
|
||||
token_file = os.path.join(os.path.dirname(__file__), "..", "config", "generated", "uk_tokens.yml")
|
||||
if not dry_run:
|
||||
with open(token_file, "w") as f:
|
||||
yaml.dump(tokens, f)
|
||||
|
||||
@ -29,19 +29,27 @@ def http_check(url, expected_status=None, auth=None, verify_ssl=True, timeout=5,
|
||||
return False, None, ping_ms, str(e)
|
||||
|
||||
def check_patroni_cluster():
|
||||
nodes = ["patroni-01", "patroni-02", "patroni-03"]
|
||||
hosts_env = os.getenv("PATRONI_HOSTS", "patroni-01:8008,patroni-02:8008,patroni-03:8008")
|
||||
nodes = []
|
||||
for h in hosts_env.split(","):
|
||||
h = h.strip()
|
||||
if ":" in h:
|
||||
host, port = h.rsplit(":", 1)
|
||||
nodes.append((host, int(port)))
|
||||
else:
|
||||
nodes.append((h, 8008))
|
||||
cluster_data = None
|
||||
error_msg = "All Patroni nodes unreachable"
|
||||
start_t = time.time()
|
||||
|
||||
for node in nodes:
|
||||
url = f"http://{node}:8008/cluster"
|
||||
|
||||
for host, port in nodes:
|
||||
url = f"http://{host}:{port}/cluster"
|
||||
ok, resp, _, err = http_check(url, timeout=3)
|
||||
if ok and resp:
|
||||
cluster_data = resp.json()
|
||||
break
|
||||
elif err:
|
||||
error_msg = f"{node} error: {err}"
|
||||
error_msg = f"{host}:{port} error: {err}"
|
||||
|
||||
ping_ms = int((time.time() - start_t) * 1000)
|
||||
|
||||
@ -117,7 +125,8 @@ def check_apisix():
|
||||
push("APISIX-GATEWAY", "down", f"admin API unreachable: {err or resp.status_code}", ping_ms)
|
||||
|
||||
def check_vault():
|
||||
nodes = ["vault-1", "vault-2", "vault-3"]
|
||||
hosts_env = os.getenv("VAULT_HOSTS", "vault")
|
||||
nodes = [h.strip() for h in hosts_env.split(",")]
|
||||
domain = os.getenv("EXTERNAL_DOMAIN", "iklim.co")
|
||||
unsealed_count = 0
|
||||
total = len(nodes)
|
||||
@ -127,7 +136,7 @@ def check_vault():
|
||||
start_t = time.time()
|
||||
for node in nodes:
|
||||
url = f"https://{node}.{domain}:8200/v1/sys/health"
|
||||
ok, resp, ms, err = http_check(url, verify_ssl=False, expected_status=[200, 429, 473])
|
||||
ok, resp, ms, err = http_check(url, expected_status=[200, 429, 473])
|
||||
max_ping = max(max_ping, ms)
|
||||
|
||||
if resp:
|
||||
|
||||
@ -9,8 +9,15 @@ logger = logging.getLogger(__name__)
|
||||
def check_redis_sentinel():
|
||||
start_t = time.time()
|
||||
|
||||
hosts = os.getenv("REDIS_SENTINEL_HOSTS", "redis-sentinel-01,redis-sentinel-02,redis-sentinel-03")
|
||||
sentinel_nodes = [(h.strip(), 26379) for h in hosts.split(",")]
|
||||
hosts = os.getenv("REDIS_SENTINEL_HOSTS", "redis-sentinel:26379")
|
||||
sentinel_nodes = []
|
||||
for h in hosts.split(","):
|
||||
h = h.strip()
|
||||
if ":" in h:
|
||||
host, port = h.rsplit(":", 1)
|
||||
sentinel_nodes.append((host, int(port)))
|
||||
else:
|
||||
sentinel_nodes.append((h, 26379))
|
||||
|
||||
master_name = os.getenv("REDIS_MASTER_NAME", "prod-master")
|
||||
password = os.getenv("REDIS_PASSWORD", None)
|
||||
|
||||
66
ops/build-and-push-health-agent.sh
Executable file
66
ops/build-and-push-health-agent.sh
Executable file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env bash
|
||||
# Builds the health-agent Docker image and pushes it to Harbor as a release candidate.
|
||||
# Use this to bypass the CI pipeline for manual builds/hotfixes.
|
||||
#
|
||||
# Usage (run from Environment_Monitoring/ root):
|
||||
# HARBOR_CI_TOKEN=<token> ./ops/build-and-push-health-agent.sh
|
||||
#
|
||||
# Optional env vars:
|
||||
# HARBOR_REGISTRY (default: registry.tarla.io)
|
||||
# HARBOR_PROJECT (default: iklimco)
|
||||
# HARBOR_CI_USER (default: robot-ci-push-iklimco)
|
||||
# TAG_SUFFIX (default: -rc)
|
||||
|
||||
# Strict mode: abort on errors, unset variables and failed pipeline stages.
set -euo pipefail

# Work from the parent of this script's directory so the relative
# health-agent/ paths used below resolve regardless of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR/.."

# Registry settings; each can be overridden from the environment.
HARBOR_REGISTRY="${HARBOR_REGISTRY:-registry.tarla.io}"
HARBOR_PROJECT="${HARBOR_PROJECT:-iklimco}"
HARBOR_CI_USER="${HARBOR_CI_USER:-robot-ci-push-iklimco}"
# Suffix appended to the version to form the image tag (default "-rc").
TAG_SUFFIX="${TAG_SUFFIX:--rc}"
|
||||
|
||||
# log MESSAGE... — print a "[HH:MM:SS] MESSAGE" line to stdout.
# printf is used instead of echo so the message text is never reinterpreted.
log() { printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*"; }

# die MESSAGE... — print a "[HH:MM:SS] ERROR: MESSAGE" line to stderr and
# terminate the script with exit status 1.
die() { printf '[%s] ERROR: %s\n' "$(date +%H:%M:%S)" "$*" >&2; exit 1; }
|
||||
|
||||
# sep TITLE... — print a section header to stdout: an empty line, a rule of
# 70 "─" characters, the title indented by one space, and the rule again.
sep() {
  local title="$*"
  local rule
  # Build 70 spaces, then turn each one into a box-drawing dash.
  printf -v rule '%*s' 70 ''
  rule=${rule// /─}
  printf '\n%s\n %s\n%s\n' "$rule" "$title" "$rule"
}
|
||||
|
||||
# --- Preconditions ----------------------------------------------------------
# All guards use the same `test || die` form, so a passing check never leaves
# a non-zero status behind.
[[ -n "${HARBOR_CI_TOKEN:-}" ]] || die "HARBOR_CI_TOKEN env var is required."
[[ -f "health-agent/pyproject.toml" ]] || die "health-agent/pyproject.toml not found — run from Environment_Monitoring/ root."
[[ -f "health-agent/Dockerfile" ]] || die "health-agent/Dockerfile not found."

# The version is taken from the `version = "..."` line of pyproject.toml.
VERSION=$(sed -n 's/^version = "\(.*\)"/\1/p' health-agent/pyproject.toml)
[[ -n "$VERSION" ]] || die "Could not determine version from health-agent/pyproject.toml"

IMAGE="${HARBOR_REGISTRY}/${HARBOR_PROJECT}/health-agent:${VERSION}${TAG_SUFFIX}"

sep "health-agent → ${IMAGE}"

# --- Login ------------------------------------------------------------------
log "Logging in to ${HARBOR_REGISTRY} as ${HARBOR_CI_USER}"
# The token goes through stdin so it never shows up in the process list.
printf '%s\n' "$HARBOR_CI_TOKEN" | docker login "$HARBOR_REGISTRY" -u "$HARBOR_CI_USER" --password-stdin
log "✔ Harbor login successful"

# --- Build & push -----------------------------------------------------------
log "Building..."
docker build -t "$IMAGE" health-agent/

log "Pushing..."
docker push "$IMAGE"
# Pull back after the push, then read the repo digest from the local image.
docker pull -q "$IMAGE"
DIGEST=$(docker image inspect "$IMAGE" --format '{{index .RepoDigests 0}}')
[[ -n "$DIGEST" ]] || die "Could not determine repo digest for ${IMAGE}"

log "✔ Pushed: ${IMAGE}"

# --- Promotion manifest -----------------------------------------------------
sep "Promotion Manifest — write to health-agent/deploy/prod.env on prod-env branch"
echo
echo " SOURCE_IMAGE_DIGEST=${DIGEST}"
echo " PROD_IMAGE_TAG=${VERSION}"
echo
|
||||
Loading…
x
Reference in New Issue
Block a user