From 58d5c24f412b1b09853e261ed4f9ebd70d1d41df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= Date: Fri, 26 Jun 2026 18:45:17 +0300 Subject: [PATCH] feat(health-agent): add CI/CD pipeline, Uptime Kuma setup, and runtime configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deploy workflows: - Integrate health-agent build (test) and image promotion (prod) into monitoring stack workflows - Add storagebox download of health-agent runtime (.env.monitoring.health-agent-runtime → health-agent/.env) and setup (.env.monitoring.health-agent-setup → health-agent/.env.setup) env files - Add "Run Uptime Kuma Setup" step: runs setup_uptime_kuma.py inside the built image only when uk_tokens.yml is missing, writes tokens to HEALTH_AGENT_CONFIG_GENERATED_DIR (/mnt/storagebox/monitoring/uk_generated) - Add health-agent/** and health-agent/deploy/prod.env path triggers to test and prod workflows respectively - Add HARBOR_CI_TOKEN login and HARBOR_PULL_TOKEN login before stack deploy in both workflows - Source health-agent/.env before docker stack deploy to expose HEALTH_AGENT_CONFIG_GENERATED_DIR Dockerfile: - Copy config/ and scripts/ into image so setup_uptime_kuma.py can run inside the container setup_uptime_kuma.py: - Load .env and .env.setup automatically via python-dotenv (no manual export needed) - Write uk_tokens.yml to config/generated/ (aligned with container volume mount) Health checks: - PATRONI_HOSTS and VAULT_HOSTS are now configurable via env vars (comma-separated host:port); no code change needed when node count changes - REDIS_SENTINEL_HOSTS now correctly parses host:port format; default updated to redis-sentinel:26379 - Fix NameError in check_patroni_cluster() caused by leftover node variable after loop refactor - Remove verify_ssl=False from Vault check; vault.iklim.co has a valid certificate Ops: - Add ops/build-and-push-health-agent.sh for manual bypass of CI pipeline - Add 
health-agent/deploy/prod.env template for prod image promotion manifest Project structure: - Move .env.example and .env.setup.example to health-agent/env-example/ (root .gitignore excludes health-agent/.env*) - Add root .gitignore: excludes uk_tokens.yml, __pycache__, .venv, and env files - Remove health-agent/.gitignore (superseded by root .gitignore) --- .gitea/workflows/deploy-monitoring-prod.yml | 59 ++++++++++++++++- .gitea/workflows/deploy-monitoring-test.yml | 54 ++++++++++++++- .gitignore | 6 ++ common-functions-base.sh | 3 +- health-agent/.gitignore | 3 - health-agent/Dockerfile | 2 + health-agent/README.md | 26 +++++--- health-agent/deploy/prod.env | 2 + health-agent/{ => env-example}/.env.example | 5 ++ .../{ => env-example}/.env.setup.example | 0 health-agent/scripts/setup_uptime_kuma.py | 7 +- health-agent/src/health_agent/checks/http.py | 23 +++++-- .../src/health_agent/checks/redis_sentinel.py | 11 +++- ops/build-and-push-health-agent.sh | 66 +++++++++++++++++++ 14 files changed, 241 insertions(+), 26 deletions(-) create mode 100644 .gitignore delete mode 100644 health-agent/.gitignore create mode 100644 health-agent/deploy/prod.env rename health-agent/{ => env-example}/.env.example (74%) rename health-agent/{ => env-example}/.env.setup.example (100%) create mode 100755 ops/build-and-push-health-agent.sh diff --git a/.gitea/workflows/deploy-monitoring-prod.yml b/.gitea/workflows/deploy-monitoring-prod.yml index b529d12..10bcaf9 100644 --- a/.gitea/workflows/deploy-monitoring-prod.yml +++ b/.gitea/workflows/deploy-monitoring-prod.yml @@ -6,6 +6,7 @@ on: - prod-env paths: - 'docker-stack-monitoring.yml' + - 'health-agent/deploy/prod.env' - 'swag/**' - '.gitea/workflows/deploy-monitoring-prod.yml' @@ -47,17 +48,73 @@ jobs: run: | source ./common-functions-base.sh export SPRING_PROFILES_ACTIVE=PROD - rm -f .env .env.secrets.swag + rm -f .env .env.secrets.swag health-agent/.env health-agent/.env.setup scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ 
vars.STORAGEBOX_USER }}.your-storagebox.de:prod/secrets/iklim.co/.env ./.env scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:prod/secrets/iklim.co/.env.secrets.swag ./.env.secrets.swag + scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:prod/secrets/iklim.co/.env.monitoring.health-agent-runtime ./health-agent/.env + scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:prod/secrets/iklim.co/.env.monitoring.health-agent-setup ./health-agent/.env.setup require_env_file ./.env "Main env file" require_env_file ./.env.secrets.swag "SWAG secrets" + require_env_file ./health-agent/.env "Health-agent runtime env" + require_env_file ./health-agent/.env.setup "Health-agent setup env" + + - name: Promote Health Agent Image + run: | + source ./common-functions-base.sh + export SPRING_PROFILES_ACTIVE=PROD + source_env_file ./health-agent/deploy/prod.env + if [ -z "${SOURCE_IMAGE_DIGEST:-}" ] || [ -z "${PROD_IMAGE_TAG:-}" ]; then + log_message "INFO" "health-agent/deploy/prod.env is empty — skipping health-agent promotion" + exit 0 + fi + case "$SOURCE_IMAGE_DIGEST" in + registry.tarla.io/iklimco/health-agent@sha256:*) ;; + *) log_message "ERROR" "SOURCE_IMAGE_DIGEST must be registry.tarla.io/iklimco/health-agent@sha256:"; exit 1 ;; + esac + case "$PROD_IMAGE_TAG" in + *-rc*) log_message "ERROR" "PROD_IMAGE_TAG must not contain -rc"; exit 1 ;; + esac + PROD_IMAGE="registry.tarla.io/iklimco/health-agent:${PROD_IMAGE_TAG}" + echo "${{ secrets.HARBOR_CI_TOKEN }}" | \ + docker login registry.tarla.io -u robot-ci-push-iklimco --password-stdin + docker pull "${SOURCE_IMAGE_DIGEST}" + docker tag "${SOURCE_IMAGE_DIGEST}" "${PROD_IMAGE}" + docker push "${PROD_IMAGE}" + if grep -q "^IMAGE_HEALTH_AGENT=" .env; then + sed -i "s|^IMAGE_HEALTH_AGENT=.*$|IMAGE_HEALTH_AGENT=health-agent:${PROD_IMAGE_TAG}|" .env + else + echo "IMAGE_HEALTH_AGENT=health-agent:${PROD_IMAGE_TAG}" >> .env + fi + 
echo "HEALTH_AGENT_IMAGE=${PROD_IMAGE}" >> "$GITHUB_ENV" + log_message "SUCCESS" "Promoted: ${PROD_IMAGE}" + + - name: Run Uptime Kuma Setup + run: | + source ./common-functions-base.sh + export SPRING_PROFILES_ACTIVE=PROD + source_env_file ./health-agent/.env + mkdir -p "${HEALTH_AGENT_CONFIG_GENERATED_DIR:?HEALTH_AGENT_CONFIG_GENERATED_DIR must be set in health-agent/.env}" + if [ ! -f "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then + docker run --rm \ + -v "${HEALTH_AGENT_CONFIG_GENERATED_DIR}:/app/config/generated" \ + --env-file "$(pwd)/health-agent/.env" \ + --env-file "$(pwd)/health-agent/.env.setup" \ + "${HEALTH_AGENT_IMAGE:?HEALTH_AGENT_IMAGE is not set — promote an image via health-agent/deploy/prod.env before running setup}" \ + python scripts/setup_uptime_kuma.py + log_message "SUCCESS" "Uptime Kuma setup complete, tokens written to ${HEALTH_AGENT_CONFIG_GENERATED_DIR}" + else + log_message "INFO" "uk_tokens.yml already exists, skipping Uptime Kuma setup" + fi - name: Deploy Monitoring Stack run: | source ./common-functions-base.sh export SPRING_PROFILES_ACTIVE=PROD source_env_file ./.env + source_env_file ./health-agent/.env + export HEALTH_AGENT_ENV_FILE="$(pwd)/health-agent/.env" + echo "${{ secrets.HARBOR_PULL_TOKEN }}" | \ + docker login registry.tarla.io -u robot-swarm-pull-iklimco --password-stdin # Remove leftover dozzle_users Docker secret from previous setup docker secret rm dozzle_users 2>/dev/null || true diff --git a/.gitea/workflows/deploy-monitoring-test.yml b/.gitea/workflows/deploy-monitoring-test.yml index 07c532b..f96d7f5 100644 --- a/.gitea/workflows/deploy-monitoring-test.yml +++ b/.gitea/workflows/deploy-monitoring-test.yml @@ -6,6 +6,7 @@ on: - test paths: - 'docker-stack-monitoring.yml' + - 'health-agent/**' - 'swag/**' - '.gitea/workflows/deploy-monitoring-test.yml' @@ -43,18 +44,67 @@ jobs: run: | source ./common-functions-base.sh export SPRING_PROFILES_ACTIVE=TEST - rm -f .env .env.secrets.swag + rm -f .env .env.secrets.swag health-agent/.env health-agent/.env.setup scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:test/secrets/iklim.co/.env ./.env
scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:test/secrets/iklim.co/.env.secrets.swag ./.env.secrets.swag + scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:test/secrets/iklim.co/.env.monitoring.health-agent-runtime ./health-agent/.env + scp -P 23 ${{ vars.STORAGEBOX_USER }}@${{ vars.STORAGEBOX_USER }}.your-storagebox.de:test/secrets/iklim.co/.env.monitoring.health-agent-setup ./health-agent/.env.setup require_env_file ./.env "Main env file" require_env_file ./.env.secrets.swag "SWAG secrets" + require_env_file ./health-agent/.env "Health-agent runtime env" + require_env_file ./health-agent/.env.setup "Health-agent setup env" + + - name: Build and Push Health Agent + run: | + source ./common-functions-base.sh + export SPRING_PROFILES_ACTIVE=TEST + VERSION=$(sed -n 's/^version = "\(.*\)"/\1/p' health-agent/pyproject.toml) + IMAGE_TAG="health-agent:${VERSION}-rc" + IMAGE_FULL="registry.tarla.io/iklimco/${IMAGE_TAG}" + echo "${{ secrets.HARBOR_CI_TOKEN }}" | \ + docker login registry.tarla.io -u robot-ci-push-iklimco --password-stdin + docker build -t "${IMAGE_FULL}" health-agent/ + docker push "${IMAGE_FULL}" + docker pull -q "${IMAGE_FULL}" + DIGEST=$(docker image inspect "${IMAGE_FULL}" --format '{{index .RepoDigests 0}}') + if grep -q "^IMAGE_HEALTH_AGENT=" .env; then + sed -i "s|^IMAGE_HEALTH_AGENT=.*$|IMAGE_HEALTH_AGENT=${IMAGE_TAG}|" .env + else + echo "IMAGE_HEALTH_AGENT=${IMAGE_TAG}" >> .env + fi + echo "HEALTH_AGENT_IMAGE=${IMAGE_FULL}" >> $GITHUB_ENV + log_message "SUCCESS" "Pushed: ${IMAGE_FULL}" + log_message "INFO" "Promotion manifest — write to health-agent/deploy/prod.env on prod-env branch:" + echo " SOURCE_IMAGE_DIGEST=${DIGEST}" + echo " PROD_IMAGE_TAG=${VERSION}" + + - name: Run Uptime Kuma Setup + run: | + source ./common-functions-base.sh + export SPRING_PROFILES_ACTIVE=TEST + source_env_file ./health-agent/.env + mkdir -p "${HEALTH_AGENT_CONFIG_GENERATED_DIR}" + if [ ! 
-f "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then + docker run --rm \ + -v "${HEALTH_AGENT_CONFIG_GENERATED_DIR}:/app/config/generated" \ + --env-file "$(pwd)/health-agent/.env" \ + --env-file "$(pwd)/health-agent/.env.setup" \ + "${HEALTH_AGENT_IMAGE}" \ + python scripts/setup_uptime_kuma.py + log_message "SUCCESS" "Uptime Kuma setup complete, tokens written to ${HEALTH_AGENT_CONFIG_GENERATED_DIR}" + else + log_message "INFO" "uk_tokens.yml already exists, skipping Uptime Kuma setup" + fi - name: Deploy Monitoring Stack run: | source ./common-functions-base.sh export SPRING_PROFILES_ACTIVE=TEST source_env_file ./.env - + source_env_file ./health-agent/.env + export HEALTH_AGENT_ENV_FILE="$(pwd)/health-agent/.env" + echo "${{ secrets.HARBOR_PULL_TOKEN }}" | \ + docker login registry.tarla.io -u robot-swarm-pull-iklimco --password-stdin docker stack deploy \ --with-registry-auth \ --resolve-image changed \ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7f960e3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +health-agent/config/generated/uk_tokens.yml +*__pycache__* +health-agent/.venv/* +health-agent/prod-env/ +health-agent/test-env/ +health-agent/.env* diff --git a/common-functions-base.sh b/common-functions-base.sh index b9a7e38..aed8052 100644 --- a/common-functions-base.sh +++ b/common-functions-base.sh @@ -67,9 +67,8 @@ lookup_env_value() { } # Matematiksel veya mantıksal işlem gerektiren env değerlerini hesaplar. -# Örn: Milisaniye cinsinden JWT süresini saniyeye çevirir. refresh_calculated_env_vars() { - export JWT_ACCESS_TOKEN_EXPIRATION_SEC=$(( JWT_ACCESS_TOKEN_EXPIRATION / 1000 )) + : } # Tüm çevre dosyalarını (ana env, ortak sırlar ve servis sırları) tazeleyerek yükler.
diff --git a/health-agent/.gitignore b/health-agent/.gitignore deleted file mode 100644 index c5f529c..0000000 --- a/health-agent/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -config/generated/uk_tokens.yml -.env -.env.setup diff --git a/health-agent/Dockerfile b/health-agent/Dockerfile index 14b6927..fbc2659 100644 --- a/health-agent/Dockerfile +++ b/health-agent/Dockerfile @@ -6,6 +6,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf COPY pyproject.toml ./ COPY src/ ./src/ +COPY config/ ./config/ +COPY scripts/ ./scripts/ RUN pip install --no-cache-dir . ENV PYTHONPATH=/app/src diff --git a/health-agent/README.md b/health-agent/README.md index c09c33b..5ab86a8 100644 --- a/health-agent/README.md +++ b/health-agent/README.md @@ -52,7 +52,7 @@ Environment_Monitoring/health-agent/ ├── Dockerfile ├── pyproject.toml ├── .env.example # health-agent runtime değişkenleri (credentials, ENV, CLUSTER_SIZE_*) -└── .env.setup.example # setup script değişkenleri (UK_API_KEY, Slack webhook'ları) +└── .env.setup.example # setup script değişkenleri (UK_URL, UK_USER, UK_PASS, Slack webhook'ları) ``` --- @@ -76,6 +76,8 @@ Environment_Monitoring/health-agent/ | `EXTERNAL_DOMAIN` | Base domain — `iklim.co` in both environments | | `EXTERNAL_SUBDOMAIN_SUFFIX` | Subdomain suffix — empty for prod, `-test` for test → `api-test.iklim.co` | | `SLACK_WEBHOOK_IKLIM_{ENV}_OPS` | Direct Slack webhook for container crash/OOM events — e.g. `SLACK_WEBHOOK_IKLIM_PROD_OPS` | +| `PATRONI_HOSTS` | Patroni node list (comma-separated `host:port`) — e.g. `patroni-01:8008,patroni-02:8008` | +| `VAULT_HOSTS` | Vault node subdomain list (comma-separated) — e.g. 
`vault-1,vault-2,vault-3` | | `RABBITMQ_USER` / `RABBITMQ_PASS` | RabbitMQ management credentials | | `MONGO_URI` | MongoDB connection URI | | `REDIS_PASSWORD` | Redis / Sentinel password | @@ -104,13 +106,19 @@ Push token'ları `config/generated/uk_tokens.yml`'den otomatik okunur — bu dos Health-agent deploy edilmeden önce kurulum script'i çalıştırılır. Script, `monitors.yml`'i okuyarak tüm monitor, tag, group ve status page'leri Uptime Kuma'da oluşturur; push token'larını `config/generated/uk_tokens.yml`'e yazar. -Script [`uptime-kuma-api`](https://pypi.org/project/uptime-kuma-api/) kütüphanesini kullanır. API key authentication desteği implementasyon öncesi doğrulanmalıdır; desteklenmiyorsa `requests` + `Authorization: Bearer ` ile REST API doğrudan çağrılır. +Script `uptime-kuma-api-v2` kütüphanesini kullanır; Socket.IO üzerinden username/password ile bağlanır. ```bash cd Environment_Monitoring/health-agent -# setup değişkenlerini doldur -cp .env.setup.example .env.setup +# Python 3.12 venv oluştur ve aktive et +python3.12 -m venv .venv +source .venv/bin/activate +pip install -e ".[dev]" + +# runtime ve setup değişkenlerini doldur +cp env-example/.env.example .env # ENV, EXTERNAL_DOMAIN vb. +cp env-example/.env.setup.example .env.setup # UK_URL, UK_USER, UK_PASS, Slack webhook'ları # önce dry-run ile ne yapılacağını gör python scripts/setup_uptime_kuma.py --dry-run @@ -118,10 +126,8 @@ python scripts/setup_uptime_kuma.py --dry-run # tüm kaynakları oluştur python scripts/setup_uptime_kuma.py -# sadece belirli bileşenleri güncelle -python scripts/setup_uptime_kuma.py --only monitors -python scripts/setup_uptime_kuma.py --only notifications -python scripts/setup_uptime_kuma.py --only status-page +# sadece belirli bir monitörü işle (monitor adıyla) +python scripts/setup_uptime_kuma.py --only SWARM-CLUSTER ``` Script idempotent çalışır — CI/CD pipeline'ında her deploy'da güvenle tetiklenebilir.
@@ -141,6 +147,10 @@ Planlı bakım/deploy sırasında etkilenecek group için Uptime Kuma'da Mainten ```bash cd Environment_Monitoring/health-agent +# Python 3.12 venv oluştur ve aktive et +python3.12 -m venv .venv +source .venv/bin/activate + # bağımlılıkları kur pip install -e ".[dev]" diff --git a/health-agent/deploy/prod.env b/health-agent/deploy/prod.env new file mode 100644 index 0000000..92297cd --- /dev/null +++ b/health-agent/deploy/prod.env @@ -0,0 +1,2 @@ +SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:fadf229d4423075d2871f9dc4a5a0afdf6dfe7c5fcd04d866b2d6d6fe8942b56 +PROD_IMAGE_TAG=0.1.0 diff --git a/health-agent/.env.example b/health-agent/env-example/.env.example similarity index 74% rename from health-agent/.env.example rename to health-agent/env-example/.env.example index 20a03a9..6574867 100644 --- a/health-agent/.env.example +++ b/health-agent/env-example/.env.example @@ -9,6 +9,8 @@ EXTERNAL_DOMAIN=iklim.co EXTERNAL_SUBDOMAIN_SUFFIX= UK_PUSH_URL_BASE=https://uptime.tarla.io/api/push SLACK_WEBHOOK_IKLIM_PROD_OPS= +PATRONI_HOSTS=patroni-01:8008,patroni-02:8008,patroni-03:8008 +VAULT_HOSTS=vault RABBITMQ_USER= RABBITMQ_PASS= MONGO_URI= @@ -17,3 +19,6 @@ REDIS_MASTER_NAME= REDIS_SENTINEL_HOSTS= STORAGEBOX_PATH= APISIX_ADMIN_KEY= +# PATRONI_HOSTS and VAULT_HOSTS are already defined above; do not repeat them here, +# because a later (empty) assignment would override the earlier value. +HEALTH_AGENT_CONFIG_GENERATED_DIR= diff --git a/health-agent/.env.setup.example b/health-agent/env-example/.env.setup.example similarity index 100% rename from health-agent/.env.setup.example rename to health-agent/env-example/.env.setup.example diff --git a/health-agent/scripts/setup_uptime_kuma.py b/health-agent/scripts/setup_uptime_kuma.py index bbeafc9..0bc5a35 100644 --- a/health-agent/scripts/setup_uptime_kuma.py +++ b/health-agent/scripts/setup_uptime_kuma.py @@ -2,11 +2,16 @@ import os import argparse import yaml import logging +from dotenv import load_dotenv from uptime_kuma_api import UptimeKumaApi, MonitorType
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") logger = logging.getLogger("uk-setup") +_root = os.path.join(os.path.dirname(__file__), "..") +load_dotenv(os.path.join(_root, ".env")) +load_dotenv(os.path.join(_root, ".env.setup")) + def format_str(text, env_name, project): if not isinstance(text, str): return text @@ -118,7 +123,7 @@ def setup_uptime_kuma(dry_run=False, only=None): logger.warning(f"Status page ops failed: {e}") # 4. Write tokens to uk_tokens.yml - token_file = os.path.join(os.path.dirname(__file__), "..", "config", "uk_tokens.yml") + token_file = os.path.join(os.path.dirname(__file__), "..", "config", "generated", "uk_tokens.yml") if not dry_run: with open(token_file, "w") as f: yaml.dump(tokens, f) diff --git a/health-agent/src/health_agent/checks/http.py b/health-agent/src/health_agent/checks/http.py index 14b4dfb..f7001c0 100644 --- a/health-agent/src/health_agent/checks/http.py +++ b/health-agent/src/health_agent/checks/http.py @@ -29,19 +29,27 @@ def http_check(url, expected_status=None, auth=None, verify_ssl=True, timeout=5, return False, None, ping_ms, str(e) def check_patroni_cluster(): - nodes = ["patroni-01", "patroni-02", "patroni-03"] + hosts_env = os.getenv("PATRONI_HOSTS") or "patroni-01:8008,patroni-02:8008,patroni-03:8008" + nodes = [] + for h in hosts_env.split(","): + h = h.strip() + if ":" in h: + host, port = h.rsplit(":", 1) + nodes.append((host, int(port))) + elif h: + nodes.append((h, 8008)) cluster_data = None error_msg = "All Patroni nodes unreachable" start_t = time.time() - - for node in nodes: - url = f"http://{node}:8008/cluster" + + for host, port in nodes: + url = f"http://{host}:{port}/cluster" ok, resp, _, err = http_check(url, timeout=3) if ok and resp: cluster_data = resp.json() break elif err: - error_msg = f"{node} error: {err}" + error_msg = f"{host}:{port} error: {err}" ping_ms = int((time.time() - start_t) * 1000) @@ -117,7 +125,8 @@ def check_apisix():
push("APISIX-GATEWAY", "down", f"admin API unreachable: {err or resp.status_code}", ping_ms) def check_vault(): - nodes = ["vault-1", "vault-2", "vault-3"] + hosts_env = os.getenv("VAULT_HOSTS") or "vault" + nodes = [h.strip() for h in hosts_env.split(",") if h.strip()] domain = os.getenv("EXTERNAL_DOMAIN", "iklim.co") unsealed_count = 0 total = len(nodes) @@ -127,7 +136,7 @@ def check_vault(): start_t = time.time() for node in nodes: url = f"https://{node}.{domain}:8200/v1/sys/health" - ok, resp, ms, err = http_check(url, verify_ssl=False, expected_status=[200, 429, 473]) + ok, resp, ms, err = http_check(url, expected_status=[200, 429, 473]) max_ping = max(max_ping, ms) if resp: diff --git a/health-agent/src/health_agent/checks/redis_sentinel.py b/health-agent/src/health_agent/checks/redis_sentinel.py index 7b97410..0a05ded 100644 --- a/health-agent/src/health_agent/checks/redis_sentinel.py +++ b/health-agent/src/health_agent/checks/redis_sentinel.py @@ -9,8 +9,15 @@ logger = logging.getLogger(__name__) def check_redis_sentinel(): start_t = time.time() - hosts = os.getenv("REDIS_SENTINEL_HOSTS", "redis-sentinel-01,redis-sentinel-02,redis-sentinel-03") - sentinel_nodes = [(h.strip(), 26379) for h in hosts.split(",")] + hosts = os.getenv("REDIS_SENTINEL_HOSTS") or "redis-sentinel:26379" + sentinel_nodes = [] + for h in hosts.split(","): + h = h.strip() + if ":" in h: + host, port = h.rsplit(":", 1) + sentinel_nodes.append((host, int(port))) + elif h: + sentinel_nodes.append((h, 26379)) master_name = os.getenv("REDIS_MASTER_NAME", "prod-master") password = os.getenv("REDIS_PASSWORD", None) diff --git a/ops/build-and-push-health-agent.sh b/ops/build-and-push-health-agent.sh new file mode 100755 index 0000000..a763b58 --- /dev/null +++ b/ops/build-and-push-health-agent.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# Builds the health-agent Docker image and pushes it to Harbor as a release candidate. +# Use this to bypass the CI pipeline for manual builds/hotfixes.
+# +# Usage (run from Environment_Monitoring/ root): +# HARBOR_CI_TOKEN= ./ops/build-and-push-health-agent.sh +# +# Optional env vars: +# HARBOR_REGISTRY (default: registry.tarla.io) +# HARBOR_PROJECT (default: iklimco) +# HARBOR_CI_USER (default: robot-ci-push-iklimco) +# TAG_SUFFIX (default: -rc) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$(cd "$SCRIPT_DIR/.." && pwd)" + +HARBOR_REGISTRY="${HARBOR_REGISTRY:-registry.tarla.io}" +HARBOR_PROJECT="${HARBOR_PROJECT:-iklimco}" +HARBOR_CI_USER="${HARBOR_CI_USER:-robot-ci-push-iklimco}" +TAG_SUFFIX="${TAG_SUFFIX:--rc}" + +log() { echo "[$(date +%H:%M:%S)] $*"; } +die() { echo "[$(date +%H:%M:%S)] ERROR: $*" >&2; exit 1; } + +sep() { + local title="$*" + local line + line=$(printf '─%.0s' {1..70}) + echo + echo "$line" + printf ' %s\n' "$title" + echo "$line" +} + +[[ -z "${HARBOR_CI_TOKEN:-}" ]] && die "HARBOR_CI_TOKEN env var is required." +[[ -f "health-agent/pyproject.toml" ]] || die "health-agent/pyproject.toml not found — run from Environment_Monitoring/ root." +[[ -f "health-agent/Dockerfile" ]] || die "health-agent/Dockerfile not found." + +VERSION=$(sed -n 's/^version = "\(.*\)"/\1/p' health-agent/pyproject.toml) +[[ -n "$VERSION" ]] || die "Could not determine version from health-agent/pyproject.toml" + +IMAGE="${HARBOR_REGISTRY}/${HARBOR_PROJECT}/health-agent:${VERSION}${TAG_SUFFIX}" + +sep "health-agent → ${IMAGE}" + +log "Logging in to ${HARBOR_REGISTRY} as ${HARBOR_CI_USER}" +echo "$HARBOR_CI_TOKEN" | docker login "$HARBOR_REGISTRY" -u "$HARBOR_CI_USER" --password-stdin +log "✔ Harbor login successful" + +log "Building..." +docker build -t "$IMAGE" health-agent/ + +log "Pushing..." 
+docker push "$IMAGE" +docker pull -q "$IMAGE" +DIGEST=$(docker image inspect "$IMAGE" --format '{{index .RepoDigests 0}}') + +log "✔ Pushed: ${IMAGE}" + +sep "Promotion Manifest — write to health-agent/deploy/prod.env on prod-env branch" +echo +echo " SOURCE_IMAGE_DIGEST=${DIGEST}" +echo " PROD_IMAGE_TAG=${VERSION}" +echo