Some checks failed
Deploy Environment Monitoring to Production Environment / deploy (push) Failing after 10s
Deploy workflows: - Integrate health-agent build (test) and image promotion (prod) into monitoring stack workflows - Add storagebox download of health-agent runtime (.env.monitoring.health-agent-runtime → health-agent/.env) and setup (.env.monitoring.health-agent-setup → health-agent/.env.setup) env files - Add "Run Uptime Kuma Setup" step: runs setup_uptime_kuma.py inside the built image only when uk_tokens.yml is missing, writes tokens to HEALTH_AGENT_CONFIG_GENERATED_DIR (/mnt/storagebox/monitoring/uk_generated) - Add health-agent/** and health-agent/deploy/prod.env path triggers to test and prod workflows respectively - Add HARBOR_CI_TOKEN login and HARBOR_PULL_TOKEN login before stack deploy in both workflows - Source health-agent/.env before docker stack deploy to expose HEALTH_AGENT_CONFIG_GENERATED_DIR Dockerfile: - Copy config/ and scripts/ into image so setup_uptime_kuma.py can run inside the container setup_uptime_kuma.py: - Load .env and .env.setup automatically via python-dotenv (no manual export needed) - Write uk_tokens.yml to config/generated/ (aligned with container volume mount) Health checks: - PATRONI_HOSTS and VAULT_HOSTS are now configurable via env vars (comma-separated host:port); no code change needed when node count changes - REDIS_SENTINEL_HOSTS now correctly parses host:port format; default updated to redis-sentinel:26379 - Fix NameError in check_patroni_cluster() caused by leftover node variable after loop refactor - Remove verify_ssl=False from Vault check; vault.iklim.co has a valid certificate Ops: - Add ops/build-and-push-health-agent.sh for manual bypass of CI pipeline - Add health-agent/deploy/prod.env template for prod image promotion manifest Project structure: - Move .env.example and .env.setup.example to health-agent/env-example/ (root .gitignore excludes health-agent/.env*) - Add root .gitignore: excludes uk_tokens.yml, __pycache__, .venv, and env files - Remove health-agent/.gitignore (superseded by root .gitignore)
144 lines
5.3 KiB
Python
144 lines
5.3 KiB
Python
import os
|
|
import argparse
|
|
import yaml
|
|
import logging
|
|
from dotenv import load_dotenv
|
|
from uptime_kuma_api import UptimeKumaApi, MonitorType
|
|
|
|
# Script-wide logging: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("uk-setup")

# Parent directory of the one containing this script; both env files are looked up there.
_root = os.path.join(os.path.dirname(__file__), "..")

# Load ".env" first, then ".env.setup", so no manual export is needed before running.
load_dotenv(os.path.join(_root, ".env"))
load_dotenv(os.path.join(_root, ".env.setup"))
|
|
|
|
def format_str(text, env_name, project):
    """Fill the ``{env}`` and ``{project}`` placeholders in *text*.

    Args:
        text: Value to process. Anything that is not a ``str`` is returned
            untouched.
        env_name: Replacement for every ``{env}`` occurrence.
        project: Replacement for every ``{project}`` occurrence.

    Returns:
        The substituted string, or *text* itself when it is not a string.
    """
    if isinstance(text, str):
        # Applied in this order: "{env}" first, then "{project}".
        substitutions = (("{env}", env_name), ("{project}", project))
        for placeholder, value in substitutions:
            text = text.replace(placeholder, value)
    return text
|
|
|
|
def _find_parent_group_id(groups, group_map, monitor_name):
    """Return the ID of the first group listing *monitor_name* as a child, else None."""
    for group in groups:
        if monitor_name in (group.get("children") or []):
            # May still be None when the group has no known ID (e.g. dry-run).
            return group_map.get(group["name"])
    return None


def _save_tokens(tokens, token_file, merge_existing):
    """Write *tokens* to *token_file* as YAML, optionally keeping entries already stored there."""
    if merge_existing:
        try:
            with open(token_file, "r", encoding="utf-8") as f:
                previous = yaml.safe_load(f)
        except FileNotFoundError:
            previous = None
        # Only merge when the stored document is a mapping; anything else is replaced.
        if isinstance(previous, dict):
            tokens = {**previous, **tokens}

    # The target directory may not exist yet on a fresh checkout or volume.
    os.makedirs(os.path.dirname(token_file), exist_ok=True)
    with open(token_file, "w", encoding="utf-8") as f:
        yaml.dump(tokens, f)


def _apply_monitor_config(api, config, env_name, project, only):
    """Sync groups, push monitors and status pages, then persist the push tokens.

    *api* is a logged-in client, or ``None`` for a dry run in which actions are
    only logged and placeholder tokens are produced.
    """
    dry_run = api is None

    existing_monitors = {}
    if not dry_run:
        try:
            existing_monitors = {m['name']: m for m in api.get_monitors()}
        except Exception as e:
            logger.error(f"Failed to get monitors: {e}")
            # Without the current monitor list every configured entry would look
            # new and be created again as a duplicate, so stop here.
            return

    # 1. Process Groups
    groups = config.get("groups") or []
    group_map = {}
    for g in groups:
        raw_name = g["name"]
        formatted_name = f"{project} [{env_name}] {raw_name}"

        logger.info(f"Processing group: {formatted_name}")
        if dry_run:
            continue

        existing_group = existing_monitors.get(formatted_name)
        if existing_group is None:
            logger.info(f"Creating group monitor: {formatted_name}")
            res = api.add_monitor(type=MonitorType.GROUP, name=formatted_name)
            group_map[raw_name] = res['monitorID']
        else:
            group_map[raw_name] = existing_group['id']

    tokens = {}

    # 2. Push Monitors
    for pm in config.get("push_monitors") or []:
        m_name = pm["name"]
        if only and m_name != only:
            continue

        m_interval = pm.get("interval", 60)
        parent_group_id = _find_parent_group_id(groups, group_map, m_name)

        logger.info(f"Processing push monitor: {m_name}")
        if dry_run:
            tokens[m_name] = "dummy_token_dry_run"
            continue

        existing = existing_monitors.get(m_name)
        if existing is not None:
            logger.info(f"Monitor {m_name} already exists.")
            tokens[m_name] = existing['pushToken']

            # Re-parent only when a group is configured and differs from the current one.
            if parent_group_id and existing.get('parent') != parent_group_id:
                api.edit_monitor(existing['id'], parent=parent_group_id)
        else:
            logger.info(f"Creating push monitor: {m_name}")
            result = api.add_monitor(
                type=MonitorType.PUSH,
                name=m_name,
                interval=m_interval,
                parent=parent_group_id,
            )
            m_id = result['monitorID']

            # Fetch again to get pushToken
            created = next((m for m in api.get_monitors() if m['id'] == m_id), None)
            if created is None:
                logger.warning(f"Created monitor {m_name} (id {m_id}) not found; no push token saved.")
            else:
                tokens[m_name] = created['pushToken']

    # 3. Process Status Pages
    for sp in config.get("status_pages") or []:
        slug = format_str(sp["slug"], env_name, project)
        title = format_str(sp["title"], env_name, project)
        logger.info(f"Processing status page: {title} (slug: {slug})")
        if dry_run:
            continue
        # Status pages are best-effort: a failure is logged and the run continues.
        try:
            pages = api.get_status_pages()
            if not any(p['slug'] == slug for p in pages):
                logger.info(f"Creating status page: {slug}")
                api.add_status_page(slug, title)
        except Exception as e:
            logger.warning(f"Status page ops failed: {e}")

    # 4. Write tokens to uk_tokens.yml
    token_file = os.path.join(os.path.dirname(__file__), "..", "config", "generated", "uk_tokens.yml")
    if dry_run:
        logger.info(f"[DRY-RUN] Would save {len(tokens)} tokens to {token_file}")
        return

    # With --only, just one monitor was processed; keep the tokens already on
    # disk for the others instead of overwriting the file with a single entry.
    _save_tokens(tokens, token_file, merge_existing=bool(only))
    logger.info(f"Saved push tokens to {token_file}")


def setup_uptime_kuma(dry_run=False, only=None):
    """Create Uptime Kuma groups, push monitors and status pages from ``config/monitors.yml``.

    The configuration is read from ``../config/monitors.yml`` relative to this
    script. Existing monitors (matched by name) are reused; missing ones are
    created. The push token of every processed push monitor is written to
    ``../config/generated/uk_tokens.yml``.

    Environment variables read (with defaults): ``ENV`` ("test"), ``UK_URL``
    ("http://localhost:3001"), ``UK_USER`` ("admin"), ``UK_PASS`` ("admin").

    Args:
        dry_run: When true, no connection is made and nothing is written;
            the intended actions are only logged.
        only: When set, only the push monitor with exactly this name is
            processed. Groups and status pages are still processed. Tokens
            already stored for other monitors are preserved.

    Returns:
        None. A failed login or a failed monitor listing is logged and ends
        the run early without raising.
    """
    env_name = os.getenv("ENV", "test")

    config_path = os.path.join(os.path.dirname(__file__), "..", "config", "monitors.yml")
    with open(config_path, "r", encoding="utf-8") as f:
        # An empty YAML document parses to None; treat it as an empty config.
        config = yaml.safe_load(f) or {}

    project = config.get("project", "iklim")

    if dry_run:
        _apply_monitor_config(None, config, env_name, project, only)
        return

    kuma_url = os.getenv("UK_URL", "http://localhost:3001")
    kuma_user = os.getenv("UK_USER", "admin")
    kuma_pass = os.getenv("UK_PASS", "admin")

    api = None
    logger.info(f"Connecting to Uptime Kuma at {kuma_url}...")
    try:
        try:
            api = UptimeKumaApi(kuma_url)
            api.login(kuma_user, kuma_pass)
        except Exception as e:
            logger.error(f"Login failed: {e}")
            return

        _apply_monitor_config(api, config, env_name, project, only)
    finally:
        # Always release the connection, including after a failed login or an
        # error raised part-way through the sync.
        if api is not None:
            api.disconnect()
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the two optional flags and pass them on.
    cli = argparse.ArgumentParser(description="Setup Uptime Kuma monitors")
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Print actions without making changes",
    )
    cli.add_argument(
        "--only",
        type=str,
        help="Only process a specific monitor by name",
    )
    options = cli.parse_args()

    setup_uptime_kuma(only=options.only, dry_run=options.dry_run)
|