fix(health-agent): check all 3 patroni node configs on storagebox; switch ping monitors to TCP port 22 (ICMP blocked from Docker)

This commit is contained in:
Murat ÖZDEMİR 2026-06-26 21:54:49 +03:00
parent fa7ed41063
commit 94e6b57c52
2 changed files with 15 additions and 10 deletions

View File

@@ -246,10 +246,11 @@ def setup_uptime_kuma(dry_run=False, only=None):
except Exception as e:
logger.warning(f"Failed to create DNS monitor {m_name}: {e}")
# 5. Ping Monitors (generated from nodes config)
# 5. TCP Port Monitors (generated from nodes config; ICMP is blocked from Docker, use TCP SSH port)
ping_cfg = config.get("ping_monitors", {})
ping_interval = ping_cfg.get("interval", 60)
ping_retries = ping_cfg.get("max_retries", 1)
ping_port = ping_cfg.get("port", 22)
env_nodes = config.get("nodes", {}).get(env_name, {})
for i, node in enumerate(env_nodes.get("service", []), 1):
@@ -260,16 +261,17 @@ def setup_uptime_kuma(dry_run=False, only=None):
parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
logger.info(f"Processing TCP port monitor: {m_name} -> {ip}:{ping_port}")
if not dry_run:
if m_name in existing_monitors:
logger.info(f"Monitor {m_name} already exists.")
else:
try:
kwargs = {
"type": MonitorType.PING,
"type": MonitorType.PORT,
"name": m_name,
"hostname": ip,
"port": ping_port,
"interval": ping_interval,
"maxretries": ping_retries,
}
@@ -278,9 +280,9 @@ def setup_uptime_kuma(dry_run=False, only=None):
if notif_ids:
kwargs["notificationIDList"] = notif_ids
api.add_monitor(**kwargs)
logger.info(f"Created Ping monitor: {m_name}")
logger.info(f"Created TCP port monitor: {m_name}")
except Exception as e:
logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
logger.warning(f"Failed to create TCP port monitor {m_name}: {e}")
for i, node in enumerate(env_nodes.get("db", []), 1):
m_name = f"Ext Ping Db{i:02d}"
@@ -290,16 +292,17 @@ def setup_uptime_kuma(dry_run=False, only=None):
parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
logger.info(f"Processing TCP port monitor: {m_name} -> {ip}:{ping_port}")
if not dry_run:
if m_name in existing_monitors:
logger.info(f"Monitor {m_name} already exists.")
else:
try:
kwargs = {
"type": MonitorType.PING,
"type": MonitorType.PORT,
"name": m_name,
"hostname": ip,
"port": ping_port,
"interval": ping_interval,
"maxretries": ping_retries,
}
@@ -308,9 +311,9 @@ def setup_uptime_kuma(dry_run=False, only=None):
if notif_ids:
kwargs["notificationIDList"] = notif_ids
api.add_monitor(**kwargs)
logger.info(f"Created Ping monitor: {m_name}")
logger.info(f"Created TCP port monitor: {m_name}")
except Exception as e:
logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
logger.warning(f"Failed to create TCP port monitor {m_name}: {e}")
# 6. Status Pages
if api:

View File

@@ -10,7 +10,9 @@ def check_storagebox_mount():
storagebox_path = os.getenv("STORAGEBOX_PATH", "/mnt/storagebox")
expected_files = [
"patroni/patroni.yml",
"db/postgresql-01/config/patroni.yml",
"db/postgresql-02/config/patroni.yml",
"db/postgresql-03/config/patroni.yml",
"ssl/STAR.iklim.co.full.crt"
]