fix(health-agent): check all 3 patroni node configs on storagebox; switch ping monitors to TCP port 22 (ICMP blocked from Docker)
This commit is contained in:
parent
fa7ed41063
commit
94e6b57c52
@ -246,10 +246,11 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to create DNS monitor {m_name}: {e}")
|
logger.warning(f"Failed to create DNS monitor {m_name}: {e}")
|
||||||
|
|
||||||
# 5. Ping Monitors (generated from nodes config)
|
# 5. TCP Port Monitors (generated from nodes config; ICMP is blocked from Docker, use TCP SSH port)
|
||||||
ping_cfg = config.get("ping_monitors", {})
|
ping_cfg = config.get("ping_monitors", {})
|
||||||
ping_interval = ping_cfg.get("interval", 60)
|
ping_interval = ping_cfg.get("interval", 60)
|
||||||
ping_retries = ping_cfg.get("max_retries", 1)
|
ping_retries = ping_cfg.get("max_retries", 1)
|
||||||
|
ping_port = ping_cfg.get("port", 22)
|
||||||
env_nodes = config.get("nodes", {}).get(env_name, {})
|
env_nodes = config.get("nodes", {}).get(env_name, {})
|
||||||
|
|
||||||
for i, node in enumerate(env_nodes.get("service", []), 1):
|
for i, node in enumerate(env_nodes.get("service", []), 1):
|
||||||
@ -260,16 +261,17 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
|
parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
|
||||||
notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
|
notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
|
||||||
|
|
||||||
logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
|
logger.info(f"Processing TCP port monitor: {m_name} -> {ip}:{ping_port}")
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
if m_name in existing_monitors:
|
if m_name in existing_monitors:
|
||||||
logger.info(f"Monitor {m_name} already exists.")
|
logger.info(f"Monitor {m_name} already exists.")
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
kwargs = {
|
kwargs = {
|
||||||
"type": MonitorType.PING,
|
"type": MonitorType.PORT,
|
||||||
"name": m_name,
|
"name": m_name,
|
||||||
"hostname": ip,
|
"hostname": ip,
|
||||||
|
"port": ping_port,
|
||||||
"interval": ping_interval,
|
"interval": ping_interval,
|
||||||
"maxretries": ping_retries,
|
"maxretries": ping_retries,
|
||||||
}
|
}
|
||||||
@ -278,9 +280,9 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
if notif_ids:
|
if notif_ids:
|
||||||
kwargs["notificationIDList"] = notif_ids
|
kwargs["notificationIDList"] = notif_ids
|
||||||
api.add_monitor(**kwargs)
|
api.add_monitor(**kwargs)
|
||||||
logger.info(f"Created Ping monitor: {m_name}")
|
logger.info(f"Created TCP port monitor: {m_name}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
|
logger.warning(f"Failed to create TCP port monitor {m_name}: {e}")
|
||||||
|
|
||||||
for i, node in enumerate(env_nodes.get("db", []), 1):
|
for i, node in enumerate(env_nodes.get("db", []), 1):
|
||||||
m_name = f"Ext Ping Db{i:02d}"
|
m_name = f"Ext Ping Db{i:02d}"
|
||||||
@ -290,16 +292,17 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
|
parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
|
||||||
notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
|
notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
|
||||||
|
|
||||||
logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
|
logger.info(f"Processing TCP port monitor: {m_name} -> {ip}:{ping_port}")
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
if m_name in existing_monitors:
|
if m_name in existing_monitors:
|
||||||
logger.info(f"Monitor {m_name} already exists.")
|
logger.info(f"Monitor {m_name} already exists.")
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
kwargs = {
|
kwargs = {
|
||||||
"type": MonitorType.PING,
|
"type": MonitorType.PORT,
|
||||||
"name": m_name,
|
"name": m_name,
|
||||||
"hostname": ip,
|
"hostname": ip,
|
||||||
|
"port": ping_port,
|
||||||
"interval": ping_interval,
|
"interval": ping_interval,
|
||||||
"maxretries": ping_retries,
|
"maxretries": ping_retries,
|
||||||
}
|
}
|
||||||
@ -308,9 +311,9 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
if notif_ids:
|
if notif_ids:
|
||||||
kwargs["notificationIDList"] = notif_ids
|
kwargs["notificationIDList"] = notif_ids
|
||||||
api.add_monitor(**kwargs)
|
api.add_monitor(**kwargs)
|
||||||
logger.info(f"Created Ping monitor: {m_name}")
|
logger.info(f"Created TCP port monitor: {m_name}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
|
logger.warning(f"Failed to create TCP port monitor {m_name}: {e}")
|
||||||
|
|
||||||
# 6. Status Pages
|
# 6. Status Pages
|
||||||
if api:
|
if api:
|
||||||
|
|||||||
@ -10,7 +10,9 @@ def check_storagebox_mount():
|
|||||||
|
|
||||||
storagebox_path = os.getenv("STORAGEBOX_PATH", "/mnt/storagebox")
|
storagebox_path = os.getenv("STORAGEBOX_PATH", "/mnt/storagebox")
|
||||||
expected_files = [
|
expected_files = [
|
||||||
"patroni/patroni.yml",
|
"db/postgresql-01/config/patroni.yml",
|
||||||
|
"db/postgresql-02/config/patroni.yml",
|
||||||
|
"db/postgresql-03/config/patroni.yml",
|
||||||
"ssl/STAR.iklim.co.full.crt"
|
"ssl/STAR.iklim.co.full.crt"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user