fix(health-agent): fix ping maxretries param and status page group lookup
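Fix the ping monitor creation error: 'max_retries' is not a valid uptime-kuma-api parameter; the correct name is 'maxretries'. Fix status pages never linking groups: re-fetching get_monitors() right after add_monitor() races with WebSocket delivery, so newly created groups can be missing from the result; use the group_map populated in Section 1 directly instead. Also pass the Slack webhook as 'slackwebhookURL' instead of 'webhookURL' when creating the notification, and fetch push tokens for newly created push monitors with a single get_monitors() call after all creates rather than one call per monitor.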
This commit is contained in:
parent
95dd439a34
commit
8b10653ff4
@ -103,7 +103,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
type=NotificationType.SLACK,
|
type=NotificationType.SLACK,
|
||||||
name=notif_name,
|
name=notif_name,
|
||||||
isDefault=False,
|
isDefault=False,
|
||||||
webhookURL=webhook_url,
|
slackwebhookURL=webhook_url,
|
||||||
applyExisting=False
|
applyExisting=False
|
||||||
)
|
)
|
||||||
notification_map[notif_key] = res.get('id')
|
notification_map[notif_key] = res.get('id')
|
||||||
@ -138,6 +138,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
group_map[raw_name] = existing_monitors[formatted_name]['id']
|
group_map[raw_name] = existing_monitors[formatted_name]['id']
|
||||||
|
|
||||||
tokens = {}
|
tokens = {}
|
||||||
|
new_monitor_ids = {} # m_name -> monitorID for monitors created in this run
|
||||||
|
|
||||||
# 2. Push Monitors
|
# 2. Push Monitors
|
||||||
for pm in config.get("push_monitors", []):
|
for pm in config.get("push_monitors", []):
|
||||||
@ -169,15 +170,24 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
if notif_ids:
|
if notif_ids:
|
||||||
kwargs["notification_id_list"] = notif_ids
|
kwargs["notification_id_list"] = notif_ids
|
||||||
result = api.add_monitor(**kwargs)
|
result = api.add_monitor(**kwargs)
|
||||||
m_id = result['monitorID']
|
new_monitor_ids[m_name] = result['monitorID']
|
||||||
|
|
||||||
for m in api.get_monitors():
|
|
||||||
if m['id'] == m_id:
|
|
||||||
tokens[m_name] = m['pushToken']
|
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
tokens[m_name] = "dummy_token_dry_run"
|
tokens[m_name] = "dummy_token_dry_run"
|
||||||
|
|
||||||
|
# Fetch push tokens for newly created monitors in one batch call.
|
||||||
|
# Calling api.get_monitors() per-monitor races with WebSocket event delivery;
|
||||||
|
# a single call after all creates allows the server state to settle.
|
||||||
|
if new_monitor_ids and api:
|
||||||
|
id_to_name = {v: k for k, v in new_monitor_ids.items()}
|
||||||
|
for m in api.get_monitors():
|
||||||
|
if m['id'] in id_to_name:
|
||||||
|
m_name = id_to_name[m['id']]
|
||||||
|
tokens[m_name] = m.get('pushToken', '')
|
||||||
|
logger.info(f"Captured push token for {m_name}")
|
||||||
|
missing = [n for n in new_monitor_ids if n not in tokens]
|
||||||
|
if missing:
|
||||||
|
logger.warning(f"Could not capture push token for: {missing}")
|
||||||
|
|
||||||
# 3. HTTP Monitors
|
# 3. HTTP Monitors
|
||||||
for hm in config.get("http_monitors", []):
|
for hm in config.get("http_monitors", []):
|
||||||
m_name = hm["name"]
|
m_name = hm["name"]
|
||||||
@ -269,7 +279,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
"name": m_name,
|
"name": m_name,
|
||||||
"hostname": ip,
|
"hostname": ip,
|
||||||
"interval": ping_interval,
|
"interval": ping_interval,
|
||||||
"max_retries": ping_retries,
|
"maxretries": ping_retries,
|
||||||
}
|
}
|
||||||
if parent_group_id is not None:
|
if parent_group_id is not None:
|
||||||
kwargs["parent"] = parent_group_id
|
kwargs["parent"] = parent_group_id
|
||||||
@ -299,7 +309,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
"name": m_name,
|
"name": m_name,
|
||||||
"hostname": ip,
|
"hostname": ip,
|
||||||
"interval": ping_interval,
|
"interval": ping_interval,
|
||||||
"max_retries": ping_retries,
|
"maxretries": ping_retries,
|
||||||
}
|
}
|
||||||
if parent_group_id is not None:
|
if parent_group_id is not None:
|
||||||
kwargs["parent"] = parent_group_id
|
kwargs["parent"] = parent_group_id
|
||||||
@ -312,13 +322,6 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
|
|
||||||
# 6. Status Pages
|
# 6. Status Pages
|
||||||
if api:
|
if api:
|
||||||
all_monitors = {}
|
|
||||||
try:
|
|
||||||
for m in api.get_monitors():
|
|
||||||
all_monitors[m['name']] = m
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to re-fetch monitors for status pages: {e}")
|
|
||||||
|
|
||||||
existing_pages = {}
|
existing_pages = {}
|
||||||
try:
|
try:
|
||||||
for p in api.get_status_pages():
|
for p in api.get_status_pages():
|
||||||
@ -339,18 +342,18 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
api.add_status_page(slug, title)
|
api.add_status_page(slug, title)
|
||||||
|
|
||||||
# Each monitors.yml group becomes one display section on the status page.
|
# Each monitors.yml group becomes one display section on the status page.
|
||||||
# The GROUP monitor is added so Uptime Kuma renders it with all its children.
|
# Use group_map (populated during Section 1) to avoid re-fetching monitors;
|
||||||
|
# a fresh get_monitors() call after add_monitor() races with WebSocket delivery.
|
||||||
public_group_list = []
|
public_group_list = []
|
||||||
for group_raw_name in sp_groups:
|
for group_raw_name in sp_groups:
|
||||||
group_formatted = f"{project} [{env_name}] {group_raw_name}"
|
group_id = group_map.get(group_raw_name)
|
||||||
group_monitor = all_monitors.get(group_formatted)
|
if not group_id:
|
||||||
if not group_monitor:
|
logger.warning(f"Group '{group_raw_name}' not in group_map, skipping in status page")
|
||||||
logger.warning(f"Group '{group_formatted}' not found, skipping in status page")
|
|
||||||
continue
|
continue
|
||||||
public_group_list.append({
|
public_group_list.append({
|
||||||
"name": group_raw_name,
|
"name": group_raw_name,
|
||||||
"weight": len(public_group_list) + 1,
|
"weight": len(public_group_list) + 1,
|
||||||
"monitorList": [{"id": group_monitor['id']}]
|
"monitorList": [{"id": group_id}]
|
||||||
})
|
})
|
||||||
|
|
||||||
if public_group_list:
|
if public_group_list:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user