diff --git a/health-agent/scripts/setup_uptime_kuma.py b/health-agent/scripts/setup_uptime_kuma.py index 75841af..4789266 100644 --- a/health-agent/scripts/setup_uptime_kuma.py +++ b/health-agent/scripts/setup_uptime_kuma.py @@ -103,7 +103,7 @@ def setup_uptime_kuma(dry_run=False, only=None): type=NotificationType.SLACK, name=notif_name, isDefault=False, - webhookURL=webhook_url, + slackwebhookURL=webhook_url, applyExisting=False ) notification_map[notif_key] = res.get('id') @@ -138,6 +138,7 @@ def setup_uptime_kuma(dry_run=False, only=None): group_map[raw_name] = existing_monitors[formatted_name]['id'] tokens = {} + new_monitor_ids = {} # m_name -> monitorID for monitors created in this run # 2. Push Monitors for pm in config.get("push_monitors", []): @@ -169,15 +170,24 @@ def setup_uptime_kuma(dry_run=False, only=None): if notif_ids: kwargs["notification_id_list"] = notif_ids result = api.add_monitor(**kwargs) - m_id = result['monitorID'] - - for m in api.get_monitors(): - if m['id'] == m_id: - tokens[m_name] = m['pushToken'] - break + new_monitor_ids[m_name] = result['monitorID'] else: tokens[m_name] = "dummy_token_dry_run" + # Fetch push tokens for newly created monitors in one batch call. + # Calling api.get_monitors() per-monitor races with WebSocket event delivery; + # a single call after all creates allows the server state to settle. + if new_monitor_ids and api: + id_to_name = {v: k for k, v in new_monitor_ids.items()} + for m in api.get_monitors(): + if m['id'] in id_to_name: + m_name = id_to_name[m['id']] + tokens[m_name] = m.get('pushToken', '') + logger.info(f"Captured push token for {m_name}") + missing = [n for n in new_monitor_ids if n not in tokens] + if missing: + logger.warning(f"Could not capture push token for: {missing}") + # 3. HTTP Monitors for hm in config.get("http_monitors", []): m_name = hm["name"] @@ -269,7 +279,7 @@ def setup_uptime_kuma(dry_run=False, only=None): "name": m_name, "hostname": ip, "interval": ping_interval, - "max_retries": ping_retries, + "maxretries": ping_retries, } if parent_group_id is not None: kwargs["parent"] = parent_group_id @@ -299,7 +309,7 @@ def setup_uptime_kuma(dry_run=False, only=None): "name": m_name, "hostname": ip, "interval": ping_interval, - "max_retries": ping_retries, + "maxretries": ping_retries, } if parent_group_id is not None: kwargs["parent"] = parent_group_id @@ -312,13 +322,6 @@ def setup_uptime_kuma(dry_run=False, only=None): # 6. Status Pages if api: - all_monitors = {} - try: - for m in api.get_monitors(): - all_monitors[m['name']] = m - except Exception as e: - logger.warning(f"Failed to re-fetch monitors for status pages: {e}") - existing_pages = {} try: for p in api.get_status_pages(): @@ -339,18 +342,18 @@ def setup_uptime_kuma(dry_run=False, only=None): api.add_status_page(slug, title) # Each monitors.yml group becomes one display section on the status page. - # The GROUP monitor is added so Uptime Kuma renders it with all its children. + # Use group_map (populated during Section 1) to avoid re-fetching monitors; + # a fresh get_monitors() call after add_monitor() races with WebSocket delivery. public_group_list = [] for group_raw_name in sp_groups: - group_formatted = f"{project} [{env_name}] {group_raw_name}" - group_monitor = all_monitors.get(group_formatted) - if not group_monitor: - logger.warning(f"Group '{group_formatted}' not found, skipping in status page") + group_id = group_map.get(group_raw_name) + if not group_id: + logger.warning(f"Group '{group_raw_name}' not in group_map, skipping in status page") continue public_group_list.append({ "name": group_raw_name, "weight": len(public_group_list) + 1, - "monitorList": [{"id": group_monitor['id']}] + "monitorList": [{"id": group_id}] }) if public_group_list: