From 8a056a381b114b599870686fd246e8daf052145b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= Date: Fri, 26 Jun 2026 23:23:02 +0300 Subject: [PATCH] fix(monitoring): prevent Vault crash and DNS null error - Vault: Wrap resp.json() in a try-except block to prevent JSONDecodeError when hitting an HTML error page (e.g. 502/503). This prevents the entire agent from crashing and missing heartbeats. - Uptime Kuma DNS: Explicitly set dns_resolve_server to 1.1.1.1 in Python API payload to prevent Uptime Kuma backend from crashing on null properties. --- health-agent/scripts/setup_uptime_kuma.py | 2 ++ health-agent/src/health_agent/checks/http.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/health-agent/scripts/setup_uptime_kuma.py b/health-agent/scripts/setup_uptime_kuma.py index 8a9294e..985f5d5 100644 --- a/health-agent/scripts/setup_uptime_kuma.py +++ b/health-agent/scripts/setup_uptime_kuma.py @@ -261,6 +261,7 @@ def setup_uptime_kuma(dry_run=False, only=None): "hostname": hostname, "port": 53, "accepted_statuscodes": ["200-299"], + "dns_resolve_server": "1.1.1.1", "dns_resolve_type": dns_resolve_type, "interval": interval, } @@ -280,6 +281,7 @@ def setup_uptime_kuma(dry_run=False, only=None): "hostname": hostname, "port": 53, "accepted_statuscodes": ["200-299"], + "dns_resolve_server": "1.1.1.1", "dns_resolve_type": dns_resolve_type, "interval": interval, } diff --git a/health-agent/src/health_agent/checks/http.py b/health-agent/src/health_agent/checks/http.py index 5191392..257d721 100644 --- a/health-agent/src/health_agent/checks/http.py +++ b/health-agent/src/health_agent/checks/http.py @@ -140,11 +140,14 @@ def check_vault(): max_ping = max(max_ping, ms) if resp is not None: - data = resp.json() - if not data.get("sealed"): - unsealed_count += 1 - else: - errors.append(f"{node} SEALED") + try: + data = resp.json() + if not data.get("sealed"): + unsealed_count += 1 + else: + errors.append(f"{node} SEALED") + except Exception as e: + errors.append(f"{node} invalid response: {resp.status_code}") else: errors.append(f"{node} unreachable: {err}")