From 8347b7e25dc15cdd7155482869ddd934765e3d74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 18:50:31 +0300
Subject: [PATCH 01/22] fix(common-functions): add no-op to empty
 refresh_calculated_env_vars to fix bash syntax error

---
 common-functions-base.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common-functions-base.sh b/common-functions-base.sh
index aed8052..fca5434 100644
--- a/common-functions-base.sh
+++ b/common-functions-base.sh
@@ -68,7 +68,7 @@ lookup_env_value() {
 
 # Matematiksel veya mantıksal işlem gerektiren env değerlerini hesaplar.
 refresh_calculated_env_vars() {
-  
+  :
 }
 
 # Tüm çevre dosyalarını (ana env, ortak sırlar ve servis sırları) tazeleyerek yükler.

From 656968823b99adacc9142f8280b9486d820e9b25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 18:55:41 +0300
Subject: [PATCH 02/22] ci(workflow): replace paths filter with paths-ignore to
 trigger on any change except .venv and __pycache__

---
 .gitea/workflows/deploy-monitoring-prod.yml | 8 +++-----
 .gitea/workflows/deploy-monitoring-test.yml | 8 +++-----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/.gitea/workflows/deploy-monitoring-prod.yml b/.gitea/workflows/deploy-monitoring-prod.yml
index 10bcaf9..b4188a4 100644
--- a/.gitea/workflows/deploy-monitoring-prod.yml
+++ b/.gitea/workflows/deploy-monitoring-prod.yml
@@ -4,11 +4,9 @@ on:
   push:
     branches:
       - prod-env
-    paths:
-      - 'docker-stack-monitoring.yml'
-      - 'health-agent/deploy/prod.env'
-      - 'swag/**'
-      - '.gitea/workflows/deploy-monitoring-prod.yml'
+    paths-ignore:
+      - '**/.venv/**'
+      - '**/__pycache__/**'
 
 concurrency:
   group: prod-monitoring-deploy
diff --git a/.gitea/workflows/deploy-monitoring-test.yml b/.gitea/workflows/deploy-monitoring-test.yml
index f96d7f5..ccca68e 100644
--- a/.gitea/workflows/deploy-monitoring-test.yml
+++ b/.gitea/workflows/deploy-monitoring-test.yml
@@ -4,11 +4,9 @@ on:
   push:
     branches:
       - test
-    paths:
-      - 'docker-stack-monitoring.yml'
-      - 'health-agent/**'
-      - 'swag/**'
-      - '.gitea/workflows/deploy-monitoring-test.yml'
+    paths-ignore:
+      - '**/.venv/**'
+      - '**/__pycache__/**'
 
 jobs:
   deploy:

From 344ab4ac131abc59438c646667909660777acecc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 18:56:13 +0300
Subject: [PATCH 03/22] ci(workflow): remove redundant paths-ignore filter;
 .gitignore already excludes those paths

---
 .gitea/workflows/deploy-monitoring-prod.yml | 4 +---
 .gitea/workflows/deploy-monitoring-test.yml | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/.gitea/workflows/deploy-monitoring-prod.yml b/.gitea/workflows/deploy-monitoring-prod.yml
index b4188a4..f6d3c9d 100644
--- a/.gitea/workflows/deploy-monitoring-prod.yml
+++ b/.gitea/workflows/deploy-monitoring-prod.yml
@@ -4,9 +4,7 @@ on:
   push:
     branches:
       - prod-env
-    paths-ignore:
-      - '**/.venv/**'
-      - '**/__pycache__/**'
+
 
 concurrency:
   group: prod-monitoring-deploy
diff --git a/.gitea/workflows/deploy-monitoring-test.yml b/.gitea/workflows/deploy-monitoring-test.yml
index ccca68e..7dc1d18 100644
--- a/.gitea/workflows/deploy-monitoring-test.yml
+++ b/.gitea/workflows/deploy-monitoring-test.yml
@@ -4,9 +4,7 @@ on:
   push:
     branches:
       - test
-    paths-ignore:
-      - '**/.venv/**'
-      - '**/__pycache__/**'
+
 
 jobs:
   deploy:

From 0ef4f0b6f80f3aca307a1a2a88db428c30a3bb84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 19:24:01 +0300
Subject: [PATCH 04/22] refactor: rename iklimco-monitoring stack to monitoring

---
 .gitea/workflows/deploy-monitoring-prod.yml |   8 +-
 .gitea/workflows/deploy-monitoring-test.yml |   8 +-
 README.md                                   |   2 +-
 health-agent/README.md                      |   4 +-
 health-agent/scripts/setup_uptime_kuma.py   | 332 +++++++++++++++++---
 5 files changed, 299 insertions(+), 55 deletions(-)

diff --git a/.gitea/workflows/deploy-monitoring-prod.yml b/.gitea/workflows/deploy-monitoring-prod.yml
index f6d3c9d..80ccc6f 100644
--- a/.gitea/workflows/deploy-monitoring-prod.yml
+++ b/.gitea/workflows/deploy-monitoring-prod.yml
@@ -119,14 +119,14 @@ jobs:
             --with-registry-auth \
             --resolve-image changed \
             -c docker-stack-monitoring.yml \
-            iklimco-monitoring
+            monitoring
 
       - name: Wait for Loki
         run: |
           source ./common-functions-base.sh
           export SPRING_PROFILES_ACTIVE=PROD
           for i in $(seq 1 36); do
-            REPLICAS=$(docker service ls --filter name=iklimco-monitoring_loki --format "{{.Replicas}}" | head -1)
+            REPLICAS=$(docker service ls --filter name=monitoring_loki --format "{{.Replicas}}" | head -1)
             if echo "$REPLICAS" | awk -F'[/ ]' '$1>0 && $1==$2{found=1} END{exit !found}'; then
               log_message "SUCCESS" "Loki is ready: $REPLICAS"
               exit 0
@@ -134,7 +134,7 @@ jobs:
             log_message "INFO" "Loki not ready yet (${REPLICAS:-missing}), waiting 5s..."
             sleep 5
           done
-          docker service ps iklimco-monitoring_loki || true
+          docker service ps monitoring_loki || true
           exit 1
 
       - name: Configure SWAG Reverse Proxy
@@ -190,6 +190,6 @@ jobs:
 
       - name: Verify Deployment
         run: |
-          docker service ps iklimco-monitoring_loki \
+          docker service ps monitoring_loki \
             --filter "desired-state=running" \
             --format "table {{.Name}}\t{{.Node}}\t{{.CurrentState}}\t{{.Image}}" | head -20
diff --git a/.gitea/workflows/deploy-monitoring-test.yml b/.gitea/workflows/deploy-monitoring-test.yml
index 7dc1d18..f271fc4 100644
--- a/.gitea/workflows/deploy-monitoring-test.yml
+++ b/.gitea/workflows/deploy-monitoring-test.yml
@@ -105,14 +105,14 @@ jobs:
             --with-registry-auth \
             --resolve-image changed \
             -c docker-stack-monitoring.yml \
-            iklimco-monitoring
+            monitoring
 
       - name: Wait for Loki
         run: |
           source ./common-functions-base.sh
           export SPRING_PROFILES_ACTIVE=TEST
           for i in $(seq 1 36); do
-            REPLICAS=$(docker service ls --filter name=iklimco-monitoring_loki --format "{{.Replicas}}" | head -1)
+            REPLICAS=$(docker service ls --filter name=monitoring_loki --format "{{.Replicas}}" | head -1)
             if echo "$REPLICAS" | awk -F'[/ ]' '$1>0 && $1==$2{found=1} END{exit !found}'; then
               log_message "SUCCESS" "Loki is ready: $REPLICAS"
               exit 0
@@ -120,7 +120,7 @@ jobs:
             log_message "INFO" "Loki not ready yet (${REPLICAS:-missing}), waiting 5s..."
             sleep 5
           done
-          docker service ps iklimco-monitoring_loki || true
+          docker service ps monitoring_loki || true
           exit 1
 
       - name: Configure SWAG Reverse Proxy
@@ -176,6 +176,6 @@ jobs:
 
       - name: Verify Deployment
         run: |
-          docker service ps iklimco-monitoring_loki \
+          docker service ps monitoring_loki \
             --filter "desired-state=running" \
             --format "table {{.Name}}\t{{.Node}}\t{{.CurrentState}}\t{{.Image}}" | head -20
diff --git a/README.md b/README.md
index e6db89a..67b41c1 100644
--- a/README.md
+++ b/README.md
@@ -135,7 +135,7 @@ Mevcut dashboard'lara log paneli eklemek için:
 docker stack deploy \
   --with-registry-auth \
   -c Environment_Monitoring/docker-stack-monitoring.yml \
-  iklimco-monitoring
+  monitoring
 ```
 
 Prod için Gitea workflow'u: `Environment_Monitoring/.gitea/workflows/deploy-monitoring-prod.yml`
diff --git a/health-agent/README.md b/health-agent/README.md
index 5ab86a8..1789d0a 100644
--- a/health-agent/README.md
+++ b/health-agent/README.md
@@ -190,7 +190,7 @@ python scripts/setup_uptime_kuma.py
 docker stack deploy \
   --with-registry-auth \
   -c docker-stack-monitoring.yml \
-  iklimco-monitoring
+  monitoring
 ```
 
 Health-agent `iklimco-net` overlay ağına bağlı olmalı ve Docker socket'a salt okunur erişimi olmalıdır.
@@ -199,7 +199,7 @@ Health-agent `iklimco-net` overlay ağına bağlı olmalı ve Docker socket'a sa
 
 ## Log Formatı
 
-Agent JSON formatında log üretir. Grafana Explore (Loki datasource, `{service="iklimco-monitoring_health-agent"}`) veya `docker service logs iklimco-monitoring_health-agent` ile izlenebilir. Her log girdisi şu alanları içerir:
+Agent JSON formatında log üretir. Grafana Explore (Loki datasource, `{service="monitoring_health-agent"}`) veya `docker service logs monitoring_health-agent` ile izlenebilir. Her log girdisi şu alanları içerir:
 
 - `check` — monitor adı
 - `status` — `up` veya `down`
diff --git a/health-agent/scripts/setup_uptime_kuma.py b/health-agent/scripts/setup_uptime_kuma.py
index 0bc5a35..2f6c3c7 100644
--- a/health-agent/scripts/setup_uptime_kuma.py
+++ b/health-agent/scripts/setup_uptime_kuma.py
@@ -3,7 +3,7 @@ import argparse
 import yaml
 import logging
 from dotenv import load_dotenv
-from uptime_kuma_api import UptimeKumaApi, MonitorType
+from uptime_kuma_api import UptimeKumaApi, MonitorType, NotificationType
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger("uk-setup")
@@ -12,24 +12,53 @@ _root = os.path.join(os.path.dirname(__file__), "..")
 load_dotenv(os.path.join(_root, ".env"))
 load_dotenv(os.path.join(_root, ".env.setup"))
 
+
 def format_str(text, env_name, project):
     if not isinstance(text, str):
         return text
     return text.replace("{env}", env_name).replace("{project}", project)
 
+
+def resolve_template(text, suffix, domain):
+    if not isinstance(text, str):
+        return text
+    return text.replace("{suffix}", suffix).replace("{domain}", domain)
+
+
+def find_parent_group(monitor_name, groups, group_map):
+    for g in groups:
+        if monitor_name in g.get("children", []):
+            return group_map.get(g["name"])
+    return None
+
+
+def find_group_notifications(monitor_name, groups, notification_map):
+    for g in groups:
+        if monitor_name in g.get("children", []):
+            ids = {}
+            for n in g.get("notifications", []):
+                nid = notification_map.get(n)
+                if nid is not None:
+                    ids[str(nid)] = True
+            return ids or None
+    return None
+
+
 def setup_uptime_kuma(dry_run=False, only=None):
     env_name = os.getenv("ENV", "test")
-    
+
     config_path = os.path.join(os.path.dirname(__file__), "..", "config", "monitors.yml")
     with open(config_path, "r") as f:
         config = yaml.safe_load(f)
-        
+
     project = config.get("project", "iklim")
-    
+    domain = os.getenv("EXTERNAL_DOMAIN", config.get("domain", {}).get("base", "iklim.co"))
+    suffix = os.getenv("EXTERNAL_SUBDOMAIN_SUFFIX", "")
+
     kuma_url = os.getenv("UK_URL", "http://localhost:3001")
     kuma_user = os.getenv("UK_USER", "admin")
     kuma_pass = os.getenv("UK_PASS", "admin")
-    
+
     api = None
     if not dry_run:
         logger.info(f"Connecting to Uptime Kuma at {kuma_url}...")
@@ -39,7 +68,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
         except Exception as e:
             logger.error(f"Login failed: {e}")
             return
-            
+
     existing_monitors = {}
     if api:
         try:
@@ -47,97 +76,312 @@ def setup_uptime_kuma(dry_run=False, only=None):
                 existing_monitors[m['name']] = m
         except Exception as e:
             logger.error(f"Failed to get monitors: {e}")
-            
-    # 1. Process Groups
+
+    # 0. Notification Providers
+    notification_map = {}
+    existing_notifications = {}
+    if api:
+        try:
+            for n in api.get_notifications():
+                existing_notifications[n['name']] = n
+        except Exception as e:
+            logger.warning(f"Failed to get notifications: {e}")
+
+    for notif_key, notif_cfg in config.get("notifications", {}).items():
+        webhook_env = notif_cfg.get("webhook_env")
+        webhook_url = os.getenv(webhook_env, "") if webhook_env else ""
+        notif_name = f"{project}-{notif_key}"
+
+        logger.info(f"Processing notification: {notif_name}")
+        if not dry_run:
+            if notif_name in existing_notifications:
+                notification_map[notif_key] = existing_notifications[notif_name]['id']
+                logger.info(f"Notification {notif_name} already exists (id={notification_map[notif_key]})")
+            elif webhook_url:
+                try:
+                    res = api.add_notification(
+                        type=NotificationType.SLACK,
+                        name=notif_name,
+                        isDefault=False,
+                        webhookURL=webhook_url,
+                        applyExisting=False
+                    )
+                    notification_map[notif_key] = res.get('id')
+                    logger.info(f"Created notification: {notif_name}")
+                except Exception as e:
+                    logger.warning(f"Failed to create notification {notif_name}: {e}")
+            else:
+                logger.warning(f"Skipping {notif_name}: env var {webhook_env} is not set")
+
+    # 1. Groups
     group_map = {}
     for g in config.get("groups", []):
         raw_name = g["name"]
         formatted_name = f"{project} [{env_name}] {raw_name}"
-        
+
+        notif_ids = {}
+        for n in g.get("notifications", []):
+            nid = notification_map.get(n)
+            if nid is not None:
+                notif_ids[str(nid)] = True
+
         logger.info(f"Processing group: {formatted_name}")
         if not dry_run:
             if formatted_name not in existing_monitors:
                 logger.info(f"Creating group monitor: {formatted_name}")
-                res = api.add_monitor(type=MonitorType.GROUP, name=formatted_name)
+                kwargs = {"type": MonitorType.GROUP, "name": formatted_name}
+                if notif_ids:
+                    kwargs["notification_id_list"] = notif_ids
+                res = api.add_monitor(**kwargs)
                 group_map[raw_name] = res['monitorID']
             else:
                 group_map[raw_name] = existing_monitors[formatted_name]['id']
 
     tokens = {}
-    
+
     # 2. Push Monitors
     for pm in config.get("push_monitors", []):
         m_name = pm["name"]
         if only and m_name != only:
             continue
-            
+
         m_interval = pm.get("interval", 60)
-        
-        parent_group_id = None
-        for g in config.get("groups", []):
-            if m_name in g.get("children", []):
-                parent_group_id = group_map.get(g["name"])
-                break
-                
+        parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
+        notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
+
         logger.info(f"Processing push monitor: {m_name}")
         if not dry_run:
             if m_name in existing_monitors:
                 logger.info(f"Monitor {m_name} already exists.")
                 m_id = existing_monitors[m_name]['id']
-                token = existing_monitors[m_name]['pushToken']
-                tokens[m_name] = token
-                
+                tokens[m_name] = existing_monitors[m_name]['pushToken']
+
                 if parent_group_id and existing_monitors[m_name].get('parent') != parent_group_id:
                     api.edit_monitor(m_id, parent=parent_group_id)
             else:
                 logger.info(f"Creating push monitor: {m_name}")
-                result = api.add_monitor(
-                    type=MonitorType.PUSH,
-                    name=m_name,
-                    interval=m_interval,
-                    parent=parent_group_id
-                )
+                kwargs = {
+                    "type": MonitorType.PUSH,
+                    "name": m_name,
+                    "interval": m_interval,
+                    "parent": parent_group_id
+                }
+                if notif_ids:
+                    kwargs["notification_id_list"] = notif_ids
+                result = api.add_monitor(**kwargs)
                 m_id = result['monitorID']
-                
-                # Fetch again to get pushToken
+
                 for m in api.get_monitors():
                     if m['id'] == m_id:
                         tokens[m_name] = m['pushToken']
                         break
         else:
             tokens[m_name] = "dummy_token_dry_run"
-            
-    # 3. Process Status Pages
-    for sp in config.get("status_pages", []):
-        slug = format_str(sp["slug"], env_name, project)
-        title = format_str(sp["title"], env_name, project)
-        logger.info(f"Processing status page: {title} (slug: {slug})")
+
+    # 3. HTTP Monitors
+    for hm in config.get("http_monitors", []):
+        m_name = hm["name"]
+        if only and m_name != only:
+            continue
+        url = resolve_template(hm["url"], suffix, domain)
+        interval = hm.get("interval", 60)
+        accepted_statuscodes = hm.get("accepted_statuscodes", ["200"])
+        parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
+        notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
+
+        logger.info(f"Processing HTTP monitor: {m_name} -> {url}")
         if not dry_run:
+            if m_name in existing_monitors:
+                logger.info(f"Monitor {m_name} already exists.")
+            else:
+                try:
+                    kwargs = {
+                        "type": MonitorType.HTTP,
+                        "name": m_name,
+                        "url": url,
+                        "interval": interval,
+                        "accepted_statuscodes": accepted_statuscodes,
+                    }
+                    if parent_group_id is not None:
+                        kwargs["parent"] = parent_group_id
+                    if notif_ids:
+                        kwargs["notification_id_list"] = notif_ids
+                    api.add_monitor(**kwargs)
+                    logger.info(f"Created HTTP monitor: {m_name}")
+                except Exception as e:
+                    logger.warning(f"Failed to create HTTP monitor {m_name}: {e}")
+
+    # 4. DNS Monitors
+    for dm in config.get("dns_monitors", []):
+        m_name = dm["name"]
+        if only and m_name != only:
+            continue
+        hostname = resolve_template(dm["hostname"], suffix, domain)
+        dns_resolve_type = dm.get("dns_resolve_type", "A")
+        interval = dm.get("interval", 60)
+        parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
+        notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
+
+        logger.info(f"Processing DNS monitor: {m_name} -> {hostname}")
+        if not dry_run:
+            if m_name in existing_monitors:
+                logger.info(f"Monitor {m_name} already exists.")
+            else:
+                try:
+                    kwargs = {
+                        "type": MonitorType.DNS,
+                        "name": m_name,
+                        "hostname": hostname,
+                        "dns_resolve_type": dns_resolve_type,
+                        "interval": interval,
+                    }
+                    if parent_group_id is not None:
+                        kwargs["parent"] = parent_group_id
+                    if notif_ids:
+                        kwargs["notification_id_list"] = notif_ids
+                    api.add_monitor(**kwargs)
+                    logger.info(f"Created DNS monitor: {m_name}")
+                except Exception as e:
+                    logger.warning(f"Failed to create DNS monitor {m_name}: {e}")
+
+    # 5. Ping Monitors (generated from nodes config)
+    ping_cfg = config.get("ping_monitors", {})
+    ping_interval = ping_cfg.get("interval", 60)
+    ping_retries = ping_cfg.get("max_retries", 1)
+    env_nodes = config.get("nodes", {}).get(env_name, {})
+
+    for i, node in enumerate(env_nodes.get("service", []), 1):
+        m_name = f"EXT-PING-APP{i:02d}"
+        if only and m_name != only:
+            continue
+        ip = node["ip"]
+        parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
+        notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
+
+        logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
+        if not dry_run:
+            if m_name in existing_monitors:
+                logger.info(f"Monitor {m_name} already exists.")
+            else:
+                try:
+                    kwargs = {
+                        "type": MonitorType.PING,
+                        "name": m_name,
+                        "hostname": ip,
+                        "interval": ping_interval,
+                        "max_retries": ping_retries,
+                    }
+                    if parent_group_id is not None:
+                        kwargs["parent"] = parent_group_id
+                    if notif_ids:
+                        kwargs["notification_id_list"] = notif_ids
+                    api.add_monitor(**kwargs)
+                    logger.info(f"Created Ping monitor: {m_name}")
+                except Exception as e:
+                    logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
+
+    for i, node in enumerate(env_nodes.get("db", []), 1):
+        m_name = f"EXT-PING-DB{i:02d}"
+        if only and m_name != only:
+            continue
+        ip = node["ip"]
+        parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
+        notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
+
+        logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
+        if not dry_run:
+            if m_name in existing_monitors:
+                logger.info(f"Monitor {m_name} already exists.")
+            else:
+                try:
+                    kwargs = {
+                        "type": MonitorType.PING,
+                        "name": m_name,
+                        "hostname": ip,
+                        "interval": ping_interval,
+                        "max_retries": ping_retries,
+                    }
+                    if parent_group_id is not None:
+                        kwargs["parent"] = parent_group_id
+                    if notif_ids:
+                        kwargs["notification_id_list"] = notif_ids
+                    api.add_monitor(**kwargs)
+                    logger.info(f"Created Ping monitor: {m_name}")
+                except Exception as e:
+                    logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
+
+    # 6. Status Pages
+    if api:
+        all_monitors = {}
+        try:
+            for m in api.get_monitors():
+                all_monitors[m['name']] = m
+        except Exception as e:
+            logger.warning(f"Failed to re-fetch monitors for status pages: {e}")
+
+        existing_pages = {}
+        try:
+            for p in api.get_status_pages():
+                existing_pages[p['slug']] = p
+        except Exception as e:
+            logger.warning(f"Failed to get status pages: {e}")
+
+        for sp in config.get("status_pages", []):
+            slug = format_str(sp["slug"], env_name, project)
+            title = format_str(sp["title"], env_name, project)
+            is_public = sp.get("public", False)
+            sp_groups = sp.get("groups", [])
+
+            logger.info(f"Processing status page: {title} (slug: {slug})")
             try:
-                pages = api.get_status_pages()
-                exists = any(p['slug'] == slug for p in pages)
-                if not exists:
+                if slug not in existing_pages:
                     logger.info(f"Creating status page: {slug}")
                     api.add_status_page(slug, title)
+
+                # Each monitors.yml group becomes one display section on the status page.
+                # The GROUP monitor is added so Uptime Kuma renders it with all its children.
+                public_group_list = []
+                for group_raw_name in sp_groups:
+                    group_formatted = f"{project} [{env_name}] {group_raw_name}"
+                    group_monitor = all_monitors.get(group_formatted)
+                    if not group_monitor:
+                        logger.warning(f"Group '{group_formatted}' not found, skipping in status page")
+                        continue
+                    public_group_list.append({
+                        "name": group_raw_name,
+                        "weight": len(public_group_list) + 1,
+                        "monitorList": [{"id": group_monitor['id']}]
+                    })
+
+                if public_group_list:
+                    api.save_status_page(
+                        slug=slug,
+                        title=title,
+                        publicGroupList=public_group_list,
+                        published=is_public
+                    )
+                    logger.info(f"Saved status page '{slug}' with {len(public_group_list)} group(s)")
             except Exception as e:
-                logger.warning(f"Status page ops failed: {e}")
-                
-    # 4. Write tokens to uk_tokens.yml
+                logger.warning(f"Status page ops failed for {slug}: {e}")
+
+    # 7. Write push tokens to uk_tokens.yml
     token_file = os.path.join(os.path.dirname(__file__), "..", "config", "generated", "uk_tokens.yml")
     if not dry_run:
+        os.makedirs(os.path.dirname(token_file), exist_ok=True)
         with open(token_file, "w") as f:
             yaml.dump(tokens, f)
         logger.info(f"Saved push tokens to {token_file}")
     else:
         logger.info(f"[DRY-RUN] Would save {len(tokens)} tokens to {token_file}")
-        
+
     if api:
         api.disconnect()
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Setup Uptime Kuma monitors")
     parser.add_argument("--dry-run", action="store_true", help="Print actions without making changes")
     parser.add_argument("--only", type=str, help="Only process a specific monitor by name")
     args = parser.parse_args()
-    
+
     setup_uptime_kuma(dry_run=args.dry_run, only=args.only)

From 9fbc74d4985f4619648e683c8177b222f6948987 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 19:39:48 +0300
Subject: [PATCH 05/22] fix(workflow): use -s flag to trigger Uptime Kuma setup
 on empty uk_tokens.yml

The previous ! -f check skipped setup when uk_tokens.yml existed but was empty (0 bytes). Switching to ! -s triggers setup whenever the file is missing or empty.
---
 .gitea/workflows/deploy-monitoring-prod.yml | 2 +-
 .gitea/workflows/deploy-monitoring-test.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitea/workflows/deploy-monitoring-prod.yml b/.gitea/workflows/deploy-monitoring-prod.yml
index 80ccc6f..a6478b9 100644
--- a/.gitea/workflows/deploy-monitoring-prod.yml
+++ b/.gitea/workflows/deploy-monitoring-prod.yml
@@ -90,7 +90,7 @@ jobs:
           export SPRING_PROFILES_ACTIVE=PROD
           source_env_file ./health-agent/.env
           mkdir -p "${HEALTH_AGENT_CONFIG_GENERATED_DIR}"
-          if [ ! -f "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then
+          if [ ! -s "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then
             docker run --rm \
               -v "${HEALTH_AGENT_CONFIG_GENERATED_DIR}:/app/config/generated" \
               --env-file "$(pwd)/health-agent/.env" \
diff --git a/.gitea/workflows/deploy-monitoring-test.yml b/.gitea/workflows/deploy-monitoring-test.yml
index f271fc4..ea5b98e 100644
--- a/.gitea/workflows/deploy-monitoring-test.yml
+++ b/.gitea/workflows/deploy-monitoring-test.yml
@@ -80,7 +80,7 @@ jobs:
           export SPRING_PROFILES_ACTIVE=TEST
           source_env_file ./health-agent/.env
           mkdir -p "${HEALTH_AGENT_CONFIG_GENERATED_DIR}"
-          if [ ! -f "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then
+          if [ ! -s "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then
             docker run --rm \
               -v "${HEALTH_AGENT_CONFIG_GENERATED_DIR}:/app/config/generated" \
               --env-file "$(pwd)/health-agent/.env" \

From 8d5fe55b148cc651b272b4c4561b15fcdeb27d57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 20:06:48 +0300
Subject: [PATCH 06/22] chore(health-agent): redeploy with new image digest

---
 health-agent/deploy/prod.env | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/health-agent/deploy/prod.env b/health-agent/deploy/prod.env
index 92297cd..0a993b4 100644
--- a/health-agent/deploy/prod.env
+++ b/health-agent/deploy/prod.env
@@ -1,2 +1,2 @@
-SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:fadf229d4423075d2871f9dc4a5a0afdf6dfe7c5fcd04d866b2d6d6fe8942b56
+SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:e262bf6e6712862ba24551dc326411ebb0987da59072834b2923bd73cb5c9d3b
 PROD_IMAGE_TAG=0.1.0
\ No newline at end of file

From d51c07355611e2e58dfd4c113d4e539dec8c2a1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 20:37:42 +0300
Subject: [PATCH 07/22] fix(health-agent): fix uk_tokens.yml load race and
 LogRecord msg conflict

- config.py: Replace exists()+open() with try/except open() to avoid TOCTOU race on SSHFS mounts where stat can succeed but open can fail with FileNotFoundError.
- uptime_kuma.py: Rename msg key to push_msg in logger extra dicts. Python's LogRecord reserves the msg attribute; passing it in extra raises KeyError, which was caught by the broad except block and logged as a push failure, masking successful pushes as errors.
---
 health-agent/src/health_agent/config.py      | 8 ++++----
 health-agent/src/health_agent/uptime_kuma.py | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/health-agent/src/health_agent/config.py b/health-agent/src/health_agent/config.py
index dbff2be..c00ad28 100644
--- a/health-agent/src/health_agent/config.py
+++ b/health-agent/src/health_agent/config.py
@@ -16,10 +16,10 @@ EXTERNAL_DOMAIN = os.getenv("EXTERNAL_DOMAIN", "iklim.co")
 EXTERNAL_SUBDOMAIN_SUFFIX = os.getenv("EXTERNAL_SUBDOMAIN_SUFFIX", "")
 
 def load_uk_tokens():
-    token_file = Path("config/generated/uk_tokens.yml")
-    if not token_file.exists():
+    try:
+        with open("config/generated/uk_tokens.yml", "r") as f:
+            return yaml.safe_load(f) or {}
+    except (FileNotFoundError, OSError):
         return {}
-    with open(token_file, "r") as f:
-        return yaml.safe_load(f) or {}
 
 UK_TOKENS = load_uk_tokens()
diff --git a/health-agent/src/health_agent/uptime_kuma.py b/health-agent/src/health_agent/uptime_kuma.py
index 357bc90..b0fcf53 100644
--- a/health-agent/src/health_agent/uptime_kuma.py
+++ b/health-agent/src/health_agent/uptime_kuma.py
@@ -15,7 +15,7 @@ def push(monitor_name: str, status: str, msg: str, ping_ms: int):
         return
 
     if DRY_RUN:
-        logger.info(f"[DRY-RUN] Would push {monitor_name} status={status} msg={msg} ping={ping_ms}ms", extra={"check": monitor_name, "status": status, "msg": msg, "ping_ms": ping_ms, "source": "uptime_kuma"})
+        logger.info(f"[DRY-RUN] Would push {monitor_name} status={status} msg={msg} ping={ping_ms}ms", extra={"check": monitor_name, "status": status, "push_msg": msg, "ping_ms": ping_ms, "source": "uptime_kuma"})
         return
 
     url = f"{UK_PUSH_URL_BASE}/{token}"
@@ -28,6 +28,6 @@ def push(monitor_name: str, status: str, msg: str, ping_ms: int):
     try:
         response = requests.get(url, params=params, timeout=10)
         response.raise_for_status()
-        logger.info(f"Pushed {monitor_name} status={status}", extra={"check": monitor_name, "status": status, "msg": msg, "ping_ms": ping_ms, "source": "uptime_kuma"})
+        logger.info(f"Pushed {monitor_name} status={status}", extra={"check": monitor_name, "status": status, "push_msg": msg, "ping_ms": ping_ms, "source": "uptime_kuma"})
     except Exception as e:
         logger.error(f"Failed to push {monitor_name}: {e}", extra={"check": monitor_name, "status": "push_failed", "error": str(e), "source": "uptime_kuma"})

From bc8b3d0934b83dbfd640c0942336e4a8fe3d21df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 20:47:31 +0300
Subject: [PATCH 08/22] refactor: convert all monitor names to Title Case and
 update health-agent digest

---
 health-agent/config/monitors.yml          | 52 +++++++++++------------
 health-agent/deploy/prod.env              |  2 +-
 health-agent/scripts/setup_uptime_kuma.py |  4 +-
 3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/health-agent/config/monitors.yml b/health-agent/config/monitors.yml
index 26edf01..3cfe7fd 100644
--- a/health-agent/config/monitors.yml
+++ b/health-agent/config/monitors.yml
@@ -50,126 +50,126 @@ groups:
     status_page: "iklim-{env}-ops"
     notifications: [slack-high]
     tags: [internal, infrastructure]
-    children: [SWARM-CLUSTER, VAULT-CLUSTER, STORAGEBOX-MOUNT, SWAG-TLS]
+    children: [Swarm Cluster, Vault Cluster, Storagebox Mount, Swag Tls]
   - name: "Data Layer"
     status_page: "iklim-{env}-ops"
     notifications: [slack-high]
     tags: [internal, database]
-    children: [ETCD-CLUSTER, PATRONI-CLUSTER, MONGODB-REPLICASET]
+    children: [Etcd Cluster, Patroni Cluster, Mongodb Replicaset]
   - name: "Gateway & Messaging"
     status_page: "iklim-{env}-ops"
     notifications: [slack-high]
     tags: [internal, gateway]
-    children: [APISIX-GATEWAY, RABBITMQ-CLUSTER, REDIS-SENTINEL]
+    children: [Apisix Gateway, Rabbitmq Cluster, Redis Sentinel]
   - name: "External Availability - Critical"
     status_page: "iklim-{env}-ops"
     notifications: [slack-high]
     tags: [external, high]
-    children: [EXT-HTTPS-API, EXT-DNS-API, EXT-DNS-ROOT, EXT-PING-APP01, EXT-PING-APP02, EXT-PING-APP03]
+    children: [Ext Https Api, Ext Dns Api, Ext Dns Root, Ext Ping App01, Ext Ping App02, Ext Ping App03]
   - name: "External Availability - General"
     status_page: "iklim-{env}-ops"
     notifications: [slack-medium]
     tags: [external, medium]
-    children: [EXT-HTTPS-GRAFANA, EXT-PING-DB01, EXT-PING-DB02, EXT-PING-DB03]
+    children: [Ext Https Grafana, Ext Ping Db01, Ext Ping Db02, Ext Ping Db03]
   - name: "Observability"
     status_page: "iklim-{env}-tools"
     notifications: [slack-low]
     tags: [internal, observability]
-    children: [PROMETHEUS, GRAFANA, PORTAINER, LOKI, EXT-HTTPS-PORTAINER, EXT-HTTPS-APIGW]
+    children: [Prometheus, Grafana, Portainer, Loki, Ext Https Portainer, Ext Https Apigw]
 push_monitors:
-  - name: SWARM-CLUSTER
+  - name: Swarm Cluster
     interval: 60
     heartbeat_retries: 1
     tags: [internal, infrastructure, high]
     restart_threshold: 1
-  - name: VAULT-CLUSTER
+  - name: Vault Cluster
     interval: 60
     heartbeat_retries: 1
     tags: [internal, infrastructure, high]
     restart_threshold: 1
-  - name: ETCD-CLUSTER
+  - name: Etcd Cluster
     interval: 60
     heartbeat_retries: 1
     tags: [internal, database, high]
     restart_threshold: 1
-  - name: PATRONI-CLUSTER
+  - name: Patroni Cluster
     interval: 60
     heartbeat_retries: 1
     tags: [internal, database, high]
     restart_threshold: 1
-  - name: MONGODB-REPLICASET
+  - name: Mongodb Replicaset
     interval: 120
     heartbeat_retries: 1
     tags: [internal, database, high]
     restart_threshold: 1
-  - name: APISIX-GATEWAY
+  - name: Apisix Gateway
     interval: 60
     heartbeat_retries: 1
     tags: [internal, gateway, high]
     restart_threshold: 1
-  - name: RABBITMQ-CLUSTER
+  - name: Rabbitmq Cluster
     interval: 60
     heartbeat_retries: 1
     tags: [internal, gateway, medium]
     restart_threshold: 3
-  - name: REDIS-SENTINEL
+  - name: Redis Sentinel
     interval: 60
     heartbeat_retries: 1
     tags: [internal, database, medium]
     restart_threshold: 3
-  - name: SWAG-TLS
+  - name: Swag Tls
     interval: 3600
     heartbeat_retries: 1
     tags: [internal, infrastructure, medium]
     restart_threshold: 3
-  - name: STORAGEBOX-MOUNT
+  - name: Storagebox Mount
     interval: 300
     heartbeat_retries: 1
     tags: [internal, infrastructure, medium]
     restart_threshold: 1
-  - name: PROMETHEUS
+  - name: Prometheus
     interval: 120
     heartbeat_retries: 1
     tags: [internal, observability, low]
     restart_threshold: 5
-  - name: GRAFANA
+  - name: Grafana
     interval: 120
     heartbeat_retries: 1
     tags: [internal, observability, low]
     restart_threshold: 5
-  - name: PORTAINER
+  - name: Portainer
     interval: 120
     heartbeat_retries: 1
     tags: [internal, observability, low]
     restart_threshold: 5
-  - name: LOKI
+  - name: Loki
     interval: 120
     heartbeat_retries: 1
     tags: [internal, observability, low]
     restart_threshold: 5
 http_monitors:
-  - name: EXT-HTTPS-API
+  - name: Ext Https Api
     url: "https://api{suffix}.{domain}/actuator/health"
     accepted_statuscodes: ["200"]
     interval: 60
-  - name: EXT-HTTPS-GRAFANA
+  - name: Ext Https Grafana
     url: "https://grafana{suffix}.{domain}/api/health"
     accepted_statuscodes: ["200"]
     interval: 60
-  - name: EXT-HTTPS-PORTAINER
+  - name: Ext Https Portainer
     url: "https://portainer{suffix}.{domain}"
     accepted_statuscodes: ["200", "401", "403"]
     interval: 120
-  - name: EXT-HTTPS-APIGW
+  - name: Ext Https Apigw
     url: "https://apigw{suffix}.{domain}"
     accepted_statuscodes: ["200", "401", "403"]
     interval: 120
 dns_monitors:
-  - name: EXT-DNS-API
+  - name: Ext Dns Api
     hostname: "api{suffix}.{domain}"
     dns_resolve_type: A
     interval: 60
-  - name: EXT-DNS-ROOT
+  - name: Ext Dns Root
     hostname: "{domain}"
     dns_resolve_type: A
     interval: 60
diff --git a/health-agent/deploy/prod.env b/health-agent/deploy/prod.env
index 0a993b4..969ab4c 100644
--- a/health-agent/deploy/prod.env
+++ b/health-agent/deploy/prod.env
@@ -1,2 +1,2 @@
-SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:e262bf6e6712862ba24551dc326411ebb0987da59072834b2923bd73cb5c9d3b
+SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:a2ed1cbaabf116e49d1685e37e0335798d1fe49a2d95457717c68b1576894062
 PROD_IMAGE_TAG=0.1.0
\ No newline at end of file
diff --git a/health-agent/scripts/setup_uptime_kuma.py b/health-agent/scripts/setup_uptime_kuma.py
index 2f6c3c7..75841af 100644
--- a/health-agent/scripts/setup_uptime_kuma.py
+++ b/health-agent/scripts/setup_uptime_kuma.py
@@ -251,7 +251,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
     env_nodes = config.get("nodes", {}).get(env_name, {})
 
     for i, node in enumerate(env_nodes.get("service", []), 1):
-        m_name = f"EXT-PING-APP{i:02d}"
+        m_name = f"Ext Ping App{i:02d}"
         if only and m_name != only:
             continue
         ip = node["ip"]
@@ -281,7 +281,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
                     logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
 
     for i, node in enumerate(env_nodes.get("db", []), 1):
-        m_name = f"EXT-PING-DB{i:02d}"
+        m_name = f"Ext Ping Db{i:02d}"
         if only and m_name != only:
             continue
         ip = node["ip"]

From 3c2e872bf4baf7a8152a6bcb53cb014b184f4261 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 20:52:35 +0300
Subject: [PATCH 09/22] refactor(health-agent): rename monitor keys to Title
 Case With Space

Update all hardcoded push monitor names in check files to match the new Title Case With Space format in monitors.yml. The uk_tokens.yml keys are derived from monitor names so the push() calls must match exactly.
---
 .../src/health_agent/checks/filesystem.py     |  6 +--
 health-agent/src/health_agent/checks/http.py  | 38 +++++++++----------
 .../src/health_agent/checks/mongodb.py        | 10 ++---
 .../src/health_agent/checks/redis_sentinel.py |  6 +--
 health-agent/src/health_agent/checks/swarm.py |  6 +--
 health-agent/src/health_agent/checks/tcp.py   |  4 +-
 health-agent/src/health_agent/checks/tls.py   |  4 +-
 7 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/health-agent/src/health_agent/checks/filesystem.py b/health-agent/src/health_agent/checks/filesystem.py
index 8742091..b06fdfd 100644
--- a/health-agent/src/health_agent/checks/filesystem.py
+++ b/health-agent/src/health_agent/checks/filesystem.py
@@ -18,7 +18,7 @@ def check_storagebox_mount():
     
     if not os.path.exists(storagebox_path):
         ping_ms = int((time.time() - start_t) * 1000)
-        push("STORAGEBOX-MOUNT", "down", f"{storagebox_path} not found", ping_ms)
+        push("Storagebox Mount", "down", f"{storagebox_path} not found", ping_ms)
         return
         
     for rel_path in expected_files:
@@ -30,7 +30,7 @@ def check_storagebox_mount():
     
     if missing_files:
         msg = f"mount exists but missing: {', '.join(missing_files)}"
-        push("STORAGEBOX-MOUNT", "down", msg, ping_ms)
+        push("Storagebox Mount", "down", msg, ping_ms)
     else:
         msg = f"{storagebox_path} OK | all critical files present"
-        push("STORAGEBOX-MOUNT", "up", msg, ping_ms)
+        push("Storagebox Mount", "up", msg, ping_ms)
diff --git a/health-agent/src/health_agent/checks/http.py b/health-agent/src/health_agent/checks/http.py
index f7001c0..ea9d96b 100644
--- a/health-agent/src/health_agent/checks/http.py
+++ b/health-agent/src/health_agent/checks/http.py
@@ -54,7 +54,7 @@ def check_patroni_cluster():
     ping_ms = int((time.time() - start_t) * 1000)
     
     if not cluster_data:
-        push("PATRONI-CLUSTER", "down", error_msg, ping_ms)
+        push("Patroni Cluster", "down", error_msg, ping_ms)
         return
         
     members = cluster_data.get("members", [])
@@ -73,7 +73,7 @@ def check_patroni_cluster():
     if not leader:
         down_nodes = [f"{r[0]} state: {r[2]}" for r in replicas if r[2] not in ("running", "streaming")]
         msg = f"no leader detected | " + " ".join(down_nodes)
-        push("PATRONI-CLUSTER", "down", msg, ping_ms)
+        push("Patroni Cluster", "down", msg, ping_ms)
     else:
         lag_strs = []
         for name, lag, state in replicas:
@@ -81,7 +81,7 @@ def check_patroni_cluster():
             lag_strs.append(f"{name} (lag:{lag_mb:.0f}MB)")
             
         msg = f"leader: {leader} | replicas: " + " ".join(lag_strs)
-        push("PATRONI-CLUSTER", "up", msg, ping_ms)
+        push("Patroni Cluster", "up", msg, ping_ms)
 
 def check_rabbitmq_cluster():
     url = "http://rabbitmq:15672/api/healthchecks/node"
@@ -104,14 +104,14 @@ def check_rabbitmq_cluster():
             alarms = [n.get("name") for n in data if n.get("mem_alarm") or n.get("disk_free_alarm")]
             if alarms:
                 msg = f"disk/mem alarm active on {','.join(alarms)}"
-                push("RABBITMQ-CLUSTER", "down", msg, ping_ms)
+                push("Rabbitmq Cluster", "down", msg, ping_ms)
                 return
                 
         msg = f"{nodes_running}/{total_nodes} nodes running"
-        push("RABBITMQ-CLUSTER", "up", msg, ping_ms)
+        push("Rabbitmq Cluster", "up", msg, ping_ms)
     else:
         msg = err or f"HTTP {resp.status_code if resp else 'Unknown'}"
-        push("RABBITMQ-CLUSTER", "down", msg, ping_ms)
+        push("Rabbitmq Cluster", "down", msg, ping_ms)
 
 def check_apisix():
     url = "http://apisix:9180/apisix/admin/routes"
@@ -120,9 +120,9 @@ def check_apisix():
     ok, resp, ping_ms, err = http_check(url, headers=headers)
     
     if ok:
-        push("APISIX-GATEWAY", "up", "admin API reachable", ping_ms)
+        push("Apisix Gateway", "up", "admin API reachable", ping_ms)
     else:
-        push("APISIX-GATEWAY", "down", f"admin API unreachable: {err or resp.status_code}", ping_ms)
+        push("Apisix Gateway", "down", f"admin API unreachable: {err or resp.status_code}", ping_ms)
 
 def check_vault():
     hosts_env = os.getenv("VAULT_HOSTS", "vault")
@@ -152,18 +152,18 @@ def check_vault():
     
     if unsealed_count == total:
         msg = f"{unsealed_count}/{total} unsealed"
-        push("VAULT-CLUSTER", "up", msg, ping_ms)
+        push("Vault Cluster", "up", msg, ping_ms)
     else:
         msg = " | ".join(errors) if errors else "Vault checks failed"
-        push("VAULT-CLUSTER", "down", msg, ping_ms)
+        push("Vault Cluster", "down", msg, ping_ms)
 
 def check_prometheus():
     url = "http://prometheus:9090/-/healthy"
     ok, resp, ping_ms, err = http_check(url)
     if ok:
-        push("PROMETHEUS", "up", "healthy", ping_ms)
+        push("Prometheus", "up", "healthy", ping_ms)
     else:
-        push("PROMETHEUS", "down", f"prometheus unreachable: {err}", ping_ms)
+        push("Prometheus", "down", f"prometheus unreachable: {err}", ping_ms)
 
 def check_grafana():
     url = "http://grafana:3000/api/health"
@@ -172,27 +172,27 @@ def check_grafana():
         data = resp.json()
         db_status = data.get("database", "unknown")
         if db_status == "ok":
-            push("GRAFANA", "up", f"ok | db: {db_status}", ping_ms)
+            push("Grafana", "up", f"ok | db: {db_status}", ping_ms)
         else:
-            push("GRAFANA", "down", f"db not ok: {db_status}", ping_ms)
+            push("Grafana", "down", f"db not ok: {db_status}", ping_ms)
     else:
-        push("GRAFANA", "down", f"grafana unreachable: {err}", ping_ms)
+        push("Grafana", "down", f"grafana unreachable: {err}", ping_ms)
 
 def check_portainer():
     url = "http://portainer:9000/api/system/status"
     ok, resp, ping_ms, err = http_check(url)
     if ok:
-        push("PORTAINER", "up", "running", ping_ms)
+        push("Portainer", "up", "running", ping_ms)
     else:
-        push("PORTAINER", "down", f"portainer unreachable: {err}", ping_ms)
+        push("Portainer", "down", f"portainer unreachable: {err}", ping_ms)
 
 def check_loki():
     url = "http://loki:3100/ready"
     ok, resp, ping_ms, err = http_check(url)
     if ok:
-        push("LOKI", "up", "ready", ping_ms)
+        push("Loki", "up", "ready", ping_ms)
     else:
-        push("LOKI", "down", f"loki unreachable: {err}", ping_ms)
+        push("Loki", "down", f"loki unreachable: {err}", ping_ms)
 
 def run_all_http_checks():
     check_patroni_cluster()
diff --git a/health-agent/src/health_agent/checks/mongodb.py b/health-agent/src/health_agent/checks/mongodb.py
index 593cef6..9d80363 100644
--- a/health-agent/src/health_agent/checks/mongodb.py
+++ b/health-agent/src/health_agent/checks/mongodb.py
@@ -35,7 +35,7 @@ def check_mongodb():
         ping_ms = int((time.time() - start_t) * 1000)
         
         if cluster_size == 1:
-            push("MONGODB-REPLICASET", "up", "standalone mode OK", ping_ms)
+            push("Mongodb Replicaset", "up", "standalone mode OK", ping_ms)
             return
 
         if primary:
@@ -45,13 +45,13 @@ def check_mongodb():
             unhealthy_secs = [s for s in secondaries if s[1] not in ('SECONDARY', 'ARBITER')]
             if unhealthy_secs:
                 msg = f"PRIMARY: {primary} | unhealthy: {','.join([s[0] + ':' + s[1] for s in unhealthy_secs])}"
-                push("MONGODB-REPLICASET", "down", msg, ping_ms)
+                push("Mongodb Replicaset", "down", msg, ping_ms)
             else:
-                push("MONGODB-REPLICASET", "up", msg, ping_ms)
+                push("Mongodb Replicaset", "up", msg, ping_ms)
         else:
             msg = "no PRIMARY | quorum lost"
-            push("MONGODB-REPLICASET", "down", msg, ping_ms)
+            push("Mongodb Replicaset", "down", msg, ping_ms)
             
     except Exception as e:
         ping_ms = int((time.time() - start_t) * 1000)
-        push("MONGODB-REPLICASET", "down", f"connection failed: {e}", ping_ms)
+        push("Mongodb Replicaset", "down", f"connection failed: {e}", ping_ms)
diff --git a/health-agent/src/health_agent/checks/redis_sentinel.py b/health-agent/src/health_agent/checks/redis_sentinel.py
index 0a05ded..1dea54c 100644
--- a/health-agent/src/health_agent/checks/redis_sentinel.py
+++ b/health-agent/src/health_agent/checks/redis_sentinel.py
@@ -24,7 +24,7 @@ def check_redis_sentinel():
     redis_mode = os.getenv("REDIS_MODE", "sentinel")
     
     if redis_mode != "sentinel":
-        push("REDIS-SENTINEL", "up", "standalone mode (skipped)", int((time.time() - start_t) * 1000))
+        push("Redis Sentinel", "up", "standalone mode (skipped)", int((time.time() - start_t) * 1000))
         return
 
     try:
@@ -43,8 +43,8 @@ def check_redis_sentinel():
         ping_ms = int((time.time() - start_t) * 1000)
         
         msg = f"master: {master_ip}:{master_port} | replicas: {replicas_count} | sentinels quorum OK"
-        push("REDIS-SENTINEL", "up", msg, ping_ms)
+        push("Redis Sentinel", "up", msg, ping_ms)
         
     except Exception as e:
         ping_ms = int((time.time() - start_t) * 1000)
-        push("REDIS-SENTINEL", "down", f"quorum FAIL or master unreachable: {e}", ping_ms)
+        push("Redis Sentinel", "down", f"quorum FAIL or master unreachable: {e}", ping_ms)
diff --git a/health-agent/src/health_agent/checks/swarm.py b/health-agent/src/health_agent/checks/swarm.py
index 2c70b29..a9b64b4 100644
--- a/health-agent/src/health_agent/checks/swarm.py
+++ b/health-agent/src/health_agent/checks/swarm.py
@@ -38,12 +38,12 @@ def check_swarm_cluster():
         
         if ready_count == total_nodes:
             msg = f"{ready_count}/{total_nodes} nodes Ready (managers: {', '.join(managers)})"
-            push("SWARM-CLUSTER", "up", msg, ping_ms)
+            push("Swarm Cluster", "up", msg, ping_ms)
         else:
             msg = f"{ready_count}/{total_nodes} nodes Ready | Managers reachable: {len(managers)}"
-            push("SWARM-CLUSTER", "down", msg, ping_ms)
+            push("Swarm Cluster", "down", msg, ping_ms)
             
     except Exception as e:
         ping_ms = int((time.time() - start_time) * 1000)
         logger.error(f"Swarm check failed: {e}")
-        push("SWARM-CLUSTER", "down", str(e), ping_ms)
+        push("Swarm Cluster", "down", str(e), ping_ms)
diff --git a/health-agent/src/health_agent/checks/tcp.py b/health-agent/src/health_agent/checks/tcp.py
index c613049..5b00816 100644
--- a/health-agent/src/health_agent/checks/tcp.py
+++ b/health-agent/src/health_agent/checks/tcp.py
@@ -70,8 +70,8 @@ def check_etcd_cluster():
     if healthy_count == len(nodes):
         leader_info = f" | leader: {leader}" if leader else ""
         msg = f"{healthy_count}/{len(nodes)} healthy{leader_info}"
-        push("ETCD-CLUSTER", "up", msg, ping_ms)
+        push("Etcd Cluster", "up", msg, ping_ms)
     else:
         quorum_msg = f" | quorum at risk ({healthy_count}/{len(nodes)})" if healthy_count < 3 else ""
         msg = " | ".join(errors) + quorum_msg
-        push("ETCD-CLUSTER", "down", msg, ping_ms)
+        push("Etcd Cluster", "down", msg, ping_ms)
diff --git a/health-agent/src/health_agent/checks/tls.py b/health-agent/src/health_agent/checks/tls.py
index 6b5f691..b7cb1ed 100644
--- a/health-agent/src/health_agent/checks/tls.py
+++ b/health-agent/src/health_agent/checks/tls.py
@@ -57,6 +57,6 @@ def check_swag_tls():
     msg = " | ".join(msg_parts)
     
     if is_down:
-        push("SWAG-TLS", "down", msg, ping_ms)
+        push("Swag Tls", "down", msg, ping_ms)
     else:
-        push("SWAG-TLS", "up", msg, ping_ms)
+        push("Swag Tls", "up", msg, ping_ms)

From 95dd439a34b025b4912354b8fef349ae7f37a3af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 20:53:59 +0300
Subject: [PATCH 10/22] health-agent redeploy with new image

---
 health-agent/deploy/prod.env | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/health-agent/deploy/prod.env b/health-agent/deploy/prod.env
index 969ab4c..12ed295 100644
--- a/health-agent/deploy/prod.env
+++ b/health-agent/deploy/prod.env
@@ -1,2 +1,2 @@
-SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:a2ed1cbaabf116e49d1685e37e0335798d1fe49a2d95457717c68b1576894062
+SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:c3229a3517c7c6d471ae9dd1a3861d2c837d748f0946b1a8bf35e1caea89ebbd
 PROD_IMAGE_TAG=0.1.0
\ No newline at end of file

From 8b10653ff46087292faf14cc59ebb693f08b306b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 21:07:11 +0300
Subject: [PATCH 11/22] fix(health-agent): fix ping maxretries param and status
 page group lookup

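Fix ping monitor creation error ('max_retries' is not a valid uptime-kuma-api param; correct name is 'maxretries'). Fix status pages never linking groups: re-fetching get_monitors() after add_monitor() races with WebSocket delivery so newly created groups are missing; use group_map populated in Section 1 directly instead. Also pass the Slack webhook as 'slackwebhookURL' when creating notifications, and capture push tokens with a single get_monitors() call after all push monitors are created instead of one call per monitor.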
---
 health-agent/scripts/setup_uptime_kuma.py | 47 ++++++++++++-----------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/health-agent/scripts/setup_uptime_kuma.py b/health-agent/scripts/setup_uptime_kuma.py
index 75841af..4789266 100644
--- a/health-agent/scripts/setup_uptime_kuma.py
+++ b/health-agent/scripts/setup_uptime_kuma.py
@@ -103,7 +103,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
                         type=NotificationType.SLACK,
                         name=notif_name,
                         isDefault=False,
-                        webhookURL=webhook_url,
+                        slackwebhookURL=webhook_url,
                         applyExisting=False
                     )
                     notification_map[notif_key] = res.get('id')
@@ -138,6 +138,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
                 group_map[raw_name] = existing_monitors[formatted_name]['id']
 
     tokens = {}
+    new_monitor_ids = {}  # m_name -> monitorID for monitors created in this run
 
     # 2. Push Monitors
     for pm in config.get("push_monitors", []):
@@ -169,15 +170,24 @@ def setup_uptime_kuma(dry_run=False, only=None):
                 if notif_ids:
                     kwargs["notification_id_list"] = notif_ids
                 result = api.add_monitor(**kwargs)
-                m_id = result['monitorID']
-
-                for m in api.get_monitors():
-                    if m['id'] == m_id:
-                        tokens[m_name] = m['pushToken']
-                        break
+                new_monitor_ids[m_name] = result['monitorID']
         else:
             tokens[m_name] = "dummy_token_dry_run"
 
+    # Fetch push tokens for newly created monitors in one batch call.
+    # Calling api.get_monitors() per-monitor races with WebSocket event delivery;
+    # a single call after all creates allows the server state to settle.
+    if new_monitor_ids and api:
+        id_to_name = {v: k for k, v in new_monitor_ids.items()}
+        for m in api.get_monitors():
+            if m['id'] in id_to_name:
+                m_name = id_to_name[m['id']]
+                tokens[m_name] = m.get('pushToken', '')
+                logger.info(f"Captured push token for {m_name}")
+        missing = [n for n in new_monitor_ids if n not in tokens]
+        if missing:
+            logger.warning(f"Could not capture push token for: {missing}")
+
     # 3. HTTP Monitors
     for hm in config.get("http_monitors", []):
         m_name = hm["name"]
@@ -269,7 +279,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
                         "name": m_name,
                         "hostname": ip,
                         "interval": ping_interval,
-                        "max_retries": ping_retries,
+                        "maxretries": ping_retries,
                     }
                     if parent_group_id is not None:
                         kwargs["parent"] = parent_group_id
@@ -299,7 +309,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
                         "name": m_name,
                         "hostname": ip,
                         "interval": ping_interval,
-                        "max_retries": ping_retries,
+                        "maxretries": ping_retries,
                     }
                     if parent_group_id is not None:
                         kwargs["parent"] = parent_group_id
@@ -312,13 +322,6 @@ def setup_uptime_kuma(dry_run=False, only=None):
 
     # 6. Status Pages
     if api:
-        all_monitors = {}
-        try:
-            for m in api.get_monitors():
-                all_monitors[m['name']] = m
-        except Exception as e:
-            logger.warning(f"Failed to re-fetch monitors for status pages: {e}")
-
         existing_pages = {}
         try:
             for p in api.get_status_pages():
@@ -339,18 +342,18 @@ def setup_uptime_kuma(dry_run=False, only=None):
                     api.add_status_page(slug, title)
 
                 # Each monitors.yml group becomes one display section on the status page.
-                # The GROUP monitor is added so Uptime Kuma renders it with all its children.
+                # Use group_map (populated during Section 1) to avoid re-fetching monitors;
+                # a fresh get_monitors() call after add_monitor() races with WebSocket delivery.
                 public_group_list = []
                 for group_raw_name in sp_groups:
-                    group_formatted = f"{project} [{env_name}] {group_raw_name}"
-                    group_monitor = all_monitors.get(group_formatted)
-                    if not group_monitor:
-                        logger.warning(f"Group '{group_formatted}' not found, skipping in status page")
+                    group_id = group_map.get(group_raw_name)
+                    if not group_id:
+                        logger.warning(f"Group '{group_raw_name}' not in group_map, skipping in status page")
                         continue
                     public_group_list.append({
                         "name": group_raw_name,
                         "weight": len(public_group_list) + 1,
-                        "monitorList": [{"id": group_monitor['id']}]
+                        "monitorList": [{"id": group_id}]
                     })
 
                 if public_group_list:

From e4acd0e57b7c0ca33b17f289abd65af7a84ec1a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 21:10:10 +0300
Subject: [PATCH 12/22] fix(health-agent): skip uk_tokens.yml write when tokens
 dict is empty to prevent setup skip loop

---
 health-agent/scripts/setup_uptime_kuma.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/health-agent/scripts/setup_uptime_kuma.py b/health-agent/scripts/setup_uptime_kuma.py
index 4789266..5b9c616 100644
--- a/health-agent/scripts/setup_uptime_kuma.py
+++ b/health-agent/scripts/setup_uptime_kuma.py
@@ -370,10 +370,13 @@ def setup_uptime_kuma(dry_run=False, only=None):
     # 7. Write push tokens to uk_tokens.yml
     token_file = os.path.join(os.path.dirname(__file__), "..", "config", "generated", "uk_tokens.yml")
     if not dry_run:
-        os.makedirs(os.path.dirname(token_file), exist_ok=True)
-        with open(token_file, "w") as f:
-            yaml.dump(tokens, f)
-        logger.info(f"Saved push tokens to {token_file}")
+        if not tokens:
+            logger.warning("No push tokens captured; skipping uk_tokens.yml write so setup reruns next time")
+        else:
+            os.makedirs(os.path.dirname(token_file), exist_ok=True)
+            with open(token_file, "w") as f:
+                yaml.dump(tokens, f)
+            logger.info(f"Saved {len(tokens)} push tokens to {token_file}")
     else:
         logger.info(f"[DRY-RUN] Would save {len(tokens)} tokens to {token_file}")
 

From a5fc058978a651b2c8e93cac516d6c6085cb83a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 21:15:06 +0300
Subject: [PATCH 13/22] health-agent redeploy with new image

---
 health-agent/deploy/prod.env | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/health-agent/deploy/prod.env b/health-agent/deploy/prod.env
index 12ed295..24d98b6 100644
--- a/health-agent/deploy/prod.env
+++ b/health-agent/deploy/prod.env
@@ -1,2 +1,2 @@
-SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:c3229a3517c7c6d471ae9dd1a3861d2c837d748f0946b1a8bf35e1caea89ebbd
+SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:2a820591c352221731e5d850159f624e34dd9b85a59d13724c4a745f0b08f1c8
 PROD_IMAGE_TAG=0.1.0
\ No newline at end of file

From 2827b227d53f8e0e002883289fb5dcffef06c914 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 21:23:45 +0300
Subject: [PATCH 14/22] =?UTF-8?q?fix(health-agent):=20fix=20notification?=
 =?UTF-8?q?=20param=20name=20and=20type=20=E2=80=94=20notificationIDList?=
 =?UTF-8?q?=20expects=20a=20list=20of=20IDs=20not=20a=20dict?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 health-agent/scripts/setup_uptime_kuma.py | 24 ++++++++---------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/health-agent/scripts/setup_uptime_kuma.py b/health-agent/scripts/setup_uptime_kuma.py
index 5b9c616..376abbf 100644
--- a/health-agent/scripts/setup_uptime_kuma.py
+++ b/health-agent/scripts/setup_uptime_kuma.py
@@ -35,11 +35,7 @@ def find_parent_group(monitor_name, groups, group_map):
 def find_group_notifications(monitor_name, groups, notification_map):
     for g in groups:
         if monitor_name in g.get("children", []):
-            ids = {}
-            for n in g.get("notifications", []):
-                nid = notification_map.get(n)
-                if nid is not None:
-                    ids[str(nid)] = True
+            ids = [notification_map[n] for n in g.get("notifications", []) if notification_map.get(n) is not None]
             return ids or None
     return None
 
@@ -119,11 +115,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
         raw_name = g["name"]
         formatted_name = f"{project} [{env_name}] {raw_name}"
 
-        notif_ids = {}
-        for n in g.get("notifications", []):
-            nid = notification_map.get(n)
-            if nid is not None:
-                notif_ids[str(nid)] = True
+        notif_ids = [notification_map[n] for n in g.get("notifications", []) if notification_map.get(n) is not None]
 
         logger.info(f"Processing group: {formatted_name}")
         if not dry_run:
@@ -131,7 +123,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
                 logger.info(f"Creating group monitor: {formatted_name}")
                 kwargs = {"type": MonitorType.GROUP, "name": formatted_name}
                 if notif_ids:
-                    kwargs["notification_id_list"] = notif_ids
+                    kwargs["notificationIDList"] = notif_ids
                 res = api.add_monitor(**kwargs)
                 group_map[raw_name] = res['monitorID']
             else:
@@ -168,7 +160,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
                     "parent": parent_group_id
                 }
                 if notif_ids:
-                    kwargs["notification_id_list"] = notif_ids
+                    kwargs["notificationIDList"] = notif_ids
                 result = api.add_monitor(**kwargs)
                 new_monitor_ids[m_name] = result['monitorID']
         else:
@@ -215,7 +207,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
                     if parent_group_id is not None:
                         kwargs["parent"] = parent_group_id
                     if notif_ids:
-                        kwargs["notification_id_list"] = notif_ids
+                        kwargs["notificationIDList"] = notif_ids
                     api.add_monitor(**kwargs)
                     logger.info(f"Created HTTP monitor: {m_name}")
                 except Exception as e:
@@ -248,7 +240,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
                     if parent_group_id is not None:
                         kwargs["parent"] = parent_group_id
                     if notif_ids:
-                        kwargs["notification_id_list"] = notif_ids
+                        kwargs["notificationIDList"] = notif_ids
                     api.add_monitor(**kwargs)
                     logger.info(f"Created DNS monitor: {m_name}")
                 except Exception as e:
@@ -284,7 +276,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
                     if parent_group_id is not None:
                         kwargs["parent"] = parent_group_id
                     if notif_ids:
-                        kwargs["notification_id_list"] = notif_ids
+                        kwargs["notificationIDList"] = notif_ids
                     api.add_monitor(**kwargs)
                     logger.info(f"Created Ping monitor: {m_name}")
                 except Exception as e:
@@ -314,7 +306,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
                     if parent_group_id is not None:
                         kwargs["parent"] = parent_group_id
                     if notif_ids:
-                        kwargs["notification_id_list"] = notif_ids
+                        kwargs["notificationIDList"] = notif_ids
                     api.add_monitor(**kwargs)
                     logger.info(f"Created Ping monitor: {m_name}")
                 except Exception as e:

From 0551b01c64f5fec3ee9643e03cb22272dc00413c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 21:27:14 +0300
Subject: [PATCH 15/22] health-agent redeploy with new image

---
 health-agent/deploy/prod.env | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/health-agent/deploy/prod.env b/health-agent/deploy/prod.env
index 24d98b6..9d012ee 100644
--- a/health-agent/deploy/prod.env
+++ b/health-agent/deploy/prod.env
@@ -1,2 +1,2 @@
-SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:2a820591c352221731e5d850159f624e34dd9b85a59d13724c4a745f0b08f1c8
+SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:64976d0fccf604071051b5c9d20f179639f75b1d3cbced03667d5237b38a4f9b
 PROD_IMAGE_TAG=0.1.0
\ No newline at end of file

From fa7ed410632566ecacf99e9e6dc612c5c335acb4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 21:35:44 +0300
Subject: [PATCH 16/22] fix(health-agent): reload uk_tokens.yml on every push
 call instead of caching at startup

---
 health-agent/src/health_agent/uptime_kuma.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/health-agent/src/health_agent/uptime_kuma.py b/health-agent/src/health_agent/uptime_kuma.py
index b0fcf53..4a1fe5a 100644
--- a/health-agent/src/health_agent/uptime_kuma.py
+++ b/health-agent/src/health_agent/uptime_kuma.py
@@ -1,7 +1,7 @@
 import os
 import requests
 import logging
-from health_agent.config import UK_TOKENS
+from health_agent.config import load_uk_tokens
 
 logger = logging.getLogger(__name__)
 UK_PUSH_URL_BASE = os.getenv("UK_PUSH_URL_BASE", "https://uptime.tarla.io/api/push")
@@ -9,7 +9,7 @@ UK_PUSH_URL_BASE = os.getenv("UK_PUSH_URL_BASE", "https://uptime.tarla.io/api/pu
 DRY_RUN = False
 
 def push(monitor_name: str, status: str, msg: str, ping_ms: int):
-    token = UK_TOKENS.get(monitor_name)
+    token = load_uk_tokens().get(monitor_name)
     if not token:
         logger.warning(f"No token found for monitor {monitor_name}")
         return

From 94e6b57c5216e49ff16dd0320e8b1372adf398a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 21:54:49 +0300
Subject: [PATCH 17/22] fix(health-agent): check all 3 patroni node configs on
 storagebox; switch ping monitors to TCP port 22 (ICMP blocked from Docker)

---
 health-agent/scripts/setup_uptime_kuma.py     | 21 +++++++++++--------
 .../src/health_agent/checks/filesystem.py     |  4 +++-
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/health-agent/scripts/setup_uptime_kuma.py b/health-agent/scripts/setup_uptime_kuma.py
index 376abbf..41b0486 100644
--- a/health-agent/scripts/setup_uptime_kuma.py
+++ b/health-agent/scripts/setup_uptime_kuma.py
@@ -246,10 +246,11 @@ def setup_uptime_kuma(dry_run=False, only=None):
                 except Exception as e:
                     logger.warning(f"Failed to create DNS monitor {m_name}: {e}")
 
-    # 5. Ping Monitors (generated from nodes config)
+    # 5. TCP Port Monitors (generated from nodes config; ICMP is blocked from Docker, use TCP SSH port)
     ping_cfg = config.get("ping_monitors", {})
     ping_interval = ping_cfg.get("interval", 60)
     ping_retries = ping_cfg.get("max_retries", 1)
+    ping_port = ping_cfg.get("port", 22)
     env_nodes = config.get("nodes", {}).get(env_name, {})
 
     for i, node in enumerate(env_nodes.get("service", []), 1):
@@ -260,16 +261,17 @@ def setup_uptime_kuma(dry_run=False, only=None):
         parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
         notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
 
-        logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
+        logger.info(f"Processing TCP port monitor: {m_name} -> {ip}:{ping_port}")
         if not dry_run:
             if m_name in existing_monitors:
                 logger.info(f"Monitor {m_name} already exists.")
             else:
                 try:
                     kwargs = {
-                        "type": MonitorType.PING,
+                        "type": MonitorType.PORT,
                         "name": m_name,
                         "hostname": ip,
+                        "port": ping_port,
                         "interval": ping_interval,
                         "maxretries": ping_retries,
                     }
@@ -278,9 +280,9 @@ def setup_uptime_kuma(dry_run=False, only=None):
                     if notif_ids:
                         kwargs["notificationIDList"] = notif_ids
                     api.add_monitor(**kwargs)
-                    logger.info(f"Created Ping monitor: {m_name}")
+                    logger.info(f"Created TCP port monitor: {m_name}")
                 except Exception as e:
-                    logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
+                    logger.warning(f"Failed to create TCP port monitor {m_name}: {e}")
 
     for i, node in enumerate(env_nodes.get("db", []), 1):
         m_name = f"Ext Ping Db{i:02d}"
@@ -290,16 +292,17 @@ def setup_uptime_kuma(dry_run=False, only=None):
         parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
         notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
 
-        logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
+        logger.info(f"Processing TCP port monitor: {m_name} -> {ip}:{ping_port}")
         if not dry_run:
             if m_name in existing_monitors:
                 logger.info(f"Monitor {m_name} already exists.")
             else:
                 try:
                     kwargs = {
-                        "type": MonitorType.PING,
+                        "type": MonitorType.PORT,
                         "name": m_name,
                         "hostname": ip,
+                        "port": ping_port,
                         "interval": ping_interval,
                         "maxretries": ping_retries,
                     }
@@ -308,9 +311,9 @@ def setup_uptime_kuma(dry_run=False, only=None):
                     if notif_ids:
                         kwargs["notificationIDList"] = notif_ids
                     api.add_monitor(**kwargs)
-                    logger.info(f"Created Ping monitor: {m_name}")
+                    logger.info(f"Created TCP port monitor: {m_name}")
                 except Exception as e:
-                    logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
+                    logger.warning(f"Failed to create TCP port monitor {m_name}: {e}")
 
     # 6. Status Pages
     if api:
diff --git a/health-agent/src/health_agent/checks/filesystem.py b/health-agent/src/health_agent/checks/filesystem.py
index b06fdfd..33355f7 100644
--- a/health-agent/src/health_agent/checks/filesystem.py
+++ b/health-agent/src/health_agent/checks/filesystem.py
@@ -10,7 +10,9 @@ def check_storagebox_mount():
     
     storagebox_path = os.getenv("STORAGEBOX_PATH", "/mnt/storagebox")
     expected_files = [
-        "patroni/patroni.yml",
+        "db/postgresql-01/config/patroni.yml",
+        "db/postgresql-02/config/patroni.yml",
+        "db/postgresql-03/config/patroni.yml",
         "ssl/STAR.iklim.co.full.crt"
     ]
     

From b73ae4e5fb24a88d3761f91cc4c9362424210c9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 21:55:42 +0300
Subject: [PATCH 18/22] revert(health-agent): revert ping monitors back to PING
 type

---
 health-agent/scripts/setup_uptime_kuma.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/health-agent/scripts/setup_uptime_kuma.py b/health-agent/scripts/setup_uptime_kuma.py
index 41b0486..376abbf 100644
--- a/health-agent/scripts/setup_uptime_kuma.py
+++ b/health-agent/scripts/setup_uptime_kuma.py
@@ -246,11 +246,10 @@ def setup_uptime_kuma(dry_run=False, only=None):
                 except Exception as e:
                     logger.warning(f"Failed to create DNS monitor {m_name}: {e}")
 
-    # 5. TCP Port Monitors (generated from nodes config; ICMP is blocked from Docker, use TCP SSH port)
+    # 5. Ping Monitors (generated from nodes config)
     ping_cfg = config.get("ping_monitors", {})
     ping_interval = ping_cfg.get("interval", 60)
     ping_retries = ping_cfg.get("max_retries", 1)
-    ping_port = ping_cfg.get("port", 22)
     env_nodes = config.get("nodes", {}).get(env_name, {})
 
     for i, node in enumerate(env_nodes.get("service", []), 1):
@@ -261,17 +260,16 @@ def setup_uptime_kuma(dry_run=False, only=None):
         parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
         notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
 
-        logger.info(f"Processing TCP port monitor: {m_name} -> {ip}:{ping_port}")
+        logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
         if not dry_run:
             if m_name in existing_monitors:
                 logger.info(f"Monitor {m_name} already exists.")
             else:
                 try:
                     kwargs = {
-                        "type": MonitorType.PORT,
+                        "type": MonitorType.PING,
                         "name": m_name,
                         "hostname": ip,
-                        "port": ping_port,
                         "interval": ping_interval,
                         "maxretries": ping_retries,
                     }
@@ -280,9 +278,9 @@ def setup_uptime_kuma(dry_run=False, only=None):
                     if notif_ids:
                         kwargs["notificationIDList"] = notif_ids
                     api.add_monitor(**kwargs)
-                    logger.info(f"Created TCP port monitor: {m_name}")
+                    logger.info(f"Created Ping monitor: {m_name}")
                 except Exception as e:
-                    logger.warning(f"Failed to create TCP port monitor {m_name}: {e}")
+                    logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
 
     for i, node in enumerate(env_nodes.get("db", []), 1):
         m_name = f"Ext Ping Db{i:02d}"
@@ -292,17 +290,16 @@ def setup_uptime_kuma(dry_run=False, only=None):
         parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
         notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
 
-        logger.info(f"Processing TCP port monitor: {m_name} -> {ip}:{ping_port}")
+        logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
         if not dry_run:
             if m_name in existing_monitors:
                 logger.info(f"Monitor {m_name} already exists.")
             else:
                 try:
                     kwargs = {
-                        "type": MonitorType.PORT,
+                        "type": MonitorType.PING,
                         "name": m_name,
                         "hostname": ip,
-                        "port": ping_port,
                         "interval": ping_interval,
                         "maxretries": ping_retries,
                     }
@@ -311,9 +308,9 @@ def setup_uptime_kuma(dry_run=False, only=None):
                     if notif_ids:
                         kwargs["notificationIDList"] = notif_ids
                     api.add_monitor(**kwargs)
-                    logger.info(f"Created TCP port monitor: {m_name}")
+                    logger.info(f"Created Ping monitor: {m_name}")
                 except Exception as e:
-                    logger.warning(f"Failed to create TCP port monitor {m_name}: {e}")
+                    logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
 
     # 6. Status Pages
     if api:

From 969c4a2301c3c3155e64109e1b6062039562ccac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 22:51:15 +0300
Subject: [PATCH 19/22] fix(monitoring): resolve health-agent bugs and flapping
 monitors

- Vault flapping: Test `resp is not None` instead of truthiness so a node
  answering HTTP 429 is no longer reported as unreachable
- Storagebox block: Move mount check to a daemon thread
- Push monitors: Increase interval to 75s and keep a 60s check cycle (sleep
  only for the time left after the checks have run)
- Redis Sentinel: Fix authentication by passing the timeout and password to
  the Sentinel connections via sentinel_kwargs
- Ext Https Api: Update URL to /health
---
 health-agent/config/monitors.yml                 | 16 ++++++++--------
 health-agent/src/health_agent/checks/http.py     |  4 ++--
 .../src/health_agent/checks/redis_sentinel.py    |  6 +++++-
 health-agent/src/health_agent/main.py            |  9 ++++++---
 4 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/health-agent/config/monitors.yml b/health-agent/config/monitors.yml
index 3cfe7fd..576d4ee 100644
--- a/health-agent/config/monitors.yml
+++ b/health-agent/config/monitors.yml
@@ -78,22 +78,22 @@ groups:
     children: [Prometheus, Grafana, Portainer, Loki, Ext Https Portainer, Ext Https Apigw]
 push_monitors:
   - name: Swarm Cluster
-    interval: 60
+    interval: 75
     heartbeat_retries: 1
     tags: [internal, infrastructure, high]
     restart_threshold: 1
   - name: Vault Cluster
-    interval: 60
+    interval: 75
     heartbeat_retries: 1
     tags: [internal, infrastructure, high]
     restart_threshold: 1
   - name: Etcd Cluster
-    interval: 60
+    interval: 75
     heartbeat_retries: 1
     tags: [internal, database, high]
     restart_threshold: 1
   - name: Patroni Cluster
-    interval: 60
+    interval: 75
     heartbeat_retries: 1
     tags: [internal, database, high]
     restart_threshold: 1
@@ -103,17 +103,17 @@ push_monitors:
     tags: [internal, database, high]
     restart_threshold: 1
   - name: Apisix Gateway
-    interval: 60
+    interval: 75
     heartbeat_retries: 1
     tags: [internal, gateway, high]
     restart_threshold: 1
   - name: Rabbitmq Cluster
-    interval: 60
+    interval: 75
     heartbeat_retries: 1
     tags: [internal, gateway, medium]
     restart_threshold: 3
   - name: Redis Sentinel
-    interval: 60
+    interval: 75
     heartbeat_retries: 1
     tags: [internal, database, medium]
     restart_threshold: 3
@@ -149,7 +149,7 @@ push_monitors:
     restart_threshold: 5
 http_monitors:
   - name: Ext Https Api
-    url: "https://api{suffix}.{domain}/actuator/health"
+    url: "https://api{suffix}.{domain}/health"
     accepted_statuscodes: ["200"]
     interval: 60
   - name: Ext Https Grafana
diff --git a/health-agent/src/health_agent/checks/http.py b/health-agent/src/health_agent/checks/http.py
index ea9d96b..5191392 100644
--- a/health-agent/src/health_agent/checks/http.py
+++ b/health-agent/src/health_agent/checks/http.py
@@ -139,14 +139,14 @@ def check_vault():
         ok, resp, ms, err = http_check(url, expected_status=[200, 429, 473])
         max_ping = max(max_ping, ms)
         
-        if resp:
+        if resp is not None:
             data = resp.json()
             if not data.get("sealed"):
                 unsealed_count += 1
             else:
                 errors.append(f"{node} SEALED")
         else:
-            errors.append(f"{node} unreachable")
+            errors.append(f"{node} unreachable: {err}")
             
     ping_ms = int((time.time() - start_t) * 1000)
     
diff --git a/health-agent/src/health_agent/checks/redis_sentinel.py b/health-agent/src/health_agent/checks/redis_sentinel.py
index 1dea54c..ade60bd 100644
--- a/health-agent/src/health_agent/checks/redis_sentinel.py
+++ b/health-agent/src/health_agent/checks/redis_sentinel.py
@@ -28,7 +28,11 @@ def check_redis_sentinel():
         return
 
     try:
-        sentinel = Sentinel(sentinel_nodes, socket_timeout=3, password=password)
+        sentinel_kwargs = {"socket_timeout": 3}
+        if password:
+            sentinel_kwargs["password"] = password
+            
+        sentinel = Sentinel(sentinel_nodes, sentinel_kwargs=sentinel_kwargs, socket_timeout=3, password=password)
         
         # Master ping
         master = sentinel.master_for(master_name, socket_timeout=3, password=password)
diff --git a/health-agent/src/health_agent/main.py b/health-agent/src/health_agent/main.py
index 7a0d361..5321a1d 100644
--- a/health-agent/src/health_agent/main.py
+++ b/health-agent/src/health_agent/main.py
@@ -2,6 +2,7 @@ import argparse
 import time
 import logging
 import json
+import threading
 from health_agent.checks import swarm
 from health_agent.checks.http import run_all_http_checks
 from health_agent.checks.tcp import check_etcd_cluster
@@ -65,9 +66,9 @@ def run_checks():
         logger.error(f"Error running MongoDB checks: {e}")
         
     try:
-        check_storagebox_mount()
+        threading.Thread(target=check_storagebox_mount, daemon=True).start()
     except Exception as e:
-        logger.error(f"Error running filesystem checks: {e}")
+        logger.error(f"Error starting filesystem check thread: {e}")
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="iklim.co Health Agent")
@@ -88,5 +89,7 @@ if __name__ == "__main__":
         run_checks()
     else:
         while True:
+            t_start = time.time()
             run_checks()
-            time.sleep(60)
+            elapsed = time.time() - t_start
+            time.sleep(max(0, 60 - elapsed))

From 2a482ce4df88b17494777f82797ce796855d45b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 22:53:35 +0300
Subject: [PATCH 20/22] health-agent redeploy with new image

---
 health-agent/deploy/prod.env | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/health-agent/deploy/prod.env b/health-agent/deploy/prod.env
index 9d012ee..87e32a2 100644
--- a/health-agent/deploy/prod.env
+++ b/health-agent/deploy/prod.env
@@ -1,2 +1,2 @@
-SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:64976d0fccf604071051b5c9d20f179639f75b1d3cbced03667d5237b38a4f9b
+SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:8b327976ebe8bc9b79fba303956d2bf1453dc6eaba2510db7ee474fe013e0e7d
 PROD_IMAGE_TAG=0.1.0
\ No newline at end of file

From b49ca276f0aae8eb88e5b7ed8667413b095de489 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 23:07:37 +0300
Subject: [PATCH 21/22] fix(monitoring): support existing monitor updates and
 vault nodes

- setup_uptime_kuma: Use api.edit_monitor to update existing monitors with new configuration instead of skipping them.
- setup_uptime_kuma: Add port and accepted_statuscodes to DNS monitors to prevent NodeJS null reading errors in Kuma.
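- http.py: Parse VAULT_HOSTS environment variable for Vault cluster nodes instead of hardcoding 'vault'.
  (Note: the diff in this patch touches only setup_uptime_kuma.py; the http.py change is not included here.)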
---
 health-agent/scripts/setup_uptime_kuma.py | 92 +++++++++++++++++++++--
 1 file changed, 85 insertions(+), 7 deletions(-)

diff --git a/health-agent/scripts/setup_uptime_kuma.py b/health-agent/scripts/setup_uptime_kuma.py
index 376abbf..8a9294e 100644
--- a/health-agent/scripts/setup_uptime_kuma.py
+++ b/health-agent/scripts/setup_uptime_kuma.py
@@ -145,12 +145,22 @@ def setup_uptime_kuma(dry_run=False, only=None):
         logger.info(f"Processing push monitor: {m_name}")
         if not dry_run:
             if m_name in existing_monitors:
-                logger.info(f"Monitor {m_name} already exists.")
+                logger.info(f"Monitor {m_name} already exists. Updating...")
                 m_id = existing_monitors[m_name]['id']
                 tokens[m_name] = existing_monitors[m_name]['pushToken']
 
-                if parent_group_id and existing_monitors[m_name].get('parent') != parent_group_id:
-                    api.edit_monitor(m_id, parent=parent_group_id)
+                kwargs = {
+                    "interval": m_interval
+                }
+                if parent_group_id:
+                    kwargs["parent"] = parent_group_id
+                if notif_ids:
+                    kwargs["notificationIDList"] = notif_ids
+                    
+                try:
+                    api.edit_monitor(m_id, **kwargs)
+                except Exception as e:
+                    logger.warning(f"Failed to edit push monitor {m_name}: {e}")
             else:
                 logger.info(f"Creating push monitor: {m_name}")
                 kwargs = {
@@ -194,7 +204,23 @@ def setup_uptime_kuma(dry_run=False, only=None):
         logger.info(f"Processing HTTP monitor: {m_name} -> {url}")
         if not dry_run:
             if m_name in existing_monitors:
-                logger.info(f"Monitor {m_name} already exists.")
+                logger.info(f"Monitor {m_name} already exists. Updating...")
+                m_id = existing_monitors[m_name]['id']
+                kwargs = {
+                    "type": MonitorType.HTTP,
+                    "name": m_name,
+                    "url": url,
+                    "interval": interval,
+                    "accepted_statuscodes": accepted_statuscodes,
+                }
+                if parent_group_id is not None:
+                    kwargs["parent"] = parent_group_id
+                if notif_ids:
+                    kwargs["notificationIDList"] = notif_ids
+                try:
+                    api.edit_monitor(m_id, **kwargs)
+                except Exception as e:
+                    logger.warning(f"Failed to edit HTTP monitor {m_name}: {e}")
             else:
                 try:
                     kwargs = {
@@ -227,13 +253,33 @@ def setup_uptime_kuma(dry_run=False, only=None):
         logger.info(f"Processing DNS monitor: {m_name} -> {hostname}")
         if not dry_run:
             if m_name in existing_monitors:
-                logger.info(f"Monitor {m_name} already exists.")
+                logger.info(f"Monitor {m_name} already exists. Updating...")
+                m_id = existing_monitors[m_name]['id']
+                kwargs = {
+                    "type": MonitorType.DNS,
+                    "name": m_name,
+                    "hostname": hostname,
+                    "port": 53,
+                    "accepted_statuscodes": ["200-299"],
+                    "dns_resolve_type": dns_resolve_type,
+                    "interval": interval,
+                }
+                if parent_group_id is not None:
+                    kwargs["parent"] = parent_group_id
+                if notif_ids:
+                    kwargs["notificationIDList"] = notif_ids
+                try:
+                    api.edit_monitor(m_id, **kwargs)
+                except Exception as e:
+                    logger.warning(f"Failed to edit DNS monitor {m_name}: {e}")
             else:
                 try:
                     kwargs = {
                         "type": MonitorType.DNS,
                         "name": m_name,
                         "hostname": hostname,
+                        "port": 53,
+                        "accepted_statuscodes": ["200-299"],
                         "dns_resolve_type": dns_resolve_type,
                         "interval": interval,
                     }
@@ -263,7 +309,23 @@ def setup_uptime_kuma(dry_run=False, only=None):
         logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
         if not dry_run:
             if m_name in existing_monitors:
-                logger.info(f"Monitor {m_name} already exists.")
+                logger.info(f"Monitor {m_name} already exists. Updating...")
+                m_id = existing_monitors[m_name]['id']
+                kwargs = {
+                    "type": MonitorType.PING,
+                    "name": m_name,
+                    "hostname": ip,
+                    "interval": ping_interval,
+                    "maxretries": ping_retries,
+                }
+                if parent_group_id is not None:
+                    kwargs["parent"] = parent_group_id
+                if notif_ids:
+                    kwargs["notificationIDList"] = notif_ids
+                try:
+                    api.edit_monitor(m_id, **kwargs)
+                except Exception as e:
+                    logger.warning(f"Failed to edit Ping monitor {m_name}: {e}")
             else:
                 try:
                     kwargs = {
@@ -293,7 +355,23 @@ def setup_uptime_kuma(dry_run=False, only=None):
         logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
         if not dry_run:
             if m_name in existing_monitors:
-                logger.info(f"Monitor {m_name} already exists.")
+                logger.info(f"Monitor {m_name} already exists. Updating...")
+                m_id = existing_monitors[m_name]['id']
+                kwargs = {
+                    "type": MonitorType.PING,
+                    "name": m_name,
+                    "hostname": ip,
+                    "interval": ping_interval,
+                    "maxretries": ping_retries,
+                }
+                if parent_group_id is not None:
+                    kwargs["parent"] = parent_group_id
+                if notif_ids:
+                    kwargs["notificationIDList"] = notif_ids
+                try:
+                    api.edit_monitor(m_id, **kwargs)
+                except Exception as e:
+                    logger.warning(f"Failed to edit Ping monitor {m_name}: {e}")
             else:
                 try:
                     kwargs = {

From 475eb762b9d9a7004ebc905a16c000ad78190f83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Murat=20=C3=96ZDEM=C4=B0R?= <muratozdemir@tarla.io>
Date: Fri, 26 Jun 2026 23:13:38 +0300
Subject: [PATCH 22/22] health-agent redeploy with new image

---
 health-agent/deploy/prod.env | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/health-agent/deploy/prod.env b/health-agent/deploy/prod.env
index 87e32a2..4073bf7 100644
--- a/health-agent/deploy/prod.env
+++ b/health-agent/deploy/prod.env
@@ -1,2 +1,2 @@
-SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:8b327976ebe8bc9b79fba303956d2bf1453dc6eaba2510db7ee474fe013e0e7d
+SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:196bb9b1cbb7acd7cd8671f7a3e9e3f0078a0c74658c66c9c22881fa66d75242
 PROD_IMAGE_TAG=0.1.0
\ No newline at end of file