refactor: convert all monitor names to Title Case and update health-agent digest

This commit is contained in:
Murat ÖZDEMİR 2026-06-26 20:47:31 +03:00
parent d51c073556
commit bc8b3d0934
3 changed files with 29 additions and 29 deletions

View File

@@ -50,126 +50,126 @@ groups:
status_page: "iklim-{env}-ops" status_page: "iklim-{env}-ops"
notifications: [slack-high] notifications: [slack-high]
tags: [internal, infrastructure] tags: [internal, infrastructure]
children: [SWARM-CLUSTER, VAULT-CLUSTER, STORAGEBOX-MOUNT, SWAG-TLS] children: [Swarm Cluster, Vault Cluster, Storagebox Mount, Swag Tls]
- name: "Data Layer" - name: "Data Layer"
status_page: "iklim-{env}-ops" status_page: "iklim-{env}-ops"
notifications: [slack-high] notifications: [slack-high]
tags: [internal, database] tags: [internal, database]
children: [ETCD-CLUSTER, PATRONI-CLUSTER, MONGODB-REPLICASET] children: [Etcd Cluster, Patroni Cluster, Mongodb Replicaset]
- name: "Gateway & Messaging" - name: "Gateway & Messaging"
status_page: "iklim-{env}-ops" status_page: "iklim-{env}-ops"
notifications: [slack-high] notifications: [slack-high]
tags: [internal, gateway] tags: [internal, gateway]
children: [APISIX-GATEWAY, RABBITMQ-CLUSTER, REDIS-SENTINEL] children: [Apisix Gateway, Rabbitmq Cluster, Redis Sentinel]
- name: "External Availability - Critical" - name: "External Availability - Critical"
status_page: "iklim-{env}-ops" status_page: "iklim-{env}-ops"
notifications: [slack-high] notifications: [slack-high]
tags: [external, high] tags: [external, high]
children: [EXT-HTTPS-API, EXT-DNS-API, EXT-DNS-ROOT, EXT-PING-APP01, EXT-PING-APP02, EXT-PING-APP03] children: [Ext Https Api, Ext Dns Api, Ext Dns Root, Ext Ping App01, Ext Ping App02, Ext Ping App03]
- name: "External Availability - General" - name: "External Availability - General"
status_page: "iklim-{env}-ops" status_page: "iklim-{env}-ops"
notifications: [slack-medium] notifications: [slack-medium]
tags: [external, medium] tags: [external, medium]
children: [EXT-HTTPS-GRAFANA, EXT-PING-DB01, EXT-PING-DB02, EXT-PING-DB03] children: [Ext Https Grafana, Ext Ping Db01, Ext Ping Db02, Ext Ping Db03]
- name: "Observability" - name: "Observability"
status_page: "iklim-{env}-tools" status_page: "iklim-{env}-tools"
notifications: [slack-low] notifications: [slack-low]
tags: [internal, observability] tags: [internal, observability]
children: [PROMETHEUS, GRAFANA, PORTAINER, LOKI, EXT-HTTPS-PORTAINER, EXT-HTTPS-APIGW] children: [Prometheus, Grafana, Portainer, Loki, Ext Https Portainer, Ext Https Apigw]
push_monitors: push_monitors:
- name: SWARM-CLUSTER - name: Swarm Cluster
interval: 60 interval: 60
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, infrastructure, high] tags: [internal, infrastructure, high]
restart_threshold: 1 restart_threshold: 1
- name: VAULT-CLUSTER - name: Vault Cluster
interval: 60 interval: 60
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, infrastructure, high] tags: [internal, infrastructure, high]
restart_threshold: 1 restart_threshold: 1
- name: ETCD-CLUSTER - name: Etcd Cluster
interval: 60 interval: 60
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, database, high] tags: [internal, database, high]
restart_threshold: 1 restart_threshold: 1
- name: PATRONI-CLUSTER - name: Patroni Cluster
interval: 60 interval: 60
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, database, high] tags: [internal, database, high]
restart_threshold: 1 restart_threshold: 1
- name: MONGODB-REPLICASET - name: Mongodb Replicaset
interval: 120 interval: 120
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, database, high] tags: [internal, database, high]
restart_threshold: 1 restart_threshold: 1
- name: APISIX-GATEWAY - name: Apisix Gateway
interval: 60 interval: 60
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, gateway, high] tags: [internal, gateway, high]
restart_threshold: 1 restart_threshold: 1
- name: RABBITMQ-CLUSTER - name: Rabbitmq Cluster
interval: 60 interval: 60
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, gateway, medium] tags: [internal, gateway, medium]
restart_threshold: 3 restart_threshold: 3
- name: REDIS-SENTINEL - name: Redis Sentinel
interval: 60 interval: 60
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, database, medium] tags: [internal, database, medium]
restart_threshold: 3 restart_threshold: 3
- name: SWAG-TLS - name: Swag Tls
interval: 3600 interval: 3600
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, infrastructure, medium] tags: [internal, infrastructure, medium]
restart_threshold: 3 restart_threshold: 3
- name: STORAGEBOX-MOUNT - name: Storagebox Mount
interval: 300 interval: 300
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, infrastructure, medium] tags: [internal, infrastructure, medium]
restart_threshold: 1 restart_threshold: 1
- name: PROMETHEUS - name: Prometheus
interval: 120 interval: 120
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, observability, low] tags: [internal, observability, low]
restart_threshold: 5 restart_threshold: 5
- name: GRAFANA - name: Grafana
interval: 120 interval: 120
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, observability, low] tags: [internal, observability, low]
restart_threshold: 5 restart_threshold: 5
- name: PORTAINER - name: Portainer
interval: 120 interval: 120
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, observability, low] tags: [internal, observability, low]
restart_threshold: 5 restart_threshold: 5
- name: LOKI - name: Loki
interval: 120 interval: 120
heartbeat_retries: 1 heartbeat_retries: 1
tags: [internal, observability, low] tags: [internal, observability, low]
restart_threshold: 5 restart_threshold: 5
http_monitors: http_monitors:
- name: EXT-HTTPS-API - name: Ext Https Api
url: "https://api{suffix}.{domain}/actuator/health" url: "https://api{suffix}.{domain}/actuator/health"
accepted_statuscodes: ["200"] accepted_statuscodes: ["200"]
interval: 60 interval: 60
- name: EXT-HTTPS-GRAFANA - name: Ext Https Grafana
url: "https://grafana{suffix}.{domain}/api/health" url: "https://grafana{suffix}.{domain}/api/health"
accepted_statuscodes: ["200"] accepted_statuscodes: ["200"]
interval: 60 interval: 60
- name: EXT-HTTPS-PORTAINER - name: Ext Https Portainer
url: "https://portainer{suffix}.{domain}" url: "https://portainer{suffix}.{domain}"
accepted_statuscodes: ["200", "401", "403"] accepted_statuscodes: ["200", "401", "403"]
interval: 120 interval: 120
- name: EXT-HTTPS-APIGW - name: Ext Https Apigw
url: "https://apigw{suffix}.{domain}" url: "https://apigw{suffix}.{domain}"
accepted_statuscodes: ["200", "401", "403"] accepted_statuscodes: ["200", "401", "403"]
interval: 120 interval: 120
dns_monitors: dns_monitors:
- name: EXT-DNS-API - name: Ext Dns Api
hostname: "api{suffix}.{domain}" hostname: "api{suffix}.{domain}"
dns_resolve_type: A dns_resolve_type: A
interval: 60 interval: 60
- name: EXT-DNS-ROOT - name: Ext Dns Root
hostname: "{domain}" hostname: "{domain}"
dns_resolve_type: A dns_resolve_type: A
interval: 60 interval: 60

View File

@@ -1,2 +1,2 @@
SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:e262bf6e6712862ba24551dc326411ebb0987da59072834b2923bd73cb5c9d3b SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:a2ed1cbaabf116e49d1685e37e0335798d1fe49a2d95457717c68b1576894062
PROD_IMAGE_TAG=0.1.0 PROD_IMAGE_TAG=0.1.0

View File

@@ -251,7 +251,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
env_nodes = config.get("nodes", {}).get(env_name, {}) env_nodes = config.get("nodes", {}).get(env_name, {})
for i, node in enumerate(env_nodes.get("service", []), 1): for i, node in enumerate(env_nodes.get("service", []), 1):
m_name = f"EXT-PING-APP{i:02d}" m_name = f"Ext Ping App{i:02d}"
if only and m_name != only: if only and m_name != only:
continue continue
ip = node["ip"] ip = node["ip"]
@@ -281,7 +281,7 @@ def setup_uptime_kuma(dry_run=False, only=None):
logger.warning(f"Failed to create Ping monitor {m_name}: {e}") logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
for i, node in enumerate(env_nodes.get("db", []), 1): for i, node in enumerate(env_nodes.get("db", []), 1):
m_name = f"EXT-PING-DB{i:02d}" m_name = f"Ext Ping Db{i:02d}"
if only and m_name != only: if only and m_name != only:
continue continue
ip = node["ip"] ip = node["ip"]