Merge branch 'prod-env'
This commit is contained in:
commit
847bb86876
@ -4,11 +4,7 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- prod-env
|
- prod-env
|
||||||
paths:
|
|
||||||
- 'docker-stack-monitoring.yml'
|
|
||||||
- 'health-agent/deploy/prod.env'
|
|
||||||
- 'swag/**'
|
|
||||||
- '.gitea/workflows/deploy-monitoring-prod.yml'
|
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: prod-monitoring-deploy
|
group: prod-monitoring-deploy
|
||||||
@ -94,7 +90,7 @@ jobs:
|
|||||||
export SPRING_PROFILES_ACTIVE=PROD
|
export SPRING_PROFILES_ACTIVE=PROD
|
||||||
source_env_file ./health-agent/.env
|
source_env_file ./health-agent/.env
|
||||||
mkdir -p "${HEALTH_AGENT_CONFIG_GENERATED_DIR}"
|
mkdir -p "${HEALTH_AGENT_CONFIG_GENERATED_DIR}"
|
||||||
if [ ! -f "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then
|
if [ ! -s "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then
|
||||||
docker run --rm \
|
docker run --rm \
|
||||||
-v "${HEALTH_AGENT_CONFIG_GENERATED_DIR}:/app/config/generated" \
|
-v "${HEALTH_AGENT_CONFIG_GENERATED_DIR}:/app/config/generated" \
|
||||||
--env-file "$(pwd)/health-agent/.env" \
|
--env-file "$(pwd)/health-agent/.env" \
|
||||||
@ -123,14 +119,14 @@ jobs:
|
|||||||
--with-registry-auth \
|
--with-registry-auth \
|
||||||
--resolve-image changed \
|
--resolve-image changed \
|
||||||
-c docker-stack-monitoring.yml \
|
-c docker-stack-monitoring.yml \
|
||||||
iklimco-monitoring
|
monitoring
|
||||||
|
|
||||||
- name: Wait for Loki
|
- name: Wait for Loki
|
||||||
run: |
|
run: |
|
||||||
source ./common-functions-base.sh
|
source ./common-functions-base.sh
|
||||||
export SPRING_PROFILES_ACTIVE=PROD
|
export SPRING_PROFILES_ACTIVE=PROD
|
||||||
for i in $(seq 1 36); do
|
for i in $(seq 1 36); do
|
||||||
REPLICAS=$(docker service ls --filter name=iklimco-monitoring_loki --format "{{.Replicas}}" | head -1)
|
REPLICAS=$(docker service ls --filter name=monitoring_loki --format "{{.Replicas}}" | head -1)
|
||||||
if echo "$REPLICAS" | awk -F'[/ ]' '$1>0 && $1==$2{found=1} END{exit !found}'; then
|
if echo "$REPLICAS" | awk -F'[/ ]' '$1>0 && $1==$2{found=1} END{exit !found}'; then
|
||||||
log_message "SUCCESS" "Loki is ready: $REPLICAS"
|
log_message "SUCCESS" "Loki is ready: $REPLICAS"
|
||||||
exit 0
|
exit 0
|
||||||
@ -138,7 +134,7 @@ jobs:
|
|||||||
log_message "INFO" "Loki not ready yet (${REPLICAS:-missing}), waiting 5s..."
|
log_message "INFO" "Loki not ready yet (${REPLICAS:-missing}), waiting 5s..."
|
||||||
sleep 5
|
sleep 5
|
||||||
done
|
done
|
||||||
docker service ps iklimco-monitoring_loki || true
|
docker service ps monitoring_loki || true
|
||||||
exit 1
|
exit 1
|
||||||
|
|
||||||
- name: Configure SWAG Reverse Proxy
|
- name: Configure SWAG Reverse Proxy
|
||||||
@ -194,6 +190,6 @@ jobs:
|
|||||||
|
|
||||||
- name: Verify Deployment
|
- name: Verify Deployment
|
||||||
run: |
|
run: |
|
||||||
docker service ps iklimco-monitoring_loki \
|
docker service ps monitoring_loki \
|
||||||
--filter "desired-state=running" \
|
--filter "desired-state=running" \
|
||||||
--format "table {{.Name}}\t{{.Node}}\t{{.CurrentState}}\t{{.Image}}" | head -20
|
--format "table {{.Name}}\t{{.Node}}\t{{.CurrentState}}\t{{.Image}}" | head -20
|
||||||
|
|||||||
@ -4,11 +4,7 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- test
|
- test
|
||||||
paths:
|
|
||||||
- 'docker-stack-monitoring.yml'
|
|
||||||
- 'health-agent/**'
|
|
||||||
- 'swag/**'
|
|
||||||
- '.gitea/workflows/deploy-monitoring-test.yml'
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
deploy:
|
deploy:
|
||||||
@ -84,7 +80,7 @@ jobs:
|
|||||||
export SPRING_PROFILES_ACTIVE=TEST
|
export SPRING_PROFILES_ACTIVE=TEST
|
||||||
source_env_file ./health-agent/.env
|
source_env_file ./health-agent/.env
|
||||||
mkdir -p "${HEALTH_AGENT_CONFIG_GENERATED_DIR}"
|
mkdir -p "${HEALTH_AGENT_CONFIG_GENERATED_DIR}"
|
||||||
if [ ! -f "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then
|
if [ ! -s "${HEALTH_AGENT_CONFIG_GENERATED_DIR}/uk_tokens.yml" ]; then
|
||||||
docker run --rm \
|
docker run --rm \
|
||||||
-v "${HEALTH_AGENT_CONFIG_GENERATED_DIR}:/app/config/generated" \
|
-v "${HEALTH_AGENT_CONFIG_GENERATED_DIR}:/app/config/generated" \
|
||||||
--env-file "$(pwd)/health-agent/.env" \
|
--env-file "$(pwd)/health-agent/.env" \
|
||||||
@ -109,14 +105,14 @@ jobs:
|
|||||||
--with-registry-auth \
|
--with-registry-auth \
|
||||||
--resolve-image changed \
|
--resolve-image changed \
|
||||||
-c docker-stack-monitoring.yml \
|
-c docker-stack-monitoring.yml \
|
||||||
iklimco-monitoring
|
monitoring
|
||||||
|
|
||||||
- name: Wait for Loki
|
- name: Wait for Loki
|
||||||
run: |
|
run: |
|
||||||
source ./common-functions-base.sh
|
source ./common-functions-base.sh
|
||||||
export SPRING_PROFILES_ACTIVE=TEST
|
export SPRING_PROFILES_ACTIVE=TEST
|
||||||
for i in $(seq 1 36); do
|
for i in $(seq 1 36); do
|
||||||
REPLICAS=$(docker service ls --filter name=iklimco-monitoring_loki --format "{{.Replicas}}" | head -1)
|
REPLICAS=$(docker service ls --filter name=monitoring_loki --format "{{.Replicas}}" | head -1)
|
||||||
if echo "$REPLICAS" | awk -F'[/ ]' '$1>0 && $1==$2{found=1} END{exit !found}'; then
|
if echo "$REPLICAS" | awk -F'[/ ]' '$1>0 && $1==$2{found=1} END{exit !found}'; then
|
||||||
log_message "SUCCESS" "Loki is ready: $REPLICAS"
|
log_message "SUCCESS" "Loki is ready: $REPLICAS"
|
||||||
exit 0
|
exit 0
|
||||||
@ -124,7 +120,7 @@ jobs:
|
|||||||
log_message "INFO" "Loki not ready yet (${REPLICAS:-missing}), waiting 5s..."
|
log_message "INFO" "Loki not ready yet (${REPLICAS:-missing}), waiting 5s..."
|
||||||
sleep 5
|
sleep 5
|
||||||
done
|
done
|
||||||
docker service ps iklimco-monitoring_loki || true
|
docker service ps monitoring_loki || true
|
||||||
exit 1
|
exit 1
|
||||||
|
|
||||||
- name: Configure SWAG Reverse Proxy
|
- name: Configure SWAG Reverse Proxy
|
||||||
@ -180,6 +176,6 @@ jobs:
|
|||||||
|
|
||||||
- name: Verify Deployment
|
- name: Verify Deployment
|
||||||
run: |
|
run: |
|
||||||
docker service ps iklimco-monitoring_loki \
|
docker service ps monitoring_loki \
|
||||||
--filter "desired-state=running" \
|
--filter "desired-state=running" \
|
||||||
--format "table {{.Name}}\t{{.Node}}\t{{.CurrentState}}\t{{.Image}}" | head -20
|
--format "table {{.Name}}\t{{.Node}}\t{{.CurrentState}}\t{{.Image}}" | head -20
|
||||||
|
|||||||
@ -135,7 +135,7 @@ Mevcut dashboard'lara log paneli eklemek için:
|
|||||||
docker stack deploy \
|
docker stack deploy \
|
||||||
--with-registry-auth \
|
--with-registry-auth \
|
||||||
-c Environment_Monitoring/docker-stack-monitoring.yml \
|
-c Environment_Monitoring/docker-stack-monitoring.yml \
|
||||||
iklimco-monitoring
|
monitoring
|
||||||
```
|
```
|
||||||
|
|
||||||
Prod için Gitea workflow'u: `Environment_Monitoring/.gitea/workflows/deploy-monitoring-prod.yml`
|
Prod için Gitea workflow'u: `Environment_Monitoring/.gitea/workflows/deploy-monitoring-prod.yml`
|
||||||
|
|||||||
@ -190,7 +190,7 @@ python scripts/setup_uptime_kuma.py
|
|||||||
docker stack deploy \
|
docker stack deploy \
|
||||||
--with-registry-auth \
|
--with-registry-auth \
|
||||||
-c docker-stack-monitoring.yml \
|
-c docker-stack-monitoring.yml \
|
||||||
iklimco-monitoring
|
monitoring
|
||||||
```
|
```
|
||||||
|
|
||||||
Health-agent `iklimco-net` overlay ağına bağlı olmalı ve Docker socket'a salt okunur erişimi olmalıdır.
|
Health-agent `iklimco-net` overlay ağına bağlı olmalı ve Docker socket'a salt okunur erişimi olmalıdır.
|
||||||
@ -199,7 +199,7 @@ Health-agent `iklimco-net` overlay ağına bağlı olmalı ve Docker socket'a sa
|
|||||||
|
|
||||||
## Log Formatı
|
## Log Formatı
|
||||||
|
|
||||||
Agent JSON formatında log üretir. Grafana Explore (Loki datasource, `{service="iklimco-monitoring_health-agent"}`) veya `docker service logs iklimco-monitoring_health-agent` ile izlenebilir. Her log girdisi şu alanları içerir:
|
Agent JSON formatında log üretir. Grafana Explore (Loki datasource, `{service="monitoring_health-agent"}`) veya `docker service logs monitoring_health-agent` ile izlenebilir. Her log girdisi şu alanları içerir:
|
||||||
|
|
||||||
- `check` — monitor adı
|
- `check` — monitor adı
|
||||||
- `status` — `up` veya `down`
|
- `status` — `up` veya `down`
|
||||||
|
|||||||
@ -50,126 +50,126 @@ groups:
|
|||||||
status_page: "iklim-{env}-ops"
|
status_page: "iklim-{env}-ops"
|
||||||
notifications: [slack-high]
|
notifications: [slack-high]
|
||||||
tags: [internal, infrastructure]
|
tags: [internal, infrastructure]
|
||||||
children: [SWARM-CLUSTER, VAULT-CLUSTER, STORAGEBOX-MOUNT, SWAG-TLS]
|
children: [Swarm Cluster, Vault Cluster, Storagebox Mount, Swag Tls]
|
||||||
- name: "Data Layer"
|
- name: "Data Layer"
|
||||||
status_page: "iklim-{env}-ops"
|
status_page: "iklim-{env}-ops"
|
||||||
notifications: [slack-high]
|
notifications: [slack-high]
|
||||||
tags: [internal, database]
|
tags: [internal, database]
|
||||||
children: [ETCD-CLUSTER, PATRONI-CLUSTER, MONGODB-REPLICASET]
|
children: [Etcd Cluster, Patroni Cluster, Mongodb Replicaset]
|
||||||
- name: "Gateway & Messaging"
|
- name: "Gateway & Messaging"
|
||||||
status_page: "iklim-{env}-ops"
|
status_page: "iklim-{env}-ops"
|
||||||
notifications: [slack-high]
|
notifications: [slack-high]
|
||||||
tags: [internal, gateway]
|
tags: [internal, gateway]
|
||||||
children: [APISIX-GATEWAY, RABBITMQ-CLUSTER, REDIS-SENTINEL]
|
children: [Apisix Gateway, Rabbitmq Cluster, Redis Sentinel]
|
||||||
- name: "External Availability - Critical"
|
- name: "External Availability - Critical"
|
||||||
status_page: "iklim-{env}-ops"
|
status_page: "iklim-{env}-ops"
|
||||||
notifications: [slack-high]
|
notifications: [slack-high]
|
||||||
tags: [external, high]
|
tags: [external, high]
|
||||||
children: [EXT-HTTPS-API, EXT-DNS-API, EXT-DNS-ROOT, EXT-PING-APP01, EXT-PING-APP02, EXT-PING-APP03]
|
children: [Ext Https Api, Ext Dns Api, Ext Dns Root, Ext Ping App01, Ext Ping App02, Ext Ping App03]
|
||||||
- name: "External Availability - General"
|
- name: "External Availability - General"
|
||||||
status_page: "iklim-{env}-ops"
|
status_page: "iklim-{env}-ops"
|
||||||
notifications: [slack-medium]
|
notifications: [slack-medium]
|
||||||
tags: [external, medium]
|
tags: [external, medium]
|
||||||
children: [EXT-HTTPS-GRAFANA, EXT-PING-DB01, EXT-PING-DB02, EXT-PING-DB03]
|
children: [Ext Https Grafana, Ext Ping Db01, Ext Ping Db02, Ext Ping Db03]
|
||||||
- name: "Observability"
|
- name: "Observability"
|
||||||
status_page: "iklim-{env}-tools"
|
status_page: "iklim-{env}-tools"
|
||||||
notifications: [slack-low]
|
notifications: [slack-low]
|
||||||
tags: [internal, observability]
|
tags: [internal, observability]
|
||||||
children: [PROMETHEUS, GRAFANA, PORTAINER, LOKI, EXT-HTTPS-PORTAINER, EXT-HTTPS-APIGW]
|
children: [Prometheus, Grafana, Portainer, Loki, Ext Https Portainer, Ext Https Apigw]
|
||||||
push_monitors:
|
push_monitors:
|
||||||
- name: SWARM-CLUSTER
|
- name: Swarm Cluster
|
||||||
interval: 60
|
interval: 75
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, infrastructure, high]
|
tags: [internal, infrastructure, high]
|
||||||
restart_threshold: 1
|
restart_threshold: 1
|
||||||
- name: VAULT-CLUSTER
|
- name: Vault Cluster
|
||||||
interval: 60
|
interval: 75
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, infrastructure, high]
|
tags: [internal, infrastructure, high]
|
||||||
restart_threshold: 1
|
restart_threshold: 1
|
||||||
- name: ETCD-CLUSTER
|
- name: Etcd Cluster
|
||||||
interval: 60
|
interval: 75
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, database, high]
|
tags: [internal, database, high]
|
||||||
restart_threshold: 1
|
restart_threshold: 1
|
||||||
- name: PATRONI-CLUSTER
|
- name: Patroni Cluster
|
||||||
interval: 60
|
interval: 75
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, database, high]
|
tags: [internal, database, high]
|
||||||
restart_threshold: 1
|
restart_threshold: 1
|
||||||
- name: MONGODB-REPLICASET
|
- name: Mongodb Replicaset
|
||||||
interval: 120
|
interval: 120
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, database, high]
|
tags: [internal, database, high]
|
||||||
restart_threshold: 1
|
restart_threshold: 1
|
||||||
- name: APISIX-GATEWAY
|
- name: Apisix Gateway
|
||||||
interval: 60
|
interval: 75
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, gateway, high]
|
tags: [internal, gateway, high]
|
||||||
restart_threshold: 1
|
restart_threshold: 1
|
||||||
- name: RABBITMQ-CLUSTER
|
- name: Rabbitmq Cluster
|
||||||
interval: 60
|
interval: 75
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, gateway, medium]
|
tags: [internal, gateway, medium]
|
||||||
restart_threshold: 3
|
restart_threshold: 3
|
||||||
- name: REDIS-SENTINEL
|
- name: Redis Sentinel
|
||||||
interval: 60
|
interval: 75
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, database, medium]
|
tags: [internal, database, medium]
|
||||||
restart_threshold: 3
|
restart_threshold: 3
|
||||||
- name: SWAG-TLS
|
- name: Swag Tls
|
||||||
interval: 3600
|
interval: 3600
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, infrastructure, medium]
|
tags: [internal, infrastructure, medium]
|
||||||
restart_threshold: 3
|
restart_threshold: 3
|
||||||
- name: STORAGEBOX-MOUNT
|
- name: Storagebox Mount
|
||||||
interval: 300
|
interval: 300
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, infrastructure, medium]
|
tags: [internal, infrastructure, medium]
|
||||||
restart_threshold: 1
|
restart_threshold: 1
|
||||||
- name: PROMETHEUS
|
- name: Prometheus
|
||||||
interval: 120
|
interval: 120
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, observability, low]
|
tags: [internal, observability, low]
|
||||||
restart_threshold: 5
|
restart_threshold: 5
|
||||||
- name: GRAFANA
|
- name: Grafana
|
||||||
interval: 120
|
interval: 120
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, observability, low]
|
tags: [internal, observability, low]
|
||||||
restart_threshold: 5
|
restart_threshold: 5
|
||||||
- name: PORTAINER
|
- name: Portainer
|
||||||
interval: 120
|
interval: 120
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, observability, low]
|
tags: [internal, observability, low]
|
||||||
restart_threshold: 5
|
restart_threshold: 5
|
||||||
- name: LOKI
|
- name: Loki
|
||||||
interval: 120
|
interval: 120
|
||||||
heartbeat_retries: 1
|
heartbeat_retries: 1
|
||||||
tags: [internal, observability, low]
|
tags: [internal, observability, low]
|
||||||
restart_threshold: 5
|
restart_threshold: 5
|
||||||
http_monitors:
|
http_monitors:
|
||||||
- name: EXT-HTTPS-API
|
- name: Ext Https Api
|
||||||
url: "https://api{suffix}.{domain}/actuator/health"
|
url: "https://api{suffix}.{domain}/health"
|
||||||
accepted_statuscodes: ["200"]
|
accepted_statuscodes: ["200"]
|
||||||
interval: 60
|
interval: 60
|
||||||
- name: EXT-HTTPS-GRAFANA
|
- name: Ext Https Grafana
|
||||||
url: "https://grafana{suffix}.{domain}/api/health"
|
url: "https://grafana{suffix}.{domain}/api/health"
|
||||||
accepted_statuscodes: ["200"]
|
accepted_statuscodes: ["200"]
|
||||||
interval: 60
|
interval: 60
|
||||||
- name: EXT-HTTPS-PORTAINER
|
- name: Ext Https Portainer
|
||||||
url: "https://portainer{suffix}.{domain}"
|
url: "https://portainer{suffix}.{domain}"
|
||||||
accepted_statuscodes: ["200", "401", "403"]
|
accepted_statuscodes: ["200", "401", "403"]
|
||||||
interval: 120
|
interval: 120
|
||||||
- name: EXT-HTTPS-APIGW
|
- name: Ext Https Apigw
|
||||||
url: "https://apigw{suffix}.{domain}"
|
url: "https://apigw{suffix}.{domain}"
|
||||||
accepted_statuscodes: ["200", "401", "403"]
|
accepted_statuscodes: ["200", "401", "403"]
|
||||||
interval: 120
|
interval: 120
|
||||||
dns_monitors:
|
dns_monitors:
|
||||||
- name: EXT-DNS-API
|
- name: Ext Dns Api
|
||||||
hostname: "api{suffix}.{domain}"
|
hostname: "api{suffix}.{domain}"
|
||||||
dns_resolve_type: A
|
dns_resolve_type: A
|
||||||
interval: 60
|
interval: 60
|
||||||
- name: EXT-DNS-ROOT
|
- name: Ext Dns Root
|
||||||
hostname: "{domain}"
|
hostname: "{domain}"
|
||||||
dns_resolve_type: A
|
dns_resolve_type: A
|
||||||
interval: 60
|
interval: 60
|
||||||
|
|||||||
@ -1,2 +1,2 @@
|
|||||||
SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:fadf229d4423075d2871f9dc4a5a0afdf6dfe7c5fcd04d866b2d6d6fe8942b56
|
SOURCE_IMAGE_DIGEST=registry.tarla.io/iklimco/health-agent@sha256:196bb9b1cbb7acd7cd8671f7a3e9e3f0078a0c74658c66c9c22881fa66d75242
|
||||||
PROD_IMAGE_TAG=0.1.0
|
PROD_IMAGE_TAG=0.1.0
|
||||||
@ -3,7 +3,7 @@ import argparse
|
|||||||
import yaml
|
import yaml
|
||||||
import logging
|
import logging
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from uptime_kuma_api import UptimeKumaApi, MonitorType
|
from uptime_kuma_api import UptimeKumaApi, MonitorType, NotificationType
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||||
logger = logging.getLogger("uk-setup")
|
logger = logging.getLogger("uk-setup")
|
||||||
@ -12,11 +12,34 @@ _root = os.path.join(os.path.dirname(__file__), "..")
|
|||||||
load_dotenv(os.path.join(_root, ".env"))
|
load_dotenv(os.path.join(_root, ".env"))
|
||||||
load_dotenv(os.path.join(_root, ".env.setup"))
|
load_dotenv(os.path.join(_root, ".env.setup"))
|
||||||
|
|
||||||
|
|
||||||
def format_str(text, env_name: str, project: str):
    """Substitute the ``{env}`` and ``{project}`` placeholders in *text*.

    Args:
        text: Value taken from the config. Only ``str`` values are
            processed; anything else (numbers, lists, ``None``) is
            returned unchanged.
        env_name: Replacement for every ``{env}`` occurrence.
        project: Replacement for every ``{project}`` occurrence.

    Returns:
        The substituted string, or *text* itself when it is not a string.
    """
    # Non-string config values carry no placeholders; pass them through.
    if not isinstance(text, str):
        return text
    return text.replace("{env}", env_name).replace("{project}", project)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_template(text, suffix: str, domain: str):
    """Substitute the ``{suffix}`` and ``{domain}`` placeholders in *text*.

    Args:
        text: Template value such as a URL or hostname. Only ``str``
            values are processed; anything else is returned unchanged.
        suffix: Replacement for every ``{suffix}`` occurrence (may be
            the empty string).
        domain: Replacement for every ``{domain}`` occurrence.

    Returns:
        The substituted string, or *text* itself when it is not a string.
    """
    # Non-string values carry no placeholders; pass them through.
    if not isinstance(text, str):
        return text
    return text.replace("{suffix}", suffix).replace("{domain}", domain)
|
||||||
|
|
||||||
|
|
||||||
|
def find_parent_group(monitor_name, groups, group_map):
    """Return the mapped id of the first group that lists *monitor_name*.

    Args:
        monitor_name: Monitor name to look for in each group's
            ``children`` list.
        groups: Iterable of group dicts, each with a ``name`` key and an
            optional ``children`` list.
        group_map: Mapping of group name to id.

    Returns:
        ``group_map`` entry for the first matching group, or ``None``
        when no group lists the monitor or the matching group has no
        entry in *group_map*.
    """
    for group in groups:
        # ``children:`` with no value loads as None from YAML; treat it
        # as an empty list instead of failing on ``in None``.
        if monitor_name in (group.get("children") or []):
            return group_map.get(group["name"])
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def find_group_notifications(monitor_name, groups, notification_map):
    """Return notification ids of the first group that lists *monitor_name*.

    Args:
        monitor_name: Monitor name to look for in each group's
            ``children`` list.
        groups: Iterable of group dicts with optional ``children`` and
            ``notifications`` lists.
        notification_map: Mapping of notification key to id. Keys that
            are missing or map to ``None`` are ignored.

    Returns:
        A non-empty list of ids for the first matching group, or
        ``None`` when no group lists the monitor or none of that
        group's notifications resolve to an id.
    """
    for group in groups:
        # ``children:`` / ``notifications:`` with no value load as None
        # from YAML; treat them as empty lists instead of raising.
        if monitor_name not in (group.get("children") or []):
            continue
        ids = [
            notification_map[key]
            for key in (group.get("notifications") or [])
            if notification_map.get(key) is not None
        ]
        # Only the first matching group is consulted; an empty result is
        # reported as None so callers can skip the notification field.
        return ids or None
    return None
|
||||||
|
|
||||||
|
|
||||||
def setup_uptime_kuma(dry_run=False, only=None):
|
def setup_uptime_kuma(dry_run=False, only=None):
|
||||||
env_name = os.getenv("ENV", "test")
|
env_name = os.getenv("ENV", "test")
|
||||||
|
|
||||||
@ -25,6 +48,8 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
config = yaml.safe_load(f)
|
config = yaml.safe_load(f)
|
||||||
|
|
||||||
project = config.get("project", "iklim")
|
project = config.get("project", "iklim")
|
||||||
|
domain = os.getenv("EXTERNAL_DOMAIN", config.get("domain", {}).get("base", "iklim.co"))
|
||||||
|
suffix = os.getenv("EXTERNAL_SUBDOMAIN_SUFFIX", "")
|
||||||
|
|
||||||
kuma_url = os.getenv("UK_URL", "http://localhost:3001")
|
kuma_url = os.getenv("UK_URL", "http://localhost:3001")
|
||||||
kuma_user = os.getenv("UK_USER", "admin")
|
kuma_user = os.getenv("UK_USER", "admin")
|
||||||
@ -48,22 +73,64 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to get monitors: {e}")
|
logger.error(f"Failed to get monitors: {e}")
|
||||||
|
|
||||||
# 1. Process Groups
|
# 0. Notification Providers
|
||||||
|
notification_map = {}
|
||||||
|
existing_notifications = {}
|
||||||
|
if api:
|
||||||
|
try:
|
||||||
|
for n in api.get_notifications():
|
||||||
|
existing_notifications[n['name']] = n
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to get notifications: {e}")
|
||||||
|
|
||||||
|
for notif_key, notif_cfg in config.get("notifications", {}).items():
|
||||||
|
webhook_env = notif_cfg.get("webhook_env")
|
||||||
|
webhook_url = os.getenv(webhook_env, "") if webhook_env else ""
|
||||||
|
notif_name = f"{project}-{notif_key}"
|
||||||
|
|
||||||
|
logger.info(f"Processing notification: {notif_name}")
|
||||||
|
if not dry_run:
|
||||||
|
if notif_name in existing_notifications:
|
||||||
|
notification_map[notif_key] = existing_notifications[notif_name]['id']
|
||||||
|
logger.info(f"Notification {notif_name} already exists (id={notification_map[notif_key]})")
|
||||||
|
elif webhook_url:
|
||||||
|
try:
|
||||||
|
res = api.add_notification(
|
||||||
|
type=NotificationType.SLACK,
|
||||||
|
name=notif_name,
|
||||||
|
isDefault=False,
|
||||||
|
slackwebhookURL=webhook_url,
|
||||||
|
applyExisting=False
|
||||||
|
)
|
||||||
|
notification_map[notif_key] = res.get('id')
|
||||||
|
logger.info(f"Created notification: {notif_name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to create notification {notif_name}: {e}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Skipping {notif_name}: env var {webhook_env} is not set")
|
||||||
|
|
||||||
|
# 1. Groups
|
||||||
group_map = {}
|
group_map = {}
|
||||||
for g in config.get("groups", []):
|
for g in config.get("groups", []):
|
||||||
raw_name = g["name"]
|
raw_name = g["name"]
|
||||||
formatted_name = f"{project} [{env_name}] {raw_name}"
|
formatted_name = f"{project} [{env_name}] {raw_name}"
|
||||||
|
|
||||||
|
notif_ids = [notification_map[n] for n in g.get("notifications", []) if notification_map.get(n) is not None]
|
||||||
|
|
||||||
logger.info(f"Processing group: {formatted_name}")
|
logger.info(f"Processing group: {formatted_name}")
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
if formatted_name not in existing_monitors:
|
if formatted_name not in existing_monitors:
|
||||||
logger.info(f"Creating group monitor: {formatted_name}")
|
logger.info(f"Creating group monitor: {formatted_name}")
|
||||||
res = api.add_monitor(type=MonitorType.GROUP, name=formatted_name)
|
kwargs = {"type": MonitorType.GROUP, "name": formatted_name}
|
||||||
|
if notif_ids:
|
||||||
|
kwargs["notificationIDList"] = notif_ids
|
||||||
|
res = api.add_monitor(**kwargs)
|
||||||
group_map[raw_name] = res['monitorID']
|
group_map[raw_name] = res['monitorID']
|
||||||
else:
|
else:
|
||||||
group_map[raw_name] = existing_monitors[formatted_name]['id']
|
group_map[raw_name] = existing_monitors[formatted_name]['id']
|
||||||
|
|
||||||
tokens = {}
|
tokens = {}
|
||||||
|
new_monitor_ids = {} # m_name -> monitorID for monitors created in this run
|
||||||
|
|
||||||
# 2. Push Monitors
|
# 2. Push Monitors
|
||||||
for pm in config.get("push_monitors", []):
|
for pm in config.get("push_monitors", []):
|
||||||
@ -72,68 +139,321 @@ def setup_uptime_kuma(dry_run=False, only=None):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
m_interval = pm.get("interval", 60)
|
m_interval = pm.get("interval", 60)
|
||||||
|
parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
|
||||||
parent_group_id = None
|
notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
|
||||||
for g in config.get("groups", []):
|
|
||||||
if m_name in g.get("children", []):
|
|
||||||
parent_group_id = group_map.get(g["name"])
|
|
||||||
break
|
|
||||||
|
|
||||||
logger.info(f"Processing push monitor: {m_name}")
|
logger.info(f"Processing push monitor: {m_name}")
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
if m_name in existing_monitors:
|
if m_name in existing_monitors:
|
||||||
logger.info(f"Monitor {m_name} already exists.")
|
logger.info(f"Monitor {m_name} already exists. Updating...")
|
||||||
m_id = existing_monitors[m_name]['id']
|
m_id = existing_monitors[m_name]['id']
|
||||||
token = existing_monitors[m_name]['pushToken']
|
tokens[m_name] = existing_monitors[m_name]['pushToken']
|
||||||
tokens[m_name] = token
|
|
||||||
|
|
||||||
if parent_group_id and existing_monitors[m_name].get('parent') != parent_group_id:
|
kwargs = {
|
||||||
api.edit_monitor(m_id, parent=parent_group_id)
|
"interval": m_interval
|
||||||
|
}
|
||||||
|
if parent_group_id:
|
||||||
|
kwargs["parent"] = parent_group_id
|
||||||
|
if notif_ids:
|
||||||
|
kwargs["notificationIDList"] = notif_ids
|
||||||
|
|
||||||
|
try:
|
||||||
|
api.edit_monitor(m_id, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to edit push monitor {m_name}: {e}")
|
||||||
else:
|
else:
|
||||||
logger.info(f"Creating push monitor: {m_name}")
|
logger.info(f"Creating push monitor: {m_name}")
|
||||||
result = api.add_monitor(
|
kwargs = {
|
||||||
type=MonitorType.PUSH,
|
"type": MonitorType.PUSH,
|
||||||
name=m_name,
|
"name": m_name,
|
||||||
interval=m_interval,
|
"interval": m_interval,
|
||||||
parent=parent_group_id
|
"parent": parent_group_id
|
||||||
)
|
}
|
||||||
m_id = result['monitorID']
|
if notif_ids:
|
||||||
|
kwargs["notificationIDList"] = notif_ids
|
||||||
# Fetch again to get pushToken
|
result = api.add_monitor(**kwargs)
|
||||||
for m in api.get_monitors():
|
new_monitor_ids[m_name] = result['monitorID']
|
||||||
if m['id'] == m_id:
|
|
||||||
tokens[m_name] = m['pushToken']
|
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
tokens[m_name] = "dummy_token_dry_run"
|
tokens[m_name] = "dummy_token_dry_run"
|
||||||
|
|
||||||
# 3. Process Status Pages
|
# Fetch push tokens for newly created monitors in one batch call.
|
||||||
|
# Calling api.get_monitors() per-monitor races with WebSocket event delivery;
|
||||||
|
# a single call after all creates allows the server state to settle.
|
||||||
|
if new_monitor_ids and api:
|
||||||
|
id_to_name = {v: k for k, v in new_monitor_ids.items()}
|
||||||
|
for m in api.get_monitors():
|
||||||
|
if m['id'] in id_to_name:
|
||||||
|
m_name = id_to_name[m['id']]
|
||||||
|
tokens[m_name] = m.get('pushToken', '')
|
||||||
|
logger.info(f"Captured push token for {m_name}")
|
||||||
|
missing = [n for n in new_monitor_ids if n not in tokens]
|
||||||
|
if missing:
|
||||||
|
logger.warning(f"Could not capture push token for: {missing}")
|
||||||
|
|
||||||
|
# 3. HTTP Monitors
|
||||||
|
for hm in config.get("http_monitors", []):
|
||||||
|
m_name = hm["name"]
|
||||||
|
if only and m_name != only:
|
||||||
|
continue
|
||||||
|
url = resolve_template(hm["url"], suffix, domain)
|
||||||
|
interval = hm.get("interval", 60)
|
||||||
|
accepted_statuscodes = hm.get("accepted_statuscodes", ["200"])
|
||||||
|
parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
|
||||||
|
notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
|
||||||
|
|
||||||
|
logger.info(f"Processing HTTP monitor: {m_name} -> {url}")
|
||||||
|
if not dry_run:
|
||||||
|
if m_name in existing_monitors:
|
||||||
|
logger.info(f"Monitor {m_name} already exists. Updating...")
|
||||||
|
m_id = existing_monitors[m_name]['id']
|
||||||
|
kwargs = {
|
||||||
|
"type": MonitorType.HTTP,
|
||||||
|
"name": m_name,
|
||||||
|
"url": url,
|
||||||
|
"interval": interval,
|
||||||
|
"accepted_statuscodes": accepted_statuscodes,
|
||||||
|
}
|
||||||
|
if parent_group_id is not None:
|
||||||
|
kwargs["parent"] = parent_group_id
|
||||||
|
if notif_ids:
|
||||||
|
kwargs["notificationIDList"] = notif_ids
|
||||||
|
try:
|
||||||
|
api.edit_monitor(m_id, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to edit HTTP monitor {m_name}: {e}")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
kwargs = {
|
||||||
|
"type": MonitorType.HTTP,
|
||||||
|
"name": m_name,
|
||||||
|
"url": url,
|
||||||
|
"interval": interval,
|
||||||
|
"accepted_statuscodes": accepted_statuscodes,
|
||||||
|
}
|
||||||
|
if parent_group_id is not None:
|
||||||
|
kwargs["parent"] = parent_group_id
|
||||||
|
if notif_ids:
|
||||||
|
kwargs["notificationIDList"] = notif_ids
|
||||||
|
api.add_monitor(**kwargs)
|
||||||
|
logger.info(f"Created HTTP monitor: {m_name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to create HTTP monitor {m_name}: {e}")
|
||||||
|
|
||||||
|
# 4. DNS Monitors
|
||||||
|
for dm in config.get("dns_monitors", []):
|
||||||
|
m_name = dm["name"]
|
||||||
|
if only and m_name != only:
|
||||||
|
continue
|
||||||
|
hostname = resolve_template(dm["hostname"], suffix, domain)
|
||||||
|
dns_resolve_type = dm.get("dns_resolve_type", "A")
|
||||||
|
interval = dm.get("interval", 60)
|
||||||
|
parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
|
||||||
|
notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
|
||||||
|
|
||||||
|
logger.info(f"Processing DNS monitor: {m_name} -> {hostname}")
|
||||||
|
if not dry_run:
|
||||||
|
if m_name in existing_monitors:
|
||||||
|
logger.info(f"Monitor {m_name} already exists. Updating...")
|
||||||
|
m_id = existing_monitors[m_name]['id']
|
||||||
|
kwargs = {
|
||||||
|
"type": MonitorType.DNS,
|
||||||
|
"name": m_name,
|
||||||
|
"hostname": hostname,
|
||||||
|
"port": 53,
|
||||||
|
"accepted_statuscodes": ["200-299"],
|
||||||
|
"dns_resolve_type": dns_resolve_type,
|
||||||
|
"interval": interval,
|
||||||
|
}
|
||||||
|
if parent_group_id is not None:
|
||||||
|
kwargs["parent"] = parent_group_id
|
||||||
|
if notif_ids:
|
||||||
|
kwargs["notificationIDList"] = notif_ids
|
||||||
|
try:
|
||||||
|
api.edit_monitor(m_id, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to edit DNS monitor {m_name}: {e}")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
kwargs = {
|
||||||
|
"type": MonitorType.DNS,
|
||||||
|
"name": m_name,
|
||||||
|
"hostname": hostname,
|
||||||
|
"port": 53,
|
||||||
|
"accepted_statuscodes": ["200-299"],
|
||||||
|
"dns_resolve_type": dns_resolve_type,
|
||||||
|
"interval": interval,
|
||||||
|
}
|
||||||
|
if parent_group_id is not None:
|
||||||
|
kwargs["parent"] = parent_group_id
|
||||||
|
if notif_ids:
|
||||||
|
kwargs["notificationIDList"] = notif_ids
|
||||||
|
api.add_monitor(**kwargs)
|
||||||
|
logger.info(f"Created DNS monitor: {m_name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to create DNS monitor {m_name}: {e}")
|
||||||
|
|
||||||
|
# 5. Ping Monitors (generated from nodes config)
|
||||||
|
ping_cfg = config.get("ping_monitors", {})
|
||||||
|
ping_interval = ping_cfg.get("interval", 60)
|
||||||
|
ping_retries = ping_cfg.get("max_retries", 1)
|
||||||
|
env_nodes = config.get("nodes", {}).get(env_name, {})
|
||||||
|
|
||||||
|
for i, node in enumerate(env_nodes.get("service", []), 1):
|
||||||
|
m_name = f"Ext Ping App{i:02d}"
|
||||||
|
if only and m_name != only:
|
||||||
|
continue
|
||||||
|
ip = node["ip"]
|
||||||
|
parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
|
||||||
|
notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
|
||||||
|
|
||||||
|
logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
|
||||||
|
if not dry_run:
|
||||||
|
if m_name in existing_monitors:
|
||||||
|
logger.info(f"Monitor {m_name} already exists. Updating...")
|
||||||
|
m_id = existing_monitors[m_name]['id']
|
||||||
|
kwargs = {
|
||||||
|
"type": MonitorType.PING,
|
||||||
|
"name": m_name,
|
||||||
|
"hostname": ip,
|
||||||
|
"interval": ping_interval,
|
||||||
|
"maxretries": ping_retries,
|
||||||
|
}
|
||||||
|
if parent_group_id is not None:
|
||||||
|
kwargs["parent"] = parent_group_id
|
||||||
|
if notif_ids:
|
||||||
|
kwargs["notificationIDList"] = notif_ids
|
||||||
|
try:
|
||||||
|
api.edit_monitor(m_id, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to edit Ping monitor {m_name}: {e}")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
kwargs = {
|
||||||
|
"type": MonitorType.PING,
|
||||||
|
"name": m_name,
|
||||||
|
"hostname": ip,
|
||||||
|
"interval": ping_interval,
|
||||||
|
"maxretries": ping_retries,
|
||||||
|
}
|
||||||
|
if parent_group_id is not None:
|
||||||
|
kwargs["parent"] = parent_group_id
|
||||||
|
if notif_ids:
|
||||||
|
kwargs["notificationIDList"] = notif_ids
|
||||||
|
api.add_monitor(**kwargs)
|
||||||
|
logger.info(f"Created Ping monitor: {m_name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
|
||||||
|
|
||||||
|
for i, node in enumerate(env_nodes.get("db", []), 1):
|
||||||
|
m_name = f"Ext Ping Db{i:02d}"
|
||||||
|
if only and m_name != only:
|
||||||
|
continue
|
||||||
|
ip = node["ip"]
|
||||||
|
parent_group_id = find_parent_group(m_name, config.get("groups", []), group_map)
|
||||||
|
notif_ids = find_group_notifications(m_name, config.get("groups", []), notification_map)
|
||||||
|
|
||||||
|
logger.info(f"Processing Ping monitor: {m_name} -> {ip}")
|
||||||
|
if not dry_run:
|
||||||
|
if m_name in existing_monitors:
|
||||||
|
logger.info(f"Monitor {m_name} already exists. Updating...")
|
||||||
|
m_id = existing_monitors[m_name]['id']
|
||||||
|
kwargs = {
|
||||||
|
"type": MonitorType.PING,
|
||||||
|
"name": m_name,
|
||||||
|
"hostname": ip,
|
||||||
|
"interval": ping_interval,
|
||||||
|
"maxretries": ping_retries,
|
||||||
|
}
|
||||||
|
if parent_group_id is not None:
|
||||||
|
kwargs["parent"] = parent_group_id
|
||||||
|
if notif_ids:
|
||||||
|
kwargs["notificationIDList"] = notif_ids
|
||||||
|
try:
|
||||||
|
api.edit_monitor(m_id, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to edit Ping monitor {m_name}: {e}")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
kwargs = {
|
||||||
|
"type": MonitorType.PING,
|
||||||
|
"name": m_name,
|
||||||
|
"hostname": ip,
|
||||||
|
"interval": ping_interval,
|
||||||
|
"maxretries": ping_retries,
|
||||||
|
}
|
||||||
|
if parent_group_id is not None:
|
||||||
|
kwargs["parent"] = parent_group_id
|
||||||
|
if notif_ids:
|
||||||
|
kwargs["notificationIDList"] = notif_ids
|
||||||
|
api.add_monitor(**kwargs)
|
||||||
|
logger.info(f"Created Ping monitor: {m_name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to create Ping monitor {m_name}: {e}")
|
||||||
|
|
||||||
|
# 6. Status Pages
|
||||||
|
if api:
|
||||||
|
existing_pages = {}
|
||||||
|
try:
|
||||||
|
for p in api.get_status_pages():
|
||||||
|
existing_pages[p['slug']] = p
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to get status pages: {e}")
|
||||||
|
|
||||||
for sp in config.get("status_pages", []):
|
for sp in config.get("status_pages", []):
|
||||||
slug = format_str(sp["slug"], env_name, project)
|
slug = format_str(sp["slug"], env_name, project)
|
||||||
title = format_str(sp["title"], env_name, project)
|
title = format_str(sp["title"], env_name, project)
|
||||||
|
is_public = sp.get("public", False)
|
||||||
|
sp_groups = sp.get("groups", [])
|
||||||
|
|
||||||
logger.info(f"Processing status page: {title} (slug: {slug})")
|
logger.info(f"Processing status page: {title} (slug: {slug})")
|
||||||
if not dry_run:
|
|
||||||
try:
|
try:
|
||||||
pages = api.get_status_pages()
|
if slug not in existing_pages:
|
||||||
exists = any(p['slug'] == slug for p in pages)
|
|
||||||
if not exists:
|
|
||||||
logger.info(f"Creating status page: {slug}")
|
logger.info(f"Creating status page: {slug}")
|
||||||
api.add_status_page(slug, title)
|
api.add_status_page(slug, title)
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Status page ops failed: {e}")
|
|
||||||
|
|
||||||
# 4. Write tokens to uk_tokens.yml
|
# Each monitors.yml group becomes one display section on the status page.
|
||||||
|
# Use group_map (populated during Section 1) to avoid re-fetching monitors;
|
||||||
|
# a fresh get_monitors() call after add_monitor() races with WebSocket delivery.
|
||||||
|
public_group_list = []
|
||||||
|
for group_raw_name in sp_groups:
|
||||||
|
group_id = group_map.get(group_raw_name)
|
||||||
|
if not group_id:
|
||||||
|
logger.warning(f"Group '{group_raw_name}' not in group_map, skipping in status page")
|
||||||
|
continue
|
||||||
|
public_group_list.append({
|
||||||
|
"name": group_raw_name,
|
||||||
|
"weight": len(public_group_list) + 1,
|
||||||
|
"monitorList": [{"id": group_id}]
|
||||||
|
})
|
||||||
|
|
||||||
|
if public_group_list:
|
||||||
|
api.save_status_page(
|
||||||
|
slug=slug,
|
||||||
|
title=title,
|
||||||
|
publicGroupList=public_group_list,
|
||||||
|
published=is_public
|
||||||
|
)
|
||||||
|
logger.info(f"Saved status page '{slug}' with {len(public_group_list)} group(s)")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Status page ops failed for {slug}: {e}")
|
||||||
|
|
||||||
|
# 7. Write push tokens to uk_tokens.yml
|
||||||
token_file = os.path.join(os.path.dirname(__file__), "..", "config", "generated", "uk_tokens.yml")
|
token_file = os.path.join(os.path.dirname(__file__), "..", "config", "generated", "uk_tokens.yml")
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
|
if not tokens:
|
||||||
|
logger.warning("No push tokens captured; skipping uk_tokens.yml write so setup reruns next time")
|
||||||
|
else:
|
||||||
|
os.makedirs(os.path.dirname(token_file), exist_ok=True)
|
||||||
with open(token_file, "w") as f:
|
with open(token_file, "w") as f:
|
||||||
yaml.dump(tokens, f)
|
yaml.dump(tokens, f)
|
||||||
logger.info(f"Saved push tokens to {token_file}")
|
logger.info(f"Saved {len(tokens)} push tokens to {token_file}")
|
||||||
else:
|
else:
|
||||||
logger.info(f"[DRY-RUN] Would save {len(tokens)} tokens to {token_file}")
|
logger.info(f"[DRY-RUN] Would save {len(tokens)} tokens to {token_file}")
|
||||||
|
|
||||||
if api:
|
if api:
|
||||||
api.disconnect()
|
api.disconnect()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description="Setup Uptime Kuma monitors")
|
parser = argparse.ArgumentParser(description="Setup Uptime Kuma monitors")
|
||||||
parser.add_argument("--dry-run", action="store_true", help="Print actions without making changes")
|
parser.add_argument("--dry-run", action="store_true", help="Print actions without making changes")
|
||||||
|
|||||||
@ -10,7 +10,9 @@ def check_storagebox_mount():
|
|||||||
|
|
||||||
storagebox_path = os.getenv("STORAGEBOX_PATH", "/mnt/storagebox")
|
storagebox_path = os.getenv("STORAGEBOX_PATH", "/mnt/storagebox")
|
||||||
expected_files = [
|
expected_files = [
|
||||||
"patroni/patroni.yml",
|
"db/postgresql-01/config/patroni.yml",
|
||||||
|
"db/postgresql-02/config/patroni.yml",
|
||||||
|
"db/postgresql-03/config/patroni.yml",
|
||||||
"ssl/STAR.iklim.co.full.crt"
|
"ssl/STAR.iklim.co.full.crt"
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -18,7 +20,7 @@ def check_storagebox_mount():
|
|||||||
|
|
||||||
if not os.path.exists(storagebox_path):
|
if not os.path.exists(storagebox_path):
|
||||||
ping_ms = int((time.time() - start_t) * 1000)
|
ping_ms = int((time.time() - start_t) * 1000)
|
||||||
push("STORAGEBOX-MOUNT", "down", f"{storagebox_path} not found", ping_ms)
|
push("Storagebox Mount", "down", f"{storagebox_path} not found", ping_ms)
|
||||||
return
|
return
|
||||||
|
|
||||||
for rel_path in expected_files:
|
for rel_path in expected_files:
|
||||||
@ -30,7 +32,7 @@ def check_storagebox_mount():
|
|||||||
|
|
||||||
if missing_files:
|
if missing_files:
|
||||||
msg = f"mount exists but missing: {', '.join(missing_files)}"
|
msg = f"mount exists but missing: {', '.join(missing_files)}"
|
||||||
push("STORAGEBOX-MOUNT", "down", msg, ping_ms)
|
push("Storagebox Mount", "down", msg, ping_ms)
|
||||||
else:
|
else:
|
||||||
msg = f"{storagebox_path} OK | all critical files present"
|
msg = f"{storagebox_path} OK | all critical files present"
|
||||||
push("STORAGEBOX-MOUNT", "up", msg, ping_ms)
|
push("Storagebox Mount", "up", msg, ping_ms)
|
||||||
|
|||||||
@ -54,7 +54,7 @@ def check_patroni_cluster():
|
|||||||
ping_ms = int((time.time() - start_t) * 1000)
|
ping_ms = int((time.time() - start_t) * 1000)
|
||||||
|
|
||||||
if not cluster_data:
|
if not cluster_data:
|
||||||
push("PATRONI-CLUSTER", "down", error_msg, ping_ms)
|
push("Patroni Cluster", "down", error_msg, ping_ms)
|
||||||
return
|
return
|
||||||
|
|
||||||
members = cluster_data.get("members", [])
|
members = cluster_data.get("members", [])
|
||||||
@ -73,7 +73,7 @@ def check_patroni_cluster():
|
|||||||
if not leader:
|
if not leader:
|
||||||
down_nodes = [f"{r[0]} state: {r[2]}" for r in replicas if r[2] not in ("running", "streaming")]
|
down_nodes = [f"{r[0]} state: {r[2]}" for r in replicas if r[2] not in ("running", "streaming")]
|
||||||
msg = f"no leader detected | " + " ".join(down_nodes)
|
msg = f"no leader detected | " + " ".join(down_nodes)
|
||||||
push("PATRONI-CLUSTER", "down", msg, ping_ms)
|
push("Patroni Cluster", "down", msg, ping_ms)
|
||||||
else:
|
else:
|
||||||
lag_strs = []
|
lag_strs = []
|
||||||
for name, lag, state in replicas:
|
for name, lag, state in replicas:
|
||||||
@ -81,7 +81,7 @@ def check_patroni_cluster():
|
|||||||
lag_strs.append(f"{name} (lag:{lag_mb:.0f}MB)")
|
lag_strs.append(f"{name} (lag:{lag_mb:.0f}MB)")
|
||||||
|
|
||||||
msg = f"leader: {leader} | replicas: " + " ".join(lag_strs)
|
msg = f"leader: {leader} | replicas: " + " ".join(lag_strs)
|
||||||
push("PATRONI-CLUSTER", "up", msg, ping_ms)
|
push("Patroni Cluster", "up", msg, ping_ms)
|
||||||
|
|
||||||
def check_rabbitmq_cluster():
|
def check_rabbitmq_cluster():
|
||||||
url = "http://rabbitmq:15672/api/healthchecks/node"
|
url = "http://rabbitmq:15672/api/healthchecks/node"
|
||||||
@ -104,14 +104,14 @@ def check_rabbitmq_cluster():
|
|||||||
alarms = [n.get("name") for n in data if n.get("mem_alarm") or n.get("disk_free_alarm")]
|
alarms = [n.get("name") for n in data if n.get("mem_alarm") or n.get("disk_free_alarm")]
|
||||||
if alarms:
|
if alarms:
|
||||||
msg = f"disk/mem alarm active on {','.join(alarms)}"
|
msg = f"disk/mem alarm active on {','.join(alarms)}"
|
||||||
push("RABBITMQ-CLUSTER", "down", msg, ping_ms)
|
push("Rabbitmq Cluster", "down", msg, ping_ms)
|
||||||
return
|
return
|
||||||
|
|
||||||
msg = f"{nodes_running}/{total_nodes} nodes running"
|
msg = f"{nodes_running}/{total_nodes} nodes running"
|
||||||
push("RABBITMQ-CLUSTER", "up", msg, ping_ms)
|
push("Rabbitmq Cluster", "up", msg, ping_ms)
|
||||||
else:
|
else:
|
||||||
msg = err or f"HTTP {resp.status_code if resp else 'Unknown'}"
|
msg = err or f"HTTP {resp.status_code if resp else 'Unknown'}"
|
||||||
push("RABBITMQ-CLUSTER", "down", msg, ping_ms)
|
push("Rabbitmq Cluster", "down", msg, ping_ms)
|
||||||
|
|
||||||
def check_apisix():
|
def check_apisix():
|
||||||
url = "http://apisix:9180/apisix/admin/routes"
|
url = "http://apisix:9180/apisix/admin/routes"
|
||||||
@ -120,9 +120,9 @@ def check_apisix():
|
|||||||
ok, resp, ping_ms, err = http_check(url, headers=headers)
|
ok, resp, ping_ms, err = http_check(url, headers=headers)
|
||||||
|
|
||||||
if ok:
|
if ok:
|
||||||
push("APISIX-GATEWAY", "up", "admin API reachable", ping_ms)
|
push("Apisix Gateway", "up", "admin API reachable", ping_ms)
|
||||||
else:
|
else:
|
||||||
push("APISIX-GATEWAY", "down", f"admin API unreachable: {err or resp.status_code}", ping_ms)
|
push("Apisix Gateway", "down", f"admin API unreachable: {err or resp.status_code}", ping_ms)
|
||||||
|
|
||||||
def check_vault():
|
def check_vault():
|
||||||
hosts_env = os.getenv("VAULT_HOSTS", "vault")
|
hosts_env = os.getenv("VAULT_HOSTS", "vault")
|
||||||
@ -139,31 +139,31 @@ def check_vault():
|
|||||||
ok, resp, ms, err = http_check(url, expected_status=[200, 429, 473])
|
ok, resp, ms, err = http_check(url, expected_status=[200, 429, 473])
|
||||||
max_ping = max(max_ping, ms)
|
max_ping = max(max_ping, ms)
|
||||||
|
|
||||||
if resp:
|
if resp is not None:
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
if not data.get("sealed"):
|
if not data.get("sealed"):
|
||||||
unsealed_count += 1
|
unsealed_count += 1
|
||||||
else:
|
else:
|
||||||
errors.append(f"{node} SEALED")
|
errors.append(f"{node} SEALED")
|
||||||
else:
|
else:
|
||||||
errors.append(f"{node} unreachable")
|
errors.append(f"{node} unreachable: {err}")
|
||||||
|
|
||||||
ping_ms = int((time.time() - start_t) * 1000)
|
ping_ms = int((time.time() - start_t) * 1000)
|
||||||
|
|
||||||
if unsealed_count == total:
|
if unsealed_count == total:
|
||||||
msg = f"{unsealed_count}/{total} unsealed"
|
msg = f"{unsealed_count}/{total} unsealed"
|
||||||
push("VAULT-CLUSTER", "up", msg, ping_ms)
|
push("Vault Cluster", "up", msg, ping_ms)
|
||||||
else:
|
else:
|
||||||
msg = " | ".join(errors) if errors else "Vault checks failed"
|
msg = " | ".join(errors) if errors else "Vault checks failed"
|
||||||
push("VAULT-CLUSTER", "down", msg, ping_ms)
|
push("Vault Cluster", "down", msg, ping_ms)
|
||||||
|
|
||||||
def check_prometheus():
|
def check_prometheus():
|
||||||
url = "http://prometheus:9090/-/healthy"
|
url = "http://prometheus:9090/-/healthy"
|
||||||
ok, resp, ping_ms, err = http_check(url)
|
ok, resp, ping_ms, err = http_check(url)
|
||||||
if ok:
|
if ok:
|
||||||
push("PROMETHEUS", "up", "healthy", ping_ms)
|
push("Prometheus", "up", "healthy", ping_ms)
|
||||||
else:
|
else:
|
||||||
push("PROMETHEUS", "down", f"prometheus unreachable: {err}", ping_ms)
|
push("Prometheus", "down", f"prometheus unreachable: {err}", ping_ms)
|
||||||
|
|
||||||
def check_grafana():
|
def check_grafana():
|
||||||
url = "http://grafana:3000/api/health"
|
url = "http://grafana:3000/api/health"
|
||||||
@ -172,27 +172,27 @@ def check_grafana():
|
|||||||
data = resp.json()
|
data = resp.json()
|
||||||
db_status = data.get("database", "unknown")
|
db_status = data.get("database", "unknown")
|
||||||
if db_status == "ok":
|
if db_status == "ok":
|
||||||
push("GRAFANA", "up", f"ok | db: {db_status}", ping_ms)
|
push("Grafana", "up", f"ok | db: {db_status}", ping_ms)
|
||||||
else:
|
else:
|
||||||
push("GRAFANA", "down", f"db not ok: {db_status}", ping_ms)
|
push("Grafana", "down", f"db not ok: {db_status}", ping_ms)
|
||||||
else:
|
else:
|
||||||
push("GRAFANA", "down", f"grafana unreachable: {err}", ping_ms)
|
push("Grafana", "down", f"grafana unreachable: {err}", ping_ms)
|
||||||
|
|
||||||
def check_portainer():
|
def check_portainer():
|
||||||
url = "http://portainer:9000/api/system/status"
|
url = "http://portainer:9000/api/system/status"
|
||||||
ok, resp, ping_ms, err = http_check(url)
|
ok, resp, ping_ms, err = http_check(url)
|
||||||
if ok:
|
if ok:
|
||||||
push("PORTAINER", "up", "running", ping_ms)
|
push("Portainer", "up", "running", ping_ms)
|
||||||
else:
|
else:
|
||||||
push("PORTAINER", "down", f"portainer unreachable: {err}", ping_ms)
|
push("Portainer", "down", f"portainer unreachable: {err}", ping_ms)
|
||||||
|
|
||||||
def check_loki():
|
def check_loki():
|
||||||
url = "http://loki:3100/ready"
|
url = "http://loki:3100/ready"
|
||||||
ok, resp, ping_ms, err = http_check(url)
|
ok, resp, ping_ms, err = http_check(url)
|
||||||
if ok:
|
if ok:
|
||||||
push("LOKI", "up", "ready", ping_ms)
|
push("Loki", "up", "ready", ping_ms)
|
||||||
else:
|
else:
|
||||||
push("LOKI", "down", f"loki unreachable: {err}", ping_ms)
|
push("Loki", "down", f"loki unreachable: {err}", ping_ms)
|
||||||
|
|
||||||
def run_all_http_checks():
|
def run_all_http_checks():
|
||||||
check_patroni_cluster()
|
check_patroni_cluster()
|
||||||
|
|||||||
@ -35,7 +35,7 @@ def check_mongodb():
|
|||||||
ping_ms = int((time.time() - start_t) * 1000)
|
ping_ms = int((time.time() - start_t) * 1000)
|
||||||
|
|
||||||
if cluster_size == 1:
|
if cluster_size == 1:
|
||||||
push("MONGODB-REPLICASET", "up", "standalone mode OK", ping_ms)
|
push("Mongodb Replicaset", "up", "standalone mode OK", ping_ms)
|
||||||
return
|
return
|
||||||
|
|
||||||
if primary:
|
if primary:
|
||||||
@ -45,13 +45,13 @@ def check_mongodb():
|
|||||||
unhealthy_secs = [s for s in secondaries if s[1] not in ('SECONDARY', 'ARBITER')]
|
unhealthy_secs = [s for s in secondaries if s[1] not in ('SECONDARY', 'ARBITER')]
|
||||||
if unhealthy_secs:
|
if unhealthy_secs:
|
||||||
msg = f"PRIMARY: {primary} | unhealthy: {','.join([s[0] + ':' + s[1] for s in unhealthy_secs])}"
|
msg = f"PRIMARY: {primary} | unhealthy: {','.join([s[0] + ':' + s[1] for s in unhealthy_secs])}"
|
||||||
push("MONGODB-REPLICASET", "down", msg, ping_ms)
|
push("Mongodb Replicaset", "down", msg, ping_ms)
|
||||||
else:
|
else:
|
||||||
push("MONGODB-REPLICASET", "up", msg, ping_ms)
|
push("Mongodb Replicaset", "up", msg, ping_ms)
|
||||||
else:
|
else:
|
||||||
msg = "no PRIMARY | quorum lost"
|
msg = "no PRIMARY | quorum lost"
|
||||||
push("MONGODB-REPLICASET", "down", msg, ping_ms)
|
push("Mongodb Replicaset", "down", msg, ping_ms)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
ping_ms = int((time.time() - start_t) * 1000)
|
ping_ms = int((time.time() - start_t) * 1000)
|
||||||
push("MONGODB-REPLICASET", "down", f"connection failed: {e}", ping_ms)
|
push("Mongodb Replicaset", "down", f"connection failed: {e}", ping_ms)
|
||||||
|
|||||||
@ -24,11 +24,15 @@ def check_redis_sentinel():
|
|||||||
redis_mode = os.getenv("REDIS_MODE", "sentinel")
|
redis_mode = os.getenv("REDIS_MODE", "sentinel")
|
||||||
|
|
||||||
if redis_mode != "sentinel":
|
if redis_mode != "sentinel":
|
||||||
push("REDIS-SENTINEL", "up", "standalone mode (skipped)", int((time.time() - start_t) * 1000))
|
push("Redis Sentinel", "up", "standalone mode (skipped)", int((time.time() - start_t) * 1000))
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sentinel = Sentinel(sentinel_nodes, socket_timeout=3, password=password)
|
sentinel_kwargs = {"socket_timeout": 3}
|
||||||
|
if password:
|
||||||
|
sentinel_kwargs["password"] = password
|
||||||
|
|
||||||
|
sentinel = Sentinel(sentinel_nodes, sentinel_kwargs=sentinel_kwargs, socket_timeout=3, password=password)
|
||||||
|
|
||||||
# Master ping
|
# Master ping
|
||||||
master = sentinel.master_for(master_name, socket_timeout=3, password=password)
|
master = sentinel.master_for(master_name, socket_timeout=3, password=password)
|
||||||
@ -43,8 +47,8 @@ def check_redis_sentinel():
|
|||||||
ping_ms = int((time.time() - start_t) * 1000)
|
ping_ms = int((time.time() - start_t) * 1000)
|
||||||
|
|
||||||
msg = f"master: {master_ip}:{master_port} | replicas: {replicas_count} | sentinels quorum OK"
|
msg = f"master: {master_ip}:{master_port} | replicas: {replicas_count} | sentinels quorum OK"
|
||||||
push("REDIS-SENTINEL", "up", msg, ping_ms)
|
push("Redis Sentinel", "up", msg, ping_ms)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
ping_ms = int((time.time() - start_t) * 1000)
|
ping_ms = int((time.time() - start_t) * 1000)
|
||||||
push("REDIS-SENTINEL", "down", f"quorum FAIL or master unreachable: {e}", ping_ms)
|
push("Redis Sentinel", "down", f"quorum FAIL or master unreachable: {e}", ping_ms)
|
||||||
|
|||||||
@ -38,12 +38,12 @@ def check_swarm_cluster():
|
|||||||
|
|
||||||
if ready_count == total_nodes:
|
if ready_count == total_nodes:
|
||||||
msg = f"{ready_count}/{total_nodes} nodes Ready (managers: {', '.join(managers)})"
|
msg = f"{ready_count}/{total_nodes} nodes Ready (managers: {', '.join(managers)})"
|
||||||
push("SWARM-CLUSTER", "up", msg, ping_ms)
|
push("Swarm Cluster", "up", msg, ping_ms)
|
||||||
else:
|
else:
|
||||||
msg = f"{ready_count}/{total_nodes} nodes Ready | Managers reachable: {len(managers)}"
|
msg = f"{ready_count}/{total_nodes} nodes Ready | Managers reachable: {len(managers)}"
|
||||||
push("SWARM-CLUSTER", "down", msg, ping_ms)
|
push("Swarm Cluster", "down", msg, ping_ms)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
ping_ms = int((time.time() - start_time) * 1000)
|
ping_ms = int((time.time() - start_time) * 1000)
|
||||||
logger.error(f"Swarm check failed: {e}")
|
logger.error(f"Swarm check failed: {e}")
|
||||||
push("SWARM-CLUSTER", "down", str(e), ping_ms)
|
push("Swarm Cluster", "down", str(e), ping_ms)
|
||||||
|
|||||||
@ -70,8 +70,8 @@ def check_etcd_cluster():
|
|||||||
if healthy_count == len(nodes):
|
if healthy_count == len(nodes):
|
||||||
leader_info = f" | leader: {leader}" if leader else ""
|
leader_info = f" | leader: {leader}" if leader else ""
|
||||||
msg = f"{healthy_count}/{len(nodes)} healthy{leader_info}"
|
msg = f"{healthy_count}/{len(nodes)} healthy{leader_info}"
|
||||||
push("ETCD-CLUSTER", "up", msg, ping_ms)
|
push("Etcd Cluster", "up", msg, ping_ms)
|
||||||
else:
|
else:
|
||||||
quorum_msg = f" | quorum at risk ({healthy_count}/{len(nodes)})" if healthy_count < 3 else ""
|
quorum_msg = f" | quorum at risk ({healthy_count}/{len(nodes)})" if healthy_count < 3 else ""
|
||||||
msg = " | ".join(errors) + quorum_msg
|
msg = " | ".join(errors) + quorum_msg
|
||||||
push("ETCD-CLUSTER", "down", msg, ping_ms)
|
push("Etcd Cluster", "down", msg, ping_ms)
|
||||||
|
|||||||
@ -57,6 +57,6 @@ def check_swag_tls():
|
|||||||
msg = " | ".join(msg_parts)
|
msg = " | ".join(msg_parts)
|
||||||
|
|
||||||
if is_down:
|
if is_down:
|
||||||
push("SWAG-TLS", "down", msg, ping_ms)
|
push("Swag Tls", "down", msg, ping_ms)
|
||||||
else:
|
else:
|
||||||
push("SWAG-TLS", "up", msg, ping_ms)
|
push("Swag Tls", "up", msg, ping_ms)
|
||||||
|
|||||||
@ -16,10 +16,10 @@ EXTERNAL_DOMAIN = os.getenv("EXTERNAL_DOMAIN", "iklim.co")
|
|||||||
EXTERNAL_SUBDOMAIN_SUFFIX = os.getenv("EXTERNAL_SUBDOMAIN_SUFFIX", "")
|
EXTERNAL_SUBDOMAIN_SUFFIX = os.getenv("EXTERNAL_SUBDOMAIN_SUFFIX", "")
|
||||||
|
|
||||||
def load_uk_tokens():
|
def load_uk_tokens():
|
||||||
token_file = Path("config/generated/uk_tokens.yml")
|
try:
|
||||||
if not token_file.exists():
|
with open("config/generated/uk_tokens.yml", "r") as f:
|
||||||
return {}
|
|
||||||
with open(token_file, "r") as f:
|
|
||||||
return yaml.safe_load(f) or {}
|
return yaml.safe_load(f) or {}
|
||||||
|
except (FileNotFoundError, OSError):
|
||||||
|
return {}
|
||||||
|
|
||||||
UK_TOKENS = load_uk_tokens()
|
UK_TOKENS = load_uk_tokens()
|
||||||
|
|||||||
@ -2,6 +2,7 @@ import argparse
|
|||||||
import time
|
import time
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
|
import threading
|
||||||
from health_agent.checks import swarm
|
from health_agent.checks import swarm
|
||||||
from health_agent.checks.http import run_all_http_checks
|
from health_agent.checks.http import run_all_http_checks
|
||||||
from health_agent.checks.tcp import check_etcd_cluster
|
from health_agent.checks.tcp import check_etcd_cluster
|
||||||
@ -65,9 +66,9 @@ def run_checks():
|
|||||||
logger.error(f"Error running MongoDB checks: {e}")
|
logger.error(f"Error running MongoDB checks: {e}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
check_storagebox_mount()
|
threading.Thread(target=check_storagebox_mount, daemon=True).start()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error running filesystem checks: {e}")
|
logger.error(f"Error starting filesystem check thread: {e}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description="iklim.co Health Agent")
|
parser = argparse.ArgumentParser(description="iklim.co Health Agent")
|
||||||
@ -88,5 +89,7 @@ if __name__ == "__main__":
|
|||||||
run_checks()
|
run_checks()
|
||||||
else:
|
else:
|
||||||
while True:
|
while True:
|
||||||
|
t_start = time.time()
|
||||||
run_checks()
|
run_checks()
|
||||||
time.sleep(60)
|
elapsed = time.time() - t_start
|
||||||
|
time.sleep(max(0, 60 - elapsed))
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
import logging
|
import logging
|
||||||
from health_agent.config import UK_TOKENS
|
from health_agent.config import load_uk_tokens
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
UK_PUSH_URL_BASE = os.getenv("UK_PUSH_URL_BASE", "https://uptime.tarla.io/api/push")
|
UK_PUSH_URL_BASE = os.getenv("UK_PUSH_URL_BASE", "https://uptime.tarla.io/api/push")
|
||||||
@ -9,13 +9,13 @@ UK_PUSH_URL_BASE = os.getenv("UK_PUSH_URL_BASE", "https://uptime.tarla.io/api/pu
|
|||||||
DRY_RUN = False
|
DRY_RUN = False
|
||||||
|
|
||||||
def push(monitor_name: str, status: str, msg: str, ping_ms: int):
|
def push(monitor_name: str, status: str, msg: str, ping_ms: int):
|
||||||
token = UK_TOKENS.get(monitor_name)
|
token = load_uk_tokens().get(monitor_name)
|
||||||
if not token:
|
if not token:
|
||||||
logger.warning(f"No token found for monitor {monitor_name}")
|
logger.warning(f"No token found for monitor {monitor_name}")
|
||||||
return
|
return
|
||||||
|
|
||||||
if DRY_RUN:
|
if DRY_RUN:
|
||||||
logger.info(f"[DRY-RUN] Would push {monitor_name} status={status} msg={msg} ping={ping_ms}ms", extra={"check": monitor_name, "status": status, "msg": msg, "ping_ms": ping_ms, "source": "uptime_kuma"})
|
logger.info(f"[DRY-RUN] Would push {monitor_name} status={status} msg={msg} ping={ping_ms}ms", extra={"check": monitor_name, "status": status, "push_msg": msg, "ping_ms": ping_ms, "source": "uptime_kuma"})
|
||||||
return
|
return
|
||||||
|
|
||||||
url = f"{UK_PUSH_URL_BASE}/{token}"
|
url = f"{UK_PUSH_URL_BASE}/{token}"
|
||||||
@ -28,6 +28,6 @@ def push(monitor_name: str, status: str, msg: str, ping_ms: int):
|
|||||||
try:
|
try:
|
||||||
response = requests.get(url, params=params, timeout=10)
|
response = requests.get(url, params=params, timeout=10)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
logger.info(f"Pushed {monitor_name} status={status}", extra={"check": monitor_name, "status": status, "msg": msg, "ping_ms": ping_ms, "source": "uptime_kuma"})
|
logger.info(f"Pushed {monitor_name} status={status}", extra={"check": monitor_name, "status": status, "push_msg": msg, "ping_ms": ping_ms, "source": "uptime_kuma"})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to push {monitor_name}: {e}", extra={"check": monitor_name, "status": "push_failed", "error": str(e), "source": "uptime_kuma"})
|
logger.error(f"Failed to push {monitor_name}: {e}", extra={"check": monitor_name, "status": "push_failed", "error": str(e), "source": "uptime_kuma"})
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user