93 lines
2.9 KiB
Python

import argparse
import time
import logging
import json
from health_agent.checks import swarm
from health_agent.checks.http import run_all_http_checks
from health_agent.checks.tcp import check_etcd_cluster
from health_agent.checks.tls import check_swag_tls
from health_agent.checks.redis_sentinel import check_redis_sentinel
from health_agent.checks.mongodb import check_mongodb
from health_agent.checks.filesystem import check_storagebox_mount
from health_agent.events.docker_events import start_docker_event_listener
import health_agent.uptime_kuma as uk
class JSONFormatter(logging.Formatter):
    """Render each log record as a single-line JSON object.

    The object always carries ``time``, ``level``, ``logger`` and ``msg``.
    Any of the attributes named in ``EXTRA_FIELDS`` that are present on the
    record (e.g. supplied through ``extra=``) are copied over, and a
    formatted traceback is added under ``exc_info`` when the record has
    exception information attached.
    """

    # Optional record attributes copied verbatim into the JSON output.
    EXTRA_FIELDS = ('check', 'status', 'ping_ms', 'source', 'error')

    def format(self, record):
        """Return *record* serialized as a JSON string."""
        log_obj = {
            "time": self.formatTime(record, self.datefmt),
            "level": record.levelname,
            "logger": record.name,
            "msg": record.getMessage(),
        }
        for attr in self.EXTRA_FIELDS:
            if hasattr(record, attr):
                log_obj[attr] = getattr(record, attr)
        if record.exc_info:
            log_obj['exc_info'] = self.formatException(record.exc_info)
        # default=str: an extra such as ``error`` may hold an exception
        # instance or another non-JSON value. Without a fallback json.dumps
        # raises TypeError and the log line is lost; stringify instead.
        return json.dumps(log_obj, default=str)
# Root logging configuration: one stream handler whose output is formatted
# by JSONFormatter, with the root level set to INFO.
handler = logging.StreamHandler()
handler.setFormatter(JSONFormatter())
logging.basicConfig(
    handlers=[handler],
    level=logging.INFO,
)

# Logger used by the functions and entry point in this file.
logger = logging.getLogger("main")
def run_checks():
    """Run every health check once, isolating each check's failure.

    The checks are called in a fixed order. An exception raised by one
    check is logged at ERROR level, together with its traceback, and does
    not prevent the remaining checks from running. Nothing is returned.
    """
    logger.info("Running health checks...")

    # (failure message, check callable) pairs, in execution order. The
    # table is built on every call so the callables are looked up at call
    # time, exactly as direct calls would be.
    checks = (
        ("Error checking Swarm cluster", swarm.check_swarm_cluster),
        ("Error running HTTP checks", run_all_http_checks),
        ("Error running etcd checks", check_etcd_cluster),
        ("Error running TLS checks", check_swag_tls),
        ("Error running Redis checks", check_redis_sentinel),
        ("Error running MongoDB checks", check_mongodb),
        ("Error running filesystem checks", check_storagebox_mount),
    )

    for failure_msg, check in checks:
        try:
            check()
        except Exception as exc:
            # Deliberate catch-all boundary: one broken check must not stop
            # the others. exc_info=True keeps the traceback, which the
            # message text alone would lose.
            logger.error("%s: %s", failure_msg, exc, exc_info=True)
if __name__ == "__main__":
    # Command-line interface: two independent boolean switches.
    parser = argparse.ArgumentParser(description="iklim.co Health Agent")
    for flag, help_text in (
        ("--once", "Run checks once and exit"),
        ("--dry-run", "Run checks but skip Uptime Kuma push"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
    args = parser.parse_args()

    # Dry-run flips the DRY_RUN switch on the uptime_kuma module.
    if args.dry_run:
        uk.DRY_RUN = True
        logger.info("Dry-run mode enabled — Uptime Kuma pushes will be skipped")

    logger.info("Starting health-agent...")

    # The Docker event listener is only started outside dry-run mode.
    if not args.dry_run:
        start_docker_event_listener()

    # Always run one pass; unless --once was given, keep repeating with a
    # 60-second pause between passes.
    run_checks()
    while not args.once:
        time.sleep(60)
        run_checks()