Update all hardcoded push monitor names in check files to match the new Title Case With Space format in monitors.yml. The uk_tokens.yml keys are derived from monitor names so the push() calls must match exactly.
58 lines
2.0 KiB
Python
58 lines
2.0 KiB
Python
import os
|
|
import time
|
|
import logging
|
|
from pymongo import MongoClient
|
|
from health_agent.uptime_kuma import push
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
def check_mongodb():
    """Check the MongoDB replica set and push the result to the monitor.

    Connects using ``MONGO_URI`` (defaulting to the three-node ``rs0`` URI),
    runs ``replSetGetStatus`` and reports through
    ``health_agent.uptime_kuma.push`` under the "Mongodb Replicaset" monitor:

    * ``up``   - ``CLUSTER_SIZE_MONGODB`` is 1 and the command succeeded, or
      a PRIMARY exists and every other member is SECONDARY or ARBITER.
    * ``down`` - no member is PRIMARY, at least one non-primary member is in
      some other state, or connecting / running the command raised.

    The ping value sent with every push is the elapsed time in milliseconds
    since the start of the check.

    Returns:
        None. The outcome is delivered only through ``push()``.

    Raises:
        ValueError: if ``CLUSTER_SIZE_MONGODB`` is set to a non-integer
            (it is parsed before the guarded section).
    """
    # Local import keeps this block self-contained; pymongo is already a
    # module-level dependency of this file.
    from pymongo import ReadPreference

    # Must match the monitor name in monitors.yml exactly, so keep it in a
    # single place instead of repeating the literal on every push().
    monitor = "Mongodb Replicaset"

    # monotonic() is immune to wall-clock adjustments, unlike time.time().
    start_t = time.monotonic()

    mongo_uri = os.getenv(
        "MONGO_URI",
        "mongodb://mongodb-01:27017,mongodb-02:27017,mongodb-03:27017/?replicaSet=rs0",
    )
    cluster_size = int(os.getenv("CLUSTER_SIZE_MONGODB", "3"))

    try:
        with MongoClient(mongo_uri, serverSelectionTimeoutMS=3000) as client:
            # command() targets the primary by default, so with no primary
            # it would fail server selection and the "no PRIMARY" report
            # below could never be produced. PRIMARY_PREFERRED lets any
            # reachable secondary answer instead.
            status = client.admin.command(
                'replSetGetStatus',
                read_preference=ReadPreference.PRIMARY_PREFERRED,
            )
            ping_ms = int((time.monotonic() - start_t) * 1000)

        primary = None
        others = []  # (name, stateStr) of every member that is not PRIMARY
        for member in status.get('members', []):
            state_str = member.get('stateStr', '')
            name = member.get('name', 'unknown')
            if state_str == 'PRIMARY':
                primary = name
            else:
                others.append((name, state_str))

        if cluster_size == 1:
            # Single-node deployment: a successful command is enough.
            push(monitor, "up", "standalone mode OK", ping_ms)
            return

        if not primary:
            push(monitor, "down", "no PRIMARY | quorum lost", ping_ms)
            return

        # Anything other than SECONDARY/ARBITER (e.g. RECOVERING, STARTUP2,
        # or a missing stateStr) counts as unhealthy.
        unhealthy = [m for m in others if m[1] not in ('SECONDARY', 'ARBITER')]
        if unhealthy:
            details = ','.join(f"{n}:{s}" for n, s in unhealthy)
            push(monitor, "down", f"PRIMARY: {primary} | unhealthy: {details}", ping_ms)
        else:
            listing = ' '.join(f"{n} ({s})" for n, s in others)
            push(monitor, "up", f"PRIMARY: {primary} | secondaries: {listing}", ping_ms)

    except Exception as e:
        # Top-level boundary of the check: report the failure rather than
        # letting it escape, and leave a trace in the agent log.
        ping_ms = int((time.monotonic() - start_t) * 1000)
        logger.warning("MongoDB replica set check failed: %s", e)
        push(monitor, "down", f"connection failed: {e}", ping_ms)
|