From c43223a16de9f1c65e0aa14fd6a71837655385f8 Mon Sep 17 00:00:00 2001 From: Louis Lam Date: Wed, 1 Nov 2023 09:36:12 +0800 Subject: [PATCH] Restart running monitors if no heartbeat (#3952) --- server/model/monitor.js | 18 +++++++- server/uptime-kuma-server.js | 89 ++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/server/model/monitor.js b/server/model/monitor.js index 1e08ccd4..c2d5a9a4 100644 --- a/server/model/monitor.js +++ b/server/model/monitor.js @@ -3,7 +3,7 @@ const dayjs = require("dayjs"); const axios = require("axios"); const { Prometheus } = require("../prometheus"); const { log, UP, DOWN, PENDING, MAINTENANCE, flipStatus, TimeLogger, MAX_INTERVAL_SECOND, MIN_INTERVAL_SECOND, - SQL_DATETIME_FORMAT + SQL_DATETIME_FORMAT, isDev, sleep, getRandomInt } = require("../../src/util"); const { tcping, ping, dnsResolve, checkCertificate, checkStatusCode, getTotalClientInRoom, setting, mssqlQuery, postgresQuery, mysqlQuery, mqttAsync, setSetting, httpNtlm, radius, grpcQuery, redisPingAsync, mongodbPing, kafkaProducerAsync, getOidcTokenClientCredentials, rootCertificatesFingerprints @@ -328,6 +328,16 @@ class Monitor extends BeanModel { } } + // Evil + if (isDev) { + if (process.env.EVIL_RANDOM_MONITOR_SLEEP === "SURE") { + if (getRandomInt(0, 100) === 0) { + log.debug("evil", `[${this.name}] Evil mode: Random sleep: ` + beatInterval * 10000); + await sleep(beatInterval * 10000); + } + } + } + // Expose here for prometheus update // undefined if not https let tlsInfo = undefined; @@ -995,6 +1005,7 @@ class Monitor extends BeanModel { if (! this.isStop) { log.debug("monitor", `[${this.name}] SetTimeout for next check.`); this.heartbeatInterval = setTimeout(safeBeat, beatInterval * 1000); + this.lastScheduleBeatTime = dayjs(); } else { log.info("monitor", `[${this.name}] isStop = true, no next check.`); } @@ -1004,7 +1015,9 @@ class Monitor extends BeanModel { /** Get a heartbeat and handle errors */ const safeBeat = async () => { try { + this.lastStartBeatTime = dayjs(); await beat(); + this.lastEndBeatTime = dayjs(); } catch (e) { console.trace(e); UptimeKumaServer.errorLog(e, false); @@ -1013,6 +1026,9 @@ class Monitor extends BeanModel { if (! this.isStop) { log.info("monitor", "Try to restart the monitor"); this.heartbeatInterval = setTimeout(safeBeat, this.interval * 1000); + this.lastScheduleBeatTime = dayjs(); + } else { + log.info("monitor", "isStop = true, no next check."); } } }; diff --git a/server/uptime-kuma-server.js b/server/uptime-kuma-server.js index 6acc8d4d..6b1d3d01 100644 --- a/server/uptime-kuma-server.js +++ b/server/uptime-kuma-server.js @@ -12,6 +12,7 @@ const { Settings } = require("./settings"); const dayjs = require("dayjs"); const childProcess = require("child_process"); const path = require("path"); +const axios = require("axios"); // DO NOT IMPORT HERE IF THE MODULES USED `UptimeKumaServer.getInstance()`, put at the bottom of this file instead. /** @@ -62,6 +63,8 @@ class UptimeKumaServer { */ jwtSecret = null; + checkMonitorsInterval = null; + static getInstance(args) { if (UptimeKumaServer.instance == null) { UptimeKumaServer.instance = new UptimeKumaServer(args); @@ -75,6 +78,9 @@ class UptimeKumaServer { const sslCert = args["ssl-cert"] || process.env.UPTIME_KUMA_SSL_CERT || process.env.SSL_CERT || undefined; const sslKeyPassphrase = args["ssl-key-passphrase"] || process.env.UPTIME_KUMA_SSL_KEY_PASSPHRASE || process.env.SSL_KEY_PASSPHRASE || undefined; + // Set default axios timeout to 5 minutes instead of infinity + axios.defaults.timeout = 300 * 1000; + log.info("server", "Creating express and socket.io instance"); this.app = express(); if (sslKey && sslCert) { @@ -346,6 +352,10 @@ class UptimeKumaServer { if (enable || enable === null) { this.startNSCDServices(); } + + this.checkMonitorsInterval = setInterval(() => { + this.checkMonitors(); + }, 60 * 1000); } /** @@ -358,6 +368,8 @@ class UptimeKumaServer { if (enable || enable === null) { this.stopNSCDServices(); } + + clearInterval(this.checkMonitorsInterval); } /** @@ -388,6 +400,83 @@ class UptimeKumaServer { } } } + + /** + * Start the specified monitor + * @param {number} monitorID ID of monitor to start + * @returns {Promise} + */ + async startMonitor(monitorID) { + log.info("manage", `Resume Monitor: ${monitorID} by server`); + + await R.exec("UPDATE monitor SET active = 1 WHERE id = ?", [ + monitorID, + ]); + + let monitor = await R.findOne("monitor", " id = ? ", [ + monitorID, + ]); + + if (monitor.id in this.monitorList) { + this.monitorList[monitor.id].stop(); + } + + this.monitorList[monitor.id] = monitor; + monitor.start(this.io); + } + + /** + * Restart a given monitor + * @param {number} monitorID ID of monitor to start + * @returns {Promise} + */ + async restartMonitor(monitorID) { + return await this.startMonitor(monitorID); + } + + /** + * Check if monitors are running properly + */ + async checkMonitors() { + log.debug("monitor_checker", "Checking monitors"); + + for (let monitorID in this.monitorList) { + let monitor = this.monitorList[monitorID]; + + // Not for push monitor + if (monitor.type === "push") { + continue; + } + + if (!monitor.active) { + continue; + } + + // Check the lastStartBeatTime, if it is too long, then restart + if (monitor.lastScheduleBeatTime ) { + let diff = dayjs().diff(monitor.lastStartBeatTime, "second"); + + if (diff > monitor.interval * 1.5) { + log.error("monitor_checker", `Monitor Interval: ${monitor.interval} Monitor ` + monitorID + " lastStartBeatTime diff: " + diff); + log.error("monitor_checker", "Unexpected error: Monitor " + monitorID + " is struck for unknown reason"); + log.error("monitor_checker", "Last start beat time: " + R.isoDateTime(monitor.lastStartBeatTime)); + log.error("monitor_checker", "Last end beat time: " + R.isoDateTime(monitor.lastEndBeatTime)); + log.error("monitor_checker", "Last ScheduleBeatTime: " + R.isoDateTime(monitor.lastScheduleBeatTime)); + + // Restart + log.error("monitor_checker", `Restarting monitor ${monitorID} automatically now`); + this.restartMonitor(monitorID); + } else { + //log.debug("monitor_checker", "Monitor " + monitorID + " is running normally"); + } + } else { + //log.debug("monitor_checker", "Monitor " + monitorID + " is not started yet, skipp"); + } + + } + + log.debug("monitor_checker", "Checking monitors end"); + } } module.exports = {