-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
Copy pathhealthd
108 lines (87 loc) · 3.34 KB
/
healthd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python2
"""
healthd
System health monitor daemon for SONiC
"""
import signal
import threading
from sonic_py_common.daemon_base import DaemonBase
from swsssdk import SonicV2Connector
from health_checker.manager import HealthCheckerManager
# Identifier that HealthDaemon.__init__ passes to DaemonBase.__init__
# (presumably used as the syslog tag for this daemon's log messages - confirm).
SYSLOG_IDENTIFIER = 'healthd'
class HealthDaemon(DaemonBase):
    """
    A daemon that runs as a service and performs a system health check at a configurable interval. After each
    check, the objects reported as not OK and an overall summary are stored in the SYSTEM_HEALTH_INFO table of
    STATE_DB.

    NOTE(review): this class does not touch the system LED directly; presumably HealthCheckerManager.check()
    drives it through the chassis object it is given - confirm.
    """
    # Name of the STATE_DB table (and key pattern) that holds the latest check result.
    SYSTEM_HEALTH_TABLE_NAME = 'SYSTEM_HEALTH_INFO'

    def __init__(self):
        """
        Constructor of HealthDaemon. Connects to STATE_DB on the local host and creates the event that is used
        to stop the main loop.
        """
        DaemonBase.__init__(self, SYSLOG_IDENTIFIER)
        self._db = SonicV2Connector(host="127.0.0.1")
        self._db.connect(self._db.STATE_DB)
        # Set by signal_handler() on SIGINT/SIGTERM; run() waits on it between two checks.
        self.stop_event = threading.Event()

    def deinit(self):
        """
        Destructor. Remove all entries in $SYSTEM_HEALTH_TABLE_NAME table.
        :return:
        """
        self._clear_system_health_table()

    def _clear_system_health_table(self):
        """
        Delete every STATE_DB key matching $SYSTEM_HEALTH_TABLE_NAME.
        :return:
        """
        self._db.delete_all_by_pattern(self._db.STATE_DB, HealthDaemon.SYSTEM_HEALTH_TABLE_NAME)

    # Signal handler
    def signal_handler(self, sig, frame):
        """
        Signal handler. SIGHUP is logged and ignored; SIGINT and SIGTERM request the main loop to stop; any
        other signal is logged as a warning.

        NOTE(review): nothing in this class registers the handler; presumably DaemonBase.__init__ does - confirm.
        :param sig: Signal number
        :param frame: not used
        :return:
        """
        if sig == signal.SIGHUP:
            self.log_notice("Caught SIGHUP - ignoring...")
        elif sig == signal.SIGINT:
            self.log_notice("Caught SIGINT - exiting...")
            self.stop_event.set()
        elif sig == signal.SIGTERM:
            self.log_notice("Caught SIGTERM - exiting...")
            self.stop_event.set()
        else:
            # sig is an integer: concatenating it directly to a str raises TypeError, so format it instead.
            self.log_warning("Caught unhandled signal '{}'".format(sig))

    def run(self):
        """
        Check system health in a loop until a stop is requested, then clear the system health table.
        Returns immediately, without checking anything, when the system health configuration file is missing.
        :return:
        """
        self.log_notice("Starting up...")

        # Imported here rather than at module level, as in the original code
        # (presumably so the module can be loaded where sonic_platform is unavailable - confirm).
        import sonic_platform.platform
        chassis = sonic_platform.platform.Platform().get_chassis()
        manager = HealthCheckerManager()
        if not manager.config.config_file_exists():
            self.log_warning("System health configuration file not found, exit...")
            return

        while True:
            state, stat = manager.check(chassis)
            # Results are only published when the manager reports the running state.
            if state == HealthCheckerManager.STATE_RUNNING:
                self._process_stat(chassis, manager.config, stat)

            # Sleep for the configured interval; wait() returns True as soon as stop_event is set.
            if self.stop_event.wait(manager.config.interval):
                break

        self.deinit()

    def _process_stat(self, chassis, config, stat):
        """
        Replace the content of the system health table with the result of one check: one field per object whose
        status is not OK (holding its message), plus a 'summary' field.
        :param chassis: not used
        :param config: not used
        :param stat: Check result, a dict of category -> dict of object name -> object data
        :return:
        """
        from health_checker.health_checker import HealthChecker
        # Start from an empty table so objects that recovered since the last check disappear.
        self._clear_system_health_table()
        # The category keys are not needed, only the per-object data they group.
        for info in stat.values():
            for obj_name, obj_data in info.items():
                if obj_data[HealthChecker.INFO_FIELD_OBJECT_STATUS] == HealthChecker.STATUS_NOT_OK:
                    self._db.set(self._db.STATE_DB, HealthDaemon.SYSTEM_HEALTH_TABLE_NAME, obj_name,
                                 obj_data[HealthChecker.INFO_FIELD_OBJECT_MSG])

        self._db.set(self._db.STATE_DB, HealthDaemon.SYSTEM_HEALTH_TABLE_NAME, 'summary', HealthChecker.summary)
#
# Main =========================================================================
#
def main():
    """
    Entry point of healthd: create the health daemon and run its loop until it returns.
    :return:
    """
    HealthDaemon().run()


if __name__ == '__main__':
    main()