forked from ksonney/megaclisas-status
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmegaclisas-status
executable file
·313 lines (246 loc) · 10.9 KB
/
megaclisas-status
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
#!/usr/bin/python
'''LSI MegaCLI RAID status tool.
Usage:
megaclisas-status [--nagios | --snmp | -q]
Options:
--nagios Format output suitable for nagios.
--snmp Format output for SNMP.
-q Quiet mode, only displays output if an array or disk is bad -
intended for use from cron.
Known issues:
- MegaCLI return codes can not be trusted which makes it difficult to detect
errors in its output.
- Not yet tested with multiple adapters.
'''
import os
import subprocess
import argparse
import re
import sys
# Default megacli binary locations for Debian/Ubuntu and RHEL/CentOS.
# Append another path to this list to have it detected on your platform.
BINARYPATHS = ['/usr/sbin/megacli', '/opt/MegaRAID/MegaCli/MegaCli64']
BINARYPATH = ''
def get_output(cmd):
    '''Run a command line and capture what it prints.

    cmd is split on whitespace and executed directly (no shell).

    Returns a (stdout, stderr, retcode) tuple where stdout and stderr are
    text. Exits the script through sys.exit() when the command cannot be
    started at all.
    '''
    try:
        # Text-mode pipes: callers split the output on '\n', which only works
        # on text. Without this Python 3 would hand back bytes.
        process = subprocess.Popen(cmd.split(),
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   universal_newlines=True)
        stdout, stderr = process.communicate()
    except OSError as e:
        sys.exit('Failed to run %s, command returned %s.' % (cmd, str(e)))
    # communicate() waits for the process, so returncode is set here.
    return (stdout, stderr, process.returncode)
def get_megacli_output(arguments):
    '''Run the specified megacli command and return stdout as a list.

    Lines returned are stripped and empty lines removed.

    Note that the return codes from megacli can not be trusted to be POSIX
    compatible - for a number of commands that complete successfully it will
    exit 1!

    Exits the script through sys.exit() when megacli wrote to stderr AND
    returned a non-zero code.
    '''
    cmd = BINARYPATH + ' ' + arguments
    stdout, stderr, retcode = get_output(cmd)

    # Under Python 3 a byte-mode pipe yields bytes; normalise to text so the
    # split below works. Under Python 2 the output is already str.
    if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8', 'replace')
    if not isinstance(stderr, str):
        stderr = stderr.decode('utf-8', 'replace')

    # Idiotic, however this seems to be the only way to get vaguely reasonable
    # error output from megacli
    if stderr and retcode != 0:
        sys.exit('Failed to run "%s", command returned %s.' % (cmd, stderr.strip()))

    # A real list (not a filter iterator) because callers index into it.
    stripped = (line.strip() for line in stdout.split('\n'))
    return [line for line in stripped if line]
def get_line_value(line, chars=None):
    '''Return the value part of a "Key: value" line.

    Everything after the first colon is returned, so values that themselves
    contain a colon are kept whole. chars is passed to str.strip() to choose
    which leading/trailing characters are removed (whitespace by default).

    Raises IndexError when the line contains no colon.
    '''
    return line.split(':', 1)[1].strip(chars)
def get_number_of_controllers():
    '''Return the controller count reported by "megacli -adpCount".

    Returns 0 when the output contains no "Controller Count" line.
    '''
    for line in get_megacli_output('-adpCount -NoLog'):
        if re.match(r'^Controller Count.*$', line):
            # Strip spaces and the trailing dot around the number.
            return int(get_line_value(line, ' .'))
    # Always hand back an int: the caller compares this against a counter.
    return 0
def get_arrays(controller_id):
    '''Yield the numeric id of every virtual disk/drive on a controller.

    Ids are taken from the "Virtual Disk: N" / "Virtual Drive: N" lines of
    the "-LDInfo -Lall" output.
    '''
    header = re.compile(r'^Virtual (Disk|Drive): (\d+).*$')
    listing = get_megacli_output('-LDInfo -Lall -a%d -NoLog' % controller_id)
    for entry in listing:
        found = header.match(entry)
        if found is None:
            continue
        yield int(found.group(2))
def get_controller_info(controller_id):
    '''Look up the product name and firmware package build of a controller.

    Returns a (model, firmware_version) tuple; an item stays None when its
    line is missing from the "-AdpAllInfo" output.
    '''
    model = firmware_version = None
    adapter_lines = get_megacli_output('-AdpAllInfo -a%d -NoLog' % controller_id)
    for text in adapter_lines:
        if re.match(r'^Product Name.*$', text):
            model = get_line_value(text)
            continue
        if re.match(r'^FW Package Build.*$', text):
            firmware_version = get_line_value(text)
            continue
        # Both values collected: no need to scan the rest of the output.
        if model and firmware_version:
            break
    return model, firmware_version
def get_array_info(controller_id, array_id):
    '''Collect the summary details of one logical drive.

    Parses the output of "-LDInfo -l<array_id> -a<controller_id>".

    Returns a tuple of strings
    (id, raid_type, size, state, in_progress, cachecade_info).
    raid_type, size and state are 'Unknown' when megacli does not report
    them; in_progress and cachecade_info default to 'None'.
    '''
    # @TODO - Merge this with get_arrays to avoid requesting the same data
    # twice.
    _id = 'c%du%d' % (controller_id, array_id)
    ldpd_count = 0
    span_depth = 0
    # Defaults so that a missing line cannot leave a name unbound below.
    raid_level = None
    raid_type = 'Unknown'
    size = 'Unknown'
    state = 'Unknown'
    in_progress = 'None'
    cachecade_info = 'None'

    # list() because the "Ongoing Progresses" branch indexes into the output.
    output = list(get_megacli_output('-LDInfo -l%d -a%d -NoLog' % (array_id, controller_id)))
    for line_number, line in enumerate(output):
        if re.match(r'Number Of Drives\s*((per span))?:.*[0-9]+$', line):
            ldpd_count = get_line_value(line)
        elif re.match(r'Span Depth *:.*[0-9]+$', line):
            span_depth = get_line_value(line)
        elif re.match(r'^RAID Level\s*:.*$', line):
            # Takes the number after the dash in the first comma-separated
            # field of the value.
            raid_level = line.split(':')[1].split(',')[0].split('-')[1].strip()
            raid_type = 'RAID' + raid_level
        elif re.match(r'^Size\s*:.*$', line):
            size = get_line_value(line)
            # Size reported in MB
            if re.match(r'^.*MB$', size):
                size = size.strip(' MB')
                size = str(int(round((float(size) / 1000)))) + 'G'
            # Size reported in TB
            elif re.match(r'^.*TB$', size):
                size = size.strip(' TB')
                size = str(int(round((float(size) * 1000)))) + 'G'
            # Size reported in GB (default)
            else:
                size = size.strip(' GB')
                size = str(int(round((float(size))))) + 'G'
        elif re.match(r'^State\s*:.*$', line):
            state = get_line_value(line)
        elif re.match(r'^Ongoing Progresses\s*:.*$', line):
            # @todo matches original behaviour, not sure if it is correct
            # however?
            # The progress text is taken from the following line; guard
            # against this header being the last line of the output.
            if line_number + 1 < len(output):
                in_progress = output[line_number + 1]
        elif re.match(r'Cache Cade Type\s*:.*$', line):
            cachecade_info = "Type : %s" % (get_line_value(line))
        elif re.match(r'^Target Id of the Associated LDs\s*:.*$', line):
            # One entry per listed target id (not this array's own id).
            associated = []
            for target in line.split(':')[1].strip().split(','):
                target = target.strip()
                if target:
                    associated.append('c%du%s' % (controller_id, target))
            cachecade_info = "Associated : %s" % (', '.join(associated))

    # A spanned array with a RAID level below 10 is reported with a trailing
    # "0" (e.g. RAID1 -> RAID10).
    if ldpd_count and (int(span_depth) > 1):
        if raid_level is not None and int(raid_level) < 10:
            raid_type = raid_type + "0"

    return (_id, raid_type, size, state, in_progress, cachecade_info)
def get_disk_info(controller_id):
    '''List the physical disks behind every virtual drive of a controller.

    Walks the "-LdPdInfo" output and returns a list of
    (array_id, disk_id, firmware_state, model) tuples, the two ids as ints.
    '''
    # @todo Surely there has to be a way to get this on a per controller/array
    # combination
    unset = 'undef'
    current_array = False
    current_disk = False
    firmware_state = unset
    inquiry = unset
    disks = []

    for line in get_megacli_output('-LdPdInfo -a%d -NoLog' % controller_id):
        if re.match(r'^(CacheCade )?Virtual (Disk|Drive): [0-9]+.*$', line):
            header = line.split('(')[0]
            current_array = header.split(':')[1].strip()
        elif re.match(r'Firmware state: .*$', line):
            firmware_state = get_line_value(line)
        elif re.match(r'Inquiry Data: .*$', line):
            # Collapse runs of spaces in the inquiry string.
            inquiry = re.sub(' +', ' ', get_line_value(line))
        elif re.match(r'PD: [0-9]+ Information.*$', line):
            current_disk = line.split()[1].strip()

        # Emit a row only once every field of the current disk is known.
        complete = (current_array and current_disk and
                    firmware_state != unset and inquiry != unset)
        if not complete:
            continue
        disks.append((int(current_array), int(current_disk),
                      firmware_state, inquiry))
        # Reset the per-disk fields; the array and disk ids are kept until
        # the next header line overwrites them.
        firmware_state = unset
        inquiry = unset

    return disks
def display_standard(data, bad):
    '''Print the human-readable controller, array and disk tables.

    data is indexed as data[controller_id][array_id], each entry holding a
    'details' sequence of strings and a 'disks' list of disk tuples.
    When bad is true a warning is printed and the script exits with status 1.

    print is used in its single-argument call form, which behaves the same
    under Python 2 and Python 3.
    '''
    print('-- Controller information --')
    print('-- ID | Model | Firmware')
    for controller_id in data:
        controller_model, firmware_version = get_controller_info(controller_id)
        print('c%d | %s | %s' % (controller_id, controller_model, firmware_version))
    print('')

    print('-- Array information --')
    print('-- ID | Type | Size | Status | InProgress | CacheCade Info')
    for controller_id in data:
        for array_id in data[controller_id]:
            print(' | '.join(data[controller_id][array_id]['details']))
    print('')

    print('-- Disk information --')
    print('-- ID | Model | Status')
    for controller_id in data:
        for array_id in data[controller_id]:
            for disk in data[controller_id][array_id]['disks']:
                # disk[0] and disk[1] form the id; disk[3] is printed under
                # "Model" and disk[2] under "Status".
                print('c%du%dp%d | %s | %s' % (controller_id, disk[0],
                                               disk[1], disk[3], disk[2]))

    if bad:
        print('\nThere is at least one disk/array in a NOT OPTIMAL state.')
        sys.exit(1)
def display_nagios(good_arrays, bad_arrays, good_disks, bad_disks, bad):
    '''Print a one-line status summary for nagios.

    Prints "RAID OK - ..." normally. When bad is true, prints
    "RAID ERROR - ..." and exits the script with status 2.

    print is used in its single-argument call form, which behaves the same
    under Python 2 and Python 3.
    '''
    status_line = 'Arrays: OK:%d Bad:%d - Disks: OK:%d Bad:%d' % \
        (good_arrays, bad_arrays, good_disks, bad_disks)
    if bad:
        print('RAID ERROR - ' + status_line)
        sys.exit(2)
    print('RAID OK - ' + status_line)
def display_snmp(good_arrays, bad_arrays, good_disks, bad_disks, bad):
    '''Print the status counters for SNMP.

    Output is a literal "1" followed by int(bad), good_arrays, bad_arrays,
    good_disks and bad_disks, formatted with the string below.

    print is used in its single-argument call form, which behaves the same
    under Python 2 and Python 3.
    '''
    # NOTE(review): the format string puts no separator between the values,
    # so multi-digit counts run together - confirm this is what the SNMP
    # consumer expects.
    print('1%d%d%d%d%d' % (int(bad), good_arrays, bad_arrays, good_disks, bad_disks))
def get_raid_status():
    '''Gather array and disk details for every controller.

    Returns (data, good_arrays, bad_arrays, good_disks, bad_disks) where
    data[controller_id][array_id] is a dict with a 'details' tuple (from
    get_array_info) and a 'disks' list (tuples from get_disk_info). An array
    counts as good when its state is 'Optimal'; a disk counts as good when
    its state is 'Online' or 'Online, Spun Up'.
    '''
    data = {}
    good_arrays = 0
    bad_arrays = 0
    good_disks = 0
    bad_disks = 0

    # Ask megacli for the controller count once instead of on every loop
    # iteration; "or 0" covers a count that could not be determined.
    controller_count = get_number_of_controllers() or 0

    for controller_id in range(controller_count):
        data[controller_id] = {}

        # Populate array details
        #
        # @TODO - Merge get_arrays and get_array_info.
        #
        # The megacli command in get_arrays will return all the info we need.
        #
        # We should parse the data at that point rather than making another
        # call to get the same info via get_array_info.
        for array_id in get_arrays(controller_id):
            data[controller_id][array_id] = {'disks': []}
            array_details = get_array_info(controller_id, array_id)
            data[controller_id][array_id]['details'] = array_details
            if array_details[3] == 'Optimal':
                good_arrays += 1
            else:
                bad_arrays += 1

        # Populate disk information
        # NOTE(review): this assumes every array id reported by get_disk_info
        # was also yielded by get_arrays; otherwise the lookup raises KeyError.
        for disk in get_disk_info(controller_id):
            data[controller_id][disk[0]]['disks'].append(disk)
            if disk[2] in ('Online', 'Online, Spun Up'):
                good_disks += 1
            else:
                bad_disks += 1

    return data, good_arrays, bad_arrays, good_disks, bad_disks
if __name__ == '__main__':
    # If we are running under snmpd, we don't get user info, so assume root if
    # no username present
    username = os.getenv('USER') or 'root'

    # Quick check to see if we're root, because on most cards tried so far
    # root access is needed to query the controller.
    # @todo replace or augment this - what about users in wheel/adm groups?
    if username != 'root':
        print('You can only run this script as root or with sudo, sucks I know. Blame the RAID card. User is ' + username)
        sys.exit(5)

    parser = argparse.ArgumentParser(description='LSI MegaCLI RAID status tool.')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--nagios', help='Format output suitable for nagios.', action='store_true')
    group.add_argument('--snmp', help='Format output suitable for SNMP.', action='store_true')
    group.add_argument('-q', '--quiet', help='Quiet mode, only displays output if an array or disk is bad - intended for use from cron (not applicable to nagios or snmp mode).', action='store_true')
    args = parser.parse_args()

    # Check binary exists (and +x), if not print an error message
    # or return UNKNOWN nagios error code
    for binarytestpath in BINARYPATHS:
        if os.path.exists(binarytestpath) and os.access(binarytestpath, os.X_OK):
            BINARYPATH = binarytestpath
            break
    else:
        # No candidate was usable. BINARYPATH is still empty at this point,
        # so report the locations that were searched instead.
        searched = ' or '.join(BINARYPATHS)
        if args.nagios:
            print('UNKNOWN - Cannot find ' + searched)
        elif args.snmp:
            print('0')
        else:
            print('Cannot find ' + searched + '. Please install it.')
        sys.exit(3)

    data, good_arrays, bad_arrays, good_disks, bad_disks = get_raid_status()
    bad = bool(bad_arrays or bad_disks)

    if args.nagios:
        display_nagios(good_arrays, bad_arrays, good_disks, bad_disks, bad)
    elif args.snmp:
        display_snmp(good_arrays, bad_arrays, good_disks, bad_disks, bad)
    elif bad or not args.quiet:
        # In quiet mode the tables are only shown when something is bad.
        display_standard(data, bad)