1
0
Fork 0
mirror of https://github.com/munin-monitoring/contrib.git synced 2025-07-21 18:41:03 +00:00

raid2: yet another raid+scrub monitoring plugin

Supports:
- mdstat
- btrfs
- cciss
- megasasctl
This commit is contained in:
Kim B. Heino 2020-12-01 11:51:40 +02:00 committed by Lars Kruse
parent a92c9a9b67
commit d3bcc2f9bf

196
plugins/disk/raid2 Executable file
View file

@ -0,0 +1,196 @@
#!/usr/bin/python3 -tt
# -*- coding: utf-8 -*-
"""Munin plugin to monitor software and hardware RAID status and scrub status.
Copyright 2014 Kim B. Heino, Foobar Oy
License GPLv2+
#%# capabilities=autoconf
#%# family=auto
"""
import glob
import os
import re
import subprocess
import sys
def safename(variable):
"""Return safe variable name."""
if variable == '/':
return 'btrfs'
ret = []
for letter in variable:
if letter.isalnum():
ret.append(letter)
else:
ret.append('_')
return ''.join(ret)
def run_binary(arg):
"""Run binary and return output."""
try:
cmd = subprocess.Popen(
arg, shell=False, close_fds=True, bufsize=-1,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
outdata, dummy_errdata = cmd.communicate()
except OSError:
return None
return outdata.decode('utf-8', 'ignore').strip()
def find_cciss():
"""Parse /usr/bin/cciss_vol_status."""
statexe = '/usr/bin/cciss_vol_status'
# Find device files and binary
devfiles = sorted(glob.glob('/dev/sg*') + glob.glob('/dev/cciss/c*d0'))
if not devfiles or not os.path.exists(statexe):
return []
# Run binary
data = run_binary([statexe] + devfiles)
if not data:
return []
# Parse result
data = data.splitlines()
devices = []
for index, device in enumerate(devfiles):
if index == len(data):
break
if ' status: OK' in data[index]:
status = 1
elif ' status: ' not in data[index]:
continue
else:
status = 0
desc = 'Hardware RAID device {}'.format(device)
devices.append((device, status, desc))
return devices
def find_megasasctl():
"""Parse /usr/sbin/megasasctl."""
statexe = '/usr/sbin/megasasctl'
# Find binary
if not os.path.exists(statexe):
return []
# Run binary
data = run_binary(['/usr/sbin/megasasctl', '-HB'])
if data:
status = 0
else:
status = 1
return [('lsi', status, 'Hardware RAID device LSI')]
def find_mdstat():
"""Parse /proc/mdstat."""
# Read file
try:
fhn = open('/proc/mdstat')
except IOError:
return []
lines = fhn.readlines()
fhn.close()
# Parse it
devices = []
device = None
for line in lines:
if re.match(r'^md\d+ : ', line):
device = line.split()[0]
elif device:
if '_' in line:
status = 0
else:
status = 1
desc = 'Software RAID device {}'.format(device)
devices.append((device, status, desc))
device = None
return devices
def find_btrfs():
"""Parse /proc/mounts and btrfs scrub status. Ignore csum errors."""
# Read file
try:
fhn = open('/proc/mounts')
except IOError:
return []
lines = fhn.readlines()
fhn.close()
# Parse it
devmap = {}
for line in lines:
line = line.split()
if len(line) > 2 and line[2] == 'btrfs' and line[0] not in devmap:
devmap[line[0]] = line[1]
# Iterate devices
devices = []
for mount in devmap.values():
data = run_binary(['/sbin/btrfs', 'scrub', 'status', '-R', mount])
if not data or 'data_extents_scrubbed:' not in data:
continue
desc = 'BTRFS in {}'.format(mount)
if ( # pylint: disable=too-many-boolean-expressions
'read_errors: 0' in data and
'verify_errors: 0' in data and
'super_errors: 0' in data and
'malloc_errors: 0' in data and
'uncorrectable_errors: 0' in data and
'unverified_errors: 0' in data
):
devices.append((mount, 1, desc))
else:
devices.append((mount, 0, desc))
return devices
def find_devices():
"""Return list of found device tuples."""
devices = find_cciss() + find_megasasctl() + find_mdstat() + find_btrfs()
return devices
def autoconf():
"""Print "yes" or "no"."""
status = 'yes' if find_devices() else 'no'
print(status)
def config(devices):
"""Print plugin config."""
print('graph_title RAID and Scrub Status')
print('graph_vlabel Status')
print('graph_category disk')
print('graph_info Health status: 0 = Error, 1 = OK')
print('graph_args --base 1000 --lower-limit 0 --upper-limit 1')
for device in devices:
print('{}.label {}'.format(safename(device[0]), device[2]))
print('{}.warning 1:'.format(safename(device[0])))
if os.environ.get('MUNIN_CAP_DIRTYCONFIG') == '1':
fetch(devices)
def fetch(devices):
"""Print values."""
for device in devices:
print('{}.value {}'.format(safename(device[0]), device[1]))
if __name__ == '__main__':
if len(sys.argv) > 1 and sys.argv[1] == 'autoconf':
autoconf()
elif len(sys.argv) > 1 and sys.argv[1] == 'config':
config(find_devices())
else:
fetch(find_devices())