mirror of
https://github.com/munin-monitoring/contrib.git
synced 2025-07-21 18:41:03 +00:00
raid2: yet another raid+scrub monitoring plugin
Supports: - mdstat - btrfs - cciss - megasasctl
This commit is contained in:
parent
a92c9a9b67
commit
d3bcc2f9bf
1 changed files with 196 additions and 0 deletions
196
plugins/disk/raid2
Executable file
196
plugins/disk/raid2
Executable file
|
@ -0,0 +1,196 @@
|
|||
#!/usr/bin/python3 -tt
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""Munin plugin to monitor software and hardware RAID status and scrub status.
|
||||
|
||||
Copyright 2014 Kim B. Heino, Foobar Oy
|
||||
License GPLv2+
|
||||
|
||||
#%# capabilities=autoconf
|
||||
#%# family=auto
|
||||
"""
|
||||
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def safename(variable):
    """Return a Munin-safe field name derived from ``variable``.

    The root mount point ``/`` maps to the fixed name ``btrfs``. For any
    other input every character outside ``[A-Za-z0-9]`` is replaced with
    ``_``, and the result is prefixed with ``_`` when it is empty or would
    otherwise start with a digit.
    """
    if variable == '/':
        return 'btrfs'
    # An explicit ASCII class is used on purpose: str.isalnum() also accepts
    # non-ASCII letters and digits (e.g. in a mount point path), which are
    # not valid in a Munin field name.
    name = re.sub(r'[^A-Za-z0-9]', '_', variable)
    # A field name must not be empty and must not begin with a digit.
    if not name or name[0].isdigit():
        name = '_' + name
    return name
|
||||
|
||||
|
||||
def run_binary(arg):
    """Execute a command and return what it wrote to stdout.

    ``arg`` is the argument list handed to ``subprocess.Popen`` (no shell
    is involved). stderr is captured and discarded. The result is stdout
    decoded as UTF-8 (undecodable bytes dropped) with surrounding
    whitespace stripped, or ``None`` when an ``OSError`` is raised while
    starting or talking to the process.
    """
    try:
        process = subprocess.Popen(
            arg,
            shell=False,
            close_fds=True,
            bufsize=-1,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # Only stdout is of interest; the stderr half is ignored.
        captured = process.communicate()[0]
    except OSError:
        return None
    else:
        text = captured.decode('utf-8', 'ignore')
        return text.strip()
|
||||
|
||||
|
||||
def find_cciss():
    """Collect hardware RAID status from /usr/bin/cciss_vol_status.

    Returns a list of ``(device, status, description)`` tuples. ``status``
    is 1 when the output line paired with the device contains
    ``' status: OK'`` and 0 for any other line containing ``' status: '``;
    lines without a status are skipped. An empty list is returned when no
    candidate device file exists, the binary is missing, or it produced no
    output.
    """
    binary = '/usr/bin/cciss_vol_status'

    # Candidate device nodes, in sorted order.
    candidates = glob.glob('/dev/sg*') + glob.glob('/dev/cciss/c*d0')
    candidates.sort()
    if not (candidates and os.path.exists(binary)):
        return []

    output = run_binary([binary] + candidates)
    if not output:
        return []

    # Output line N is paired with device file N; zip() stops at whichever
    # of the two sequences is shorter.
    found = []
    for device, line in zip(candidates, output.splitlines()):
        if ' status: ' not in line:
            continue
        status = 1 if ' status: OK' in line else 0
        description = 'Hardware RAID device {}'.format(device)
        found.append((device, status, description))
    return found
|
||||
|
||||
|
||||
def find_megasasctl():
    """Query /usr/sbin/megasasctl for LSI hardware RAID health.

    Returns a one-element list ``[('lsi', status, description)]`` where
    ``status`` is 1 when ``megasasctl -HB`` printed nothing and 0 when it
    printed anything. Returns an empty list when the binary does not exist
    or could not be executed.
    """
    statexe = '/usr/sbin/megasasctl'

    # Find binary
    if not os.path.exists(statexe):
        return []

    # Run binary
    data = run_binary([statexe, '-HB'])
    if data is None:
        # The binary could not be started at all. Do not report that as a
        # healthy controller; report no device instead, as find_cciss()
        # does when it gets no data.
        return []

    # Any output is treated as a problem report, empty output as healthy.
    # NOTE(review): presumably -H limits output to unhealthy items and -B
    # ignores battery problems -- confirm against megasasctl(8).
    status = 0 if data else 1
    return [('lsi', status, 'Hardware RAID device LSI')]
|
||||
|
||||
|
||||
def find_mdstat():
    """Parse /proc/mdstat for software RAID devices.

    Returns a list of ``(device, status, description)`` tuples, one per
    ``mdN : ...`` entry. ``status`` is 0 when the line immediately after
    the device line contains an underscore and 1 otherwise. Returns an
    empty list when the file cannot be opened or read.
    """
    # Read file; the context manager closes the handle even if reading
    # fails part-way.
    try:
        with open('/proc/mdstat') as fhn:
            lines = fhn.readlines()
    except IOError:
        return []

    # Parse it
    devices = []
    device = None
    for line in lines:
        if re.match(r'^md\d+ : ', line):
            # Remember the device; its state is read from the next line.
            device = line.split()[0]
        elif device:
            # NOTE(review): presumably the underscore comes from a member
            # map such as [U_] marking a missing disk -- confirm against
            # the md documentation.
            status = 0 if '_' in line else 1
            desc = 'Software RAID device {}'.format(device)
            devices.append((device, status, desc))
            # Only the first line after the device line is inspected.
            device = None
    return devices
|
||||
|
||||
|
||||
def find_btrfs():
    """Parse /proc/mounts and btrfs scrub status. Ignore csum errors.

    For each distinct btrfs source device (first mount point seen wins),
    runs ``/sbin/btrfs scrub status -R <mount>``. Returns a list of
    ``(mount, status, description)`` tuples where ``status`` is 1 only
    when all of the read, verify, super, malloc, uncorrectable and
    unverified error counters are reported as 0. Mounts whose scrub output
    is missing or lacks ``data_extents_scrubbed:`` are skipped. Returns an
    empty list when /proc/mounts cannot be opened or read.
    """
    # Read file; the context manager closes the handle even if reading
    # fails part-way.
    try:
        with open('/proc/mounts') as fhn:
            lines = fhn.readlines()
    except IOError:
        return []

    # Parse it
    devmap = {}
    for line in lines:
        fields = line.split()
        if (len(fields) > 2 and fields[2] == 'btrfs'
                and fields[0] not in devmap):
            # /proc/mounts writes whitespace and backslashes in paths as
            # three-digit octal escapes (e.g. "\040" for a space). Decode
            # them so the real path is handed to btrfs.
            devmap[fields[0]] = re.sub(
                r'\\([0-7]{3})',
                lambda match: chr(int(match.group(1), 8)),
                fields[1])

    # csum_errors is deliberately not in this list.
    counters = (
        'read_errors',
        'verify_errors',
        'super_errors',
        'malloc_errors',
        'uncorrectable_errors',
        'unverified_errors',
    )

    # Iterate devices
    devices = []
    for mount in devmap.values():
        data = run_binary(['/sbin/btrfs', 'scrub', 'status', '-R', mount])
        if not data or 'data_extents_scrubbed:' not in data:
            continue
        # Collapse whitespace so a decoded tab or newline in the path cannot
        # break the one-line label printed by config().
        desc = 'BTRFS in {}'.format(' '.join(mount.split()))
        healthy = all('{}: 0'.format(name) in data for name in counters)
        devices.append((mount, 1 if healthy else 0, desc))
    return devices
|
||||
|
||||
|
||||
def find_devices():
    """Run every detector and return the combined list of device tuples.

    Detectors run in the order cciss, megasasctl, mdstat, btrfs and their
    results are concatenated in that order.
    """
    found = []
    for finder in (find_cciss, find_megasasctl, find_mdstat, find_btrfs):
        found.extend(finder())
    return found
|
||||
|
||||
|
||||
def autoconf():
    """Print "yes" when at least one device is detected, otherwise "no"."""
    if find_devices():
        print('yes')
    else:
        print('no')
|
||||
|
||||
|
||||
def config(devices):
    """Print the Munin graph configuration for ``devices``.

    ``devices`` is a sequence of ``(name, status, description)`` tuples.
    The graph header is printed first, then a label and a ``1:`` warning
    range for each device. When the environment variable
    ``MUNIN_CAP_DIRTYCONFIG`` equals ``'1'`` the values are printed as well.
    """
    header = (
        'graph_title RAID and Scrub Status',
        'graph_vlabel Status',
        'graph_category disk',
        'graph_info Health status: 0 = Error, 1 = OK',
        'graph_args --base 1000 --lower-limit 0 --upper-limit 1',
    )
    for text in header:
        print(text)

    for entry in devices:
        field = safename(entry[0])
        print('{}.label {}'.format(field, entry[2]))
        print('{}.warning 1:'.format(field))

    if os.environ.get('MUNIN_CAP_DIRTYCONFIG') == '1':
        fetch(devices)
|
||||
|
||||
|
||||
def fetch(devices):
    """Print one ``<field>.value <status>`` line per device.

    ``devices`` is a sequence of tuples whose first item is the device
    name (converted to a field name with ``safename``) and whose second
    item is the status value to report.
    """
    for entry in devices:
        field = safename(entry[0])
        value = entry[1]
        print('{}.value {}'.format(field, value))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The first command-line argument selects the mode; anything other
    # than "autoconf" or "config" (including no argument) prints values.
    mode = sys.argv[1] if len(sys.argv) > 1 else None
    if mode == 'autoconf':
        autoconf()
    elif mode == 'config':
        config(find_devices())
    else:
        fetch(find_devices())
|
Loading…
Add table
Add a link
Reference in a new issue