mirror of
https://github.com/munin-monitoring/contrib.git
synced 2025-07-21 18:41:03 +00:00
226 lines
7.7 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
"""Munin plugin to monitor Gluster volume and brick status.
|
|
|
|
=head1 NAME
|
|
|
|
gluster - monitor Gluster volume and brick status
|
|
|
|
=head1 APPLICABLE SYSTEMS
|
|
|
|
Linux systems with Gluster volumes.
|
|
|
|
=head1 CONFIGURATION
|
|
|
|
This plugin must be run as root:
|
|
|
|
[gluster]
|
|
user root
|
|
|
|
=head1 AUTHOR
|
|
|
|
Kim B. Heino <b@bbbs.net>
|
|
|
|
=head1 LICENSE
|
|
|
|
GPLv2
|
|
|
|
=head1 MAGIC MARKERS
|
|
|
|
#%# family=auto
|
|
#%# capabilities=autoconf
|
|
|
|
=cut
|
|
"""
|
|
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import xml.etree.ElementTree
|
|
|
|
|
|
def run_command(command):
    """Execute a gluster CLI command and parse its XML output.

    Returns the parsed root Element, or None when the gluster binary is
    unavailable or its output cannot be parsed as XML (e.g. an error
    message instead of a report).
    """
    args = ['gluster', '--mode=script', '--xml'] + command
    try:
        proc = subprocess.run(args, check=False, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE, encoding='utf-8')
    except FileNotFoundError:
        # gluster is not installed on this host
        return None
    try:
        return xml.etree.ElementTree.fromstring(proc.stdout)
    except xml.etree.ElementTree.ParseError:
        # Error text or empty output is not valid XML
        return None
|
|
|
|
|
|
def safe_name(name):
    """Convert *name* into a munin-safe field name.

    Keeps only alphanumeric characters, lowercased. An empty result
    (e.g. for "/") becomes "root".
    """
    cleaned = ''.join(filter(str.isalnum, name)).lower()
    return cleaned if cleaned else 'root'  # "/" is "root"
|
|
|
|
|
|
def brick_name(name):
    """Shorten a brick name by stripping the domain part of its hostname.

    A brick name looks like "host.domain:/path"; anything without the
    ":/" separator is returned unchanged.
    """
    if ':/' not in name:
        return name
    host, _, path = name.partition(':')
    short_host = host.partition('.')[0]
    return f'{short_host}:{path}'
|
|
|
|
|
|
def find_volumes(need_details):
    """Find gluster volumes and their bricks.

    Returns a list of volume dicts ({name, uuid, status, bricks}), or
    None if the gluster command is unavailable. When need_details is
    True the brick entries are additionally filled with disk/inode usage
    and self-heal counters; without details they keep safe defaults
    (zeroes and munin's 'U' = unknown).
    """
    # gluster --mode=script --xml volume info all
    tree = run_command(['volume', 'info', 'all'])
    if not tree:
        return None
    volumes = []
    for volume in tree.findall('volInfo/volumes/volume'):
        value = {
            'name': volume.find('name').text,
            'uuid': volume.find('id').text,
            'status': volume.find('status').text == '1',
            'bricks': [],
        }
        for brick in volume.findall('bricks/brick'):
            value['bricks'].append({
                'name': brick_name(brick.find('name').text),
                'uuid': brick.find('hostUuid').text,
                'status': False,
                'disk_total': 0,
                'disk_free': 0,
                'inode_total': 0,
                'inode_free': 0,
                'heal_pending': 'U',  # 'U' means "unknown" to munin
                'heal_split': 'U',
                'heal_healing': 'U',
            })
        volumes.append(value)

    # Don't get detailed status unless needed. It can be slow.
    if not need_details:
        return volumes

    # gluster --mode=script --xml volume status all detail
    tree = run_command(['volume', 'status', 'all', 'detail'])
    # BUGFIX: guard against a failed command (None); previously this
    # raised AttributeError instead of degrading to default values.
    if tree is not None:
        for node in tree.findall('volStatus/volumes/volume/node'):
            uuid = node.find('peerid').text
            for volume in volumes:
                for brick in volume['bricks']:
                    if brick['uuid'] == uuid:
                        brick.update({
                            'status': node.find('status').text == '1',
                            'disk_total': int(node.find('sizeTotal').text),
                            'disk_free': int(node.find('sizeFree').text),
                            'inode_total': int(node.find('inodesTotal').text),
                            'inode_free': int(node.find('inodesFree').text),
                        })

    # gluster --mode=script --xml volume heal <volumename> info summary
    for volume in volumes:
        tree = run_command(['volume', 'heal', volume['name'], 'info',
                            'summary'])
        if tree is None:
            # BUGFIX: heal info can fail (e.g. non-replicated volume);
            # keep the 'U' defaults instead of crashing.
            continue
        for node in tree.findall('healInfo/bricks/brick'):
            uuid = node.attrib['hostUuid']
            for brick in volume['bricks']:
                if brick['uuid'] == uuid:
                    brick.update({
                        'heal_pending': node.find(
                            'numberOfEntriesInHealPending').text,
                        'heal_split': node.find(
                            'numberOfEntriesInSplitBrain').text,
                        'heal_healing': node.find(
                            'numberOfEntriesPossiblyHealing').text,
                    })
    return volumes
|
|
|
|
|
|
def print_status(config):
    """Print config or values.

    With config=True, print munin "config" output; with config=False,
    print current values. Under MUNIN_CAP_DIRTYCONFIG=1 both are printed
    in a single pass.
    """
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-statements
    both = os.environ.get('MUNIN_CAP_DIRTYCONFIG') == '1'
    # Detailed (slow) volume status is only fetched when values are printed.
    volumes = find_volumes(not config or both)
    if not volumes:
        return

    # Volume started, all bricks are online
    print('multigraph gluster_status')
    if config or both:
        print('graph_title Gluster volume status')
        print('graph_vlabel Status')
        print('graph_category disk')
        print('graph_info Status: 0 = Stopped, 1 = Degraded, 2 = OK')
        print('graph_args --lower-limit 0 --upper-limit 2')
        print('graph_scale no')
    for volume in volumes:
        name = safe_name(volume['name'])
        if config or both:
            print(f'{name}.label Volume {volume["name"]}')
            # Warn whenever status drops below 2 (started + all bricks up)
            print(f'{name}.warning 2:')
        if not config or both:
            # 0 = stopped, 1 = started but some brick down, 2 = fully up
            status = int(volume['status'])
            if status and all(brick['status'] for brick in volume['bricks']):
                status = 2
            print(f'{name}.value {status}')

    # Brick heal status
    for volume in volumes:
        name = safe_name(volume['name'])
        print(f'multigraph gluster_heal_{name}')
        if config or both:
            print(f'graph_title Gluster volume {name} brick status')
            print('graph_vlabel Entries')
            print('graph_category disk')
            print('graph_args --base 1000 --lower-limit 0')
            for brick in volume['bricks']:
                # Field names are keyed by brick uuid so they stay stable
                # even if the brick path changes.
                bname = safe_name(brick['uuid'])
                print(f'{bname}_pending.label {brick["name"]} '
                      'in heal pending')
                print(f'{bname}_pending.warning 0')
                print(f'{bname}_split.label {brick["name"]} '
                      'in split-brain')
                print(f'{bname}_split.warning 0')
                print(f'{bname}_healing.label {brick["name"]} '
                      'possibly healing')
                print(f'{bname}_healing.warning 0')
        if not config or both:
            for brick in volume['bricks']:
                bname = safe_name(brick['uuid'])
                # Values may be 'U' (unknown) when heal info was unavailable
                print(f'{bname}_pending.value {brick["heal_pending"]}')
                print(f'{bname}_split.value {brick["heal_split"]}')
                print(f'{bname}_healing.value {brick["heal_healing"]}')

    # Brick disk/inode free
    for gtype in ('disk', 'inode'):
        for volume in volumes:
            name = safe_name(volume['name'])
            print(f'multigraph gluster_df_{gtype}_{name}')
            if config or both:
                print(f'graph_title Gluster volume {name} {gtype} usage '
                      'in percent')
                print('graph_vlabel %')
                print('graph_category disk')
                print('graph_args --lower-limit 0 --upper-limit 100')
                print('graph_scale no')
                for brick in volume['bricks']:
                    bname = safe_name(brick['uuid'])
                    print(f'{bname}.label {brick["name"]}')
            if not config or both:
                for brick in volume['bricks']:
                    bname = safe_name(brick['uuid'])
                    # Total of 0 means "no detail available" - report unknown
                    if not brick[f'{gtype}_total']:
                        print(f'{bname}.value U')
                    else:
                        # Used percentage = 100 - free percentage
                        value = (100 - 100 * brick[f'{gtype}_free'] /
                                 brick[f'{gtype}_total'])
                        print(f'{bname}.value {value}')
|
|
|
|
|
|
if __name__ == '__main__':
    # Munin calls the plugin with "autoconf", "config" or no argument.
    action = sys.argv[1] if len(sys.argv) > 1 else ''
    if action == 'autoconf':
        print('yes' if find_volumes(False) else
              'no (no Gluster volumes found)')
    elif action == 'config':
        print_status(True)
    else:
        print_status(False)
|