Mirror of https://github.com/munin-monitoring/contrib.git, synced 2025-07-21 18:41:03 +00:00
Merge pull request #1449 from kimheino/master
disk/gluster: plugin to monitor Gluster volume and brick status
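The plugin emits one graph family per concern: overall volume status (multigraph gluster_status), per-brick heal entries (gluster_heal_<volume>), and per-brick disk and inode usage (gluster_df_disk_<volume>, gluster_df_inode_<volume>). As a sketch, the 'config' output for a volume named "data" would start like this (reconstructed from print_status in the file below; the volume name is illustrative):

    multigraph gluster_status
    graph_title Gluster volume status
    graph_vlabel Status
    graph_category disk
    graph_info Status: 0 = Stopped, 1 = Degraded, 2 = OK
    graph_args --lower-limit 0 --upper-limit 2
    graph_scale no
    data.label Volume data
    data.warning 2: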
Commit 1dc2844cbb: 1 changed file with 226 additions and 0 deletions
plugins/disk/gluster (new executable file, 226 lines)

@@ -0,0 +1,226 @@
#!/usr/bin/env python3

"""Munin plugin to monitor Gluster volume and brick status.

=head1 NAME

gluster - monitor Gluster volume and brick status

=head1 APPLICABLE SYSTEMS

Linux systems with Gluster volumes.

=head1 CONFIGURATION

This plugin must be run as root:

    [gluster]
    user root

=head1 AUTHOR

Kim B. Heino <b@bbbs.net>

=head1 LICENSE

GPLv2

=head1 MAGIC MARKERS

 #%# family=auto
 #%# capabilities=autoconf

=cut
"""

import os
import subprocess
import sys
import xml.etree.ElementTree


def run_command(command):
    """Run gluster command and return its output as etree."""
    try:
        text = subprocess.run(['gluster', '--mode=script', '--xml'] + command,
                              check=False, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE, encoding='utf-8').stdout
    except FileNotFoundError:
        return None
    try:
        return xml.etree.ElementTree.fromstring(text)
    except xml.etree.ElementTree.ParseError:
        return None


def safe_name(name):
    """Return safe variable name."""
    value = ''.join(char.lower() if char.isalnum() else '' for char in name)
    return value or 'root'  # "/" is "root"


def brick_name(name):
    """Return short version of brick's name, strip domain from hostname."""
    if ':/' not in name:
        return name
    host, path = name.split(':', 1)
    return f'{host.split(".")[0]}:{path}'


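# Illustrative examples for the helpers above, with hypothetical inputs:
# safe_name() keeps only alphanumerics, lowercased, so results are valid
# Munin field names; brick_name() strips the DNS domain from the host part.
#
#   safe_name('Data-Vol1')                      -> 'datavol1'
#   safe_name('/')                              -> 'root'
#   brick_name('server1.example.com:/data/b1')  -> 'server1:/data/b1'

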
def find_volumes(need_details):
    """Find gluster volumes."""
    # gluster --mode=script --xml volume info all
    tree = run_command(['volume', 'info', 'all'])
    if not tree:
        return None
    volumes = []
    for volume in tree.findall('volInfo/volumes/volume'):
        value = {
            'name': volume.find('name').text,
            'uuid': volume.find('id').text,
            'status': volume.find('status').text == '1',
            'bricks': [],
        }
        for brick in volume.findall('bricks/brick'):
            value['bricks'].append({
                'name': brick_name(brick.find('name').text),
                'uuid': brick.find('hostUuid').text,
                'status': False,
                'disk_total': 0,
                'disk_free': 0,
                'inode_total': 0,
                'inode_free': 0,
                'heal_pending': 'U',
                'heal_split': 'U',
                'heal_healing': 'U',
            })
        volumes.append(value)

    # Don't get detailed status unless needed. It can be slow.
    if not need_details:
        return volumes

    # gluster --mode=script --xml volume status all detail
    tree = run_command(['volume', 'status', 'all', 'detail'])
    if tree is None:
        # Command failed or output was unparseable; keep default values.
        return volumes
    for node in tree.findall('volStatus/volumes/volume/node'):
        uuid = node.find('peerid').text
        for volume in volumes:
            for brick in volume['bricks']:
                if brick['uuid'] == uuid:
                    brick.update({
                        'status': node.find('status').text == '1',
                        'disk_total': int(node.find('sizeTotal').text),
                        'disk_free': int(node.find('sizeFree').text),
                        'inode_total': int(node.find('inodesTotal').text),
                        'inode_free': int(node.find('inodesFree').text),
                    })

    # gluster --mode=script --xml volume heal <volumename> info summary
    for volume in volumes:
        tree = run_command(['volume', 'heal', volume['name'], 'info',
                            'summary'])
        if tree is None:
            # Heal info is unavailable for this volume; keep 'U' values.
            continue
        for node in tree.findall('healInfo/bricks/brick'):
            uuid = node.attrib['hostUuid']
            for brick in volume['bricks']:
                if brick['uuid'] == uuid:
                    brick.update({
                        'heal_pending': node.find(
                            'numberOfEntriesInHealPending').text,
                        'heal_split': node.find(
                            'numberOfEntriesInSplitBrain').text,
                        'heal_healing': node.find(
                            'numberOfEntriesPossiblyHealing').text,
                    })
    return volumes


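# A minimal sketch of the XML that find_volumes() consumes. The shape is
# assumed from the element paths read above; real output of
# "gluster --mode=script --xml volume info all" carries more fields:
#
#   <cliOutput>
#     <volInfo><volumes><volume>
#       <name>data</name>
#       <id>00000000-0000-0000-0000-000000000000</id>
#       <status>1</status>
#       <bricks><brick>
#         <name>server1.example.com:/data/brick1</name>
#         <hostUuid>11111111-1111-1111-1111-111111111111</hostUuid>
#       </brick></bricks>
#     </volume></volumes></volInfo>
#   </cliOutput>

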
def print_status(config):
    """Print config or values."""
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-statements
    both = os.environ.get('MUNIN_CAP_DIRTYCONFIG') == '1'
    volumes = find_volumes(not config or both)
    if not volumes:
        return

    # Volume started, all bricks are online
    print('multigraph gluster_status')
    if config or both:
        print('graph_title Gluster volume status')
        print('graph_vlabel Status')
        print('graph_category disk')
        print('graph_info Status: 0 = Stopped, 1 = Degraded, 2 = OK')
        print('graph_args --lower-limit 0 --upper-limit 2')
        print('graph_scale no')
    for volume in volumes:
        name = safe_name(volume['name'])
        if config or both:
            print(f'{name}.label Volume {volume["name"]}')
            print(f'{name}.warning 2:')
        if not config or both:
            status = int(volume['status'])
            if status and all(brick['status'] for brick in volume['bricks']):
                status = 2
            print(f'{name}.value {status}')

    # Brick heal status
    for volume in volumes:
        name = safe_name(volume['name'])
        print(f'multigraph gluster_heal_{name}')
        if config or both:
            print(f'graph_title Gluster volume {name} brick status')
            print('graph_vlabel Entries')
            print('graph_category disk')
            print('graph_args --base 1000 --lower-limit 0')
            for brick in volume['bricks']:
                bname = safe_name(brick['uuid'])
                print(f'{bname}_pending.label {brick["name"]} '
                      'in heal pending')
                print(f'{bname}_pending.warning 0')
                print(f'{bname}_split.label {brick["name"]} '
                      'in split-brain')
                print(f'{bname}_split.warning 0')
                print(f'{bname}_healing.label {brick["name"]} '
                      'possibly healing')
                print(f'{bname}_healing.warning 0')
        if not config or both:
            for brick in volume['bricks']:
                bname = safe_name(brick['uuid'])
                print(f'{bname}_pending.value {brick["heal_pending"]}')
                print(f'{bname}_split.value {brick["heal_split"]}')
                print(f'{bname}_healing.value {brick["heal_healing"]}')

    # Brick disk/inode free
    for gtype in ('disk', 'inode'):
        for volume in volumes:
            name = safe_name(volume['name'])
            print(f'multigraph gluster_df_{gtype}_{name}')
            if config or both:
                print(f'graph_title Gluster volume {name} {gtype} usage '
                      'in percent')
                print('graph_vlabel %')
                print('graph_category disk')
                print('graph_args --lower-limit 0 --upper-limit 100')
                print('graph_scale no')
                for brick in volume['bricks']:
                    bname = safe_name(brick['uuid'])
                    print(f'{bname}.label {brick["name"]}')
            if not config or both:
                for brick in volume['bricks']:
                    bname = safe_name(brick['uuid'])
                    if not brick[f'{gtype}_total']:
                        print(f'{bname}.value U')
                    else:
                        value = (100 - 100 * brick[f'{gtype}_free'] /
                                 brick[f'{gtype}_total'])
                        print(f'{bname}.value {value}')


if __name__ == '__main__':
    if len(sys.argv) > 1 and sys.argv[1] == 'autoconf':
        print('yes' if find_volumes(False) else
              'no (no Gluster volumes found)')
    elif len(sys.argv) > 1 and sys.argv[1] == 'config':
        print_status(True)
    else:
        print_status(False)
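To try the plugin by hand, the usual Munin workflow should apply (the source path below is a placeholder; plugin and config locations vary by distribution). Remember the [gluster] user root snippet from the CONFIGURATION section, since the gluster CLI needs root:

    ln -s /path/to/contrib/plugins/disk/gluster /etc/munin/plugins/gluster
    munin-run gluster autoconf
    munin-run gluster config
    munin-run gluster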