mirror of
https://github.com/munin-monitoring/contrib.git
synced 2025-07-21 10:39:53 +00:00
Merge pull request #1399 from kimheino/master
keepalived: plugin to monitor keepalived status and state
This commit is contained in:
commit
8e235bd7a3
1 changed files with 186 additions and 0 deletions
186
plugins/network/keepalived
Executable file
186
plugins/network/keepalived
Executable file
|
@ -0,0 +1,186 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""Munin plugin to monitor keepalived state and status.
|
||||
|
||||
=head1 NAME
|
||||
|
||||
keepalived - monitor keepalived state and status
|
||||
|
||||
=head1 APPLICABLE SYSTEMS
|
||||
|
||||
Linux systems with keepalived running.
|
||||
|
||||
=head1 CONFIGURATION
|
||||
|
||||
Pidfile and datafile locations must be configured if following default
|
||||
values are not correct:
|
||||
|
||||
[keepalived]
|
||||
user root
|
||||
env.pidfile /run/keepalived.pid
|
||||
env.datafile /run/keepalived/keepalived.data
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Kim B. Heino <b@bbbs.net>
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
GPLv2
|
||||
|
||||
=head1 MAGIC MARKERS
|
||||
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf
|
||||
|
||||
=cut
|
||||
"""
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
import time
|
||||
import unicodedata
|
||||
|
||||
|
||||
PIDFILE = pathlib.Path(os.getenv('pidfile', '/run/keepalived.pid'))
|
||||
DATAFILE = pathlib.Path(os.getenv('datafile',
|
||||
'/run/keepalived/keepalived.data'))
|
||||
|
||||
|
||||
def safename(name):
|
||||
"""Return safe variable name."""
|
||||
# Convert ä->a as isalpha('ä') is true
|
||||
value = unicodedata.normalize('NFKD', name)
|
||||
value = value.encode('ASCII', 'ignore').decode('utf-8')
|
||||
|
||||
# Remove non-alphanumeric chars
|
||||
return ''.join(char.lower() if char.isalnum() else '_' for char in value)
|
||||
|
||||
|
||||
def datafile_fresh(fresh_time):
|
||||
"""Check if datafile exists and is fresh."""
|
||||
try:
|
||||
stat = DATAFILE.stat()
|
||||
if stat.st_mtime > fresh_time and stat.st_size > 1024:
|
||||
return True
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
return False
|
||||
|
||||
|
||||
def update_datafile():
|
||||
"""Signal keepalived to write data file."""
|
||||
# Find keepalived's pid
|
||||
try:
|
||||
pid = int(PIDFILE.read_text('utf-8'))
|
||||
except (FileNotFoundError, ValueError):
|
||||
return False
|
||||
|
||||
# Check if current file is fresh
|
||||
fresh = time.time() - 30
|
||||
if datafile_fresh(fresh):
|
||||
return True
|
||||
|
||||
# Signal keepalived to update file
|
||||
try:
|
||||
os.kill(pid, 10) # keepalived --signum=DATA
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
# Wait for datafile to be updated
|
||||
for _dummy_wait in range(15):
|
||||
if datafile_fresh(fresh):
|
||||
time.sleep(1) # One extra second to make it sure it's complete
|
||||
return True
|
||||
time.sleep(1)
|
||||
return False
|
||||
|
||||
|
||||
def read_datafile():
|
||||
"""Update, read and parse datafile."""
|
||||
if not update_datafile():
|
||||
return None
|
||||
data = {
|
||||
'vrrp_instance': {},
|
||||
'vrrp_sync_group': {},
|
||||
}
|
||||
section = None
|
||||
vrrp_instance = None
|
||||
for line in DATAFILE.read_text('utf-8').splitlines():
|
||||
if line.startswith('------<'):
|
||||
section = line.split('< ', 1)[1].split(' >')[0]
|
||||
elif ' = ' in line:
|
||||
key, value = line.split(' = ', 1)
|
||||
# Global
|
||||
if section == 'Global definitions' and key == ' Router ID':
|
||||
data['router_id'] = value
|
||||
|
||||
# Instance
|
||||
elif section == 'VRRP Topology' and key == ' VRRP Instance':
|
||||
vrrp_instance = value
|
||||
elif section == 'VRRP Topology' and key == ' State':
|
||||
data['vrrp_instance'][vrrp_instance] = value
|
||||
|
||||
# Sync group
|
||||
elif section == 'VRRP Sync groups' and key == ' VRRP Sync Group':
|
||||
name, state = value.split(', ', 1)
|
||||
data['vrrp_sync_group'][name] = state
|
||||
|
||||
return data if 'router_id' in data else None
|
||||
|
||||
|
||||
def state_as_number(value):
|
||||
"""Return state as number."""
|
||||
if value == 'MASTER':
|
||||
return 1
|
||||
if value == 'BACKUP':
|
||||
return 0
|
||||
return -1 # FAILED
|
||||
|
||||
|
||||
def config():
|
||||
"""Print plugin config."""
|
||||
data = read_datafile()
|
||||
if not data:
|
||||
return
|
||||
|
||||
print('multigraph keepalived_state')
|
||||
print('graph_title Keepalived VRRP state')
|
||||
print('graph_info VRRP states: 1 = master, 0 = backup, -1 = failed')
|
||||
print('graph_category network')
|
||||
print('graph_vlabel state')
|
||||
print('graph_args --lower-limit -1 --upper-limit 1')
|
||||
print('graph_scale no')
|
||||
for key in data['vrrp_sync_group']:
|
||||
print(f'sg_{safename(key)}.label Sync group {key} state')
|
||||
print(f'sg_{safename(key)}.warning 0:1')
|
||||
for key in data['vrrp_instance']:
|
||||
print(f'i_{safename(key)}.label Instance {key} state')
|
||||
print(f'i_{safename(key)}.warning 0:1')
|
||||
|
||||
if os.environ.get('MUNIN_CAP_DIRTYCONFIG') == '1':
|
||||
fetch(data)
|
||||
|
||||
|
||||
def fetch(data=None):
|
||||
"""Print values."""
|
||||
if not data:
|
||||
data = read_datafile()
|
||||
if not data:
|
||||
return
|
||||
|
||||
print('multigraph keepalived_state')
|
||||
for key, value in data['vrrp_sync_group'].items():
|
||||
print(f'sg_{safename(key)}.value {state_as_number(value)}')
|
||||
for key, value in data['vrrp_instance'].items():
|
||||
print(f'i_{safename(key)}.value {state_as_number(value)}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) > 1 and sys.argv[1] == 'autoconf':
|
||||
print('yes' if read_datafile() else 'no (no keepalived running)')
|
||||
elif len(sys.argv) > 1 and sys.argv[1] == 'config':
|
||||
config()
|
||||
else:
|
||||
fetch()
|
Loading…
Add table
Add a link
Reference in a new issue