#!/usr/bin/env python3
"""Munin plugin to monitor keepalived state and status.

=head1 NAME

keepalived - monitor keepalived state and status

=head1 APPLICABLE SYSTEMS

Linux systems with keepalived running.

=head1 CONFIGURATION

Pidfile and datafile locations must be configured if the following default
values are not correct:

    [keepalived]
    user root
    env.pidfile /run/keepalived.pid
    env.datafile /run/keepalived/keepalived.data

=head1 AUTHOR

Kim B. Heino

=head1 LICENSE

GPLv2

=head1 MAGIC MARKERS

 #%# family=auto
 #%# capabilities=autoconf

=cut
"""

import os
import pathlib
import signal
import sys
import time
import unicodedata


PIDFILE = pathlib.Path(os.getenv('pidfile', '/run/keepalived.pid'))
DATAFILE = pathlib.Path(os.getenv('datafile',
                                  '/run/keepalived/keepalived.data'))


def safename(name):
    """Return safe variable name.

    Accented letters are folded to their ASCII base letter, the result is
    lower-cased and every remaining non-alphanumeric character becomes "_".
    """
    # Convert ä->a as isalpha('ä') is true
    value = unicodedata.normalize('NFKD', name)
    value = value.encode('ASCII', 'ignore').decode('utf-8')

    # Remove non-alphanumeric chars
    return ''.join(char.lower() if char.isalnum() else '_' for char in value)


def datafile_fresh(fresh_time):
    """Check if datafile exists and is fresh.

    The file counts as fresh when it was modified after ``fresh_time``
    (seconds since the epoch) and is larger than 1024 bytes.  Any failure
    to stat the file (missing, unreadable directory, ...) means "not fresh".
    """
    try:
        stat = DATAFILE.stat()
    except OSError:
        return False
    return stat.st_mtime > fresh_time and stat.st_size > 1024


def update_datafile():
    """Signal keepalived to write data file.

    Returns True when a fresh datafile is available, False otherwise
    (no usable pidfile, signalling failed, or the file was not refreshed
    within the wait period).
    """
    # Find keepalived's pid. OSError covers a missing or unreadable pidfile,
    # ValueError covers non-numeric and undecodable content.
    try:
        pid = int(PIDFILE.read_text('utf-8'))
    except (OSError, ValueError):
        return False

    # A pid <= 0 would make os.kill() signal a whole process group or every
    # process instead of a single daemon, so treat it as an invalid pidfile.
    if pid <= 0:
        return False

    # Check if current file is fresh
    fresh = time.time() - 30
    if datafile_fresh(fresh):
        return True

    # Signal keepalived to update file. The original code sent the literal
    # signal number 10 (see "keepalived --signum=DATA"), which is SIGUSR1 on
    # common Linux architectures; the named constant is also correct where
    # the numeric value differs.
    try:
        os.kill(pid, signal.SIGUSR1)
    except (OSError, OverflowError):
        return False

    # Wait for datafile to be updated
    for _dummy_wait in range(15):
        if datafile_fresh(fresh):
            time.sleep(1)  # One extra second to make it sure it's complete
            return True
        time.sleep(1)
    return False


def _parse_datafile(text):
    """Parse datafile content, return data dict or None if no router id."""
    data = {
        'vrrp_instance': {},
        'vrrp_sync_group': {},
    }
    section = None
    vrrp_instance = None
    for line in text.splitlines():
        if line.startswith('------<'):
            # Section header; a malformed header just resets the section
            _, found, rest = line.partition('< ')
            section = rest.split(' >')[0] if found else None
            continue

        key, found, value = line.partition(' = ')
        if not found:
            continue

        # Global
        if section == 'Global definitions' and key == ' Router ID':
            data['router_id'] = value

        # Instance
        elif section == 'VRRP Topology':
            if key == ' VRRP Instance':
                vrrp_instance = value
            elif key == ' State' and vrrp_instance is not None:
                data['vrrp_instance'][vrrp_instance] = value

        # Sync group; lines without a ", state" part are skipped
        elif section == 'VRRP Sync groups' and key == ' VRRP Sync Group':
            name, found, state = value.partition(', ')
            if found:
                data['vrrp_sync_group'][name] = state

    return data if 'router_id' in data else None


def read_datafile():
    """Update, read and parse datafile.

    Returns a dict with keys 'router_id', 'vrrp_instance' (name -> state)
    and 'vrrp_sync_group' (name -> state), or None when the datafile is
    unavailable, unreadable or contains no router id.
    """
    if not update_datafile():
        return None

    # The file may vanish or be undecodable between the freshness check and
    # the read; report "no data" instead of crashing.
    try:
        text = DATAFILE.read_text('utf-8')
    except (OSError, ValueError):
        return None
    return _parse_datafile(text)


def state_as_number(value):
    """Return state as number: 1 = MASTER, 0 = BACKUP, -1 = anything else."""
    if value == 'MASTER':
        return 1
    if value == 'BACKUP':
        return 0
    return -1  # FAILED


def config():
    """Print plugin config."""
    data = read_datafile()
    if not data:
        return

    print('multigraph keepalived_state')
    print('graph_title Keepalived VRRP state')
    print('graph_info VRRP states: 1 = master, 0 = backup, -1 = failed')
    print('graph_category network')
    print('graph_vlabel state')
    print('graph_args --lower-limit -1 --upper-limit 1')
    print('graph_scale no')
    for key in data['vrrp_sync_group']:
        print(f'sg_{safename(key)}.label Sync group {key} state')
        print(f'sg_{safename(key)}.warning 0:1')
    for key in data['vrrp_instance']:
        print(f'i_{safename(key)}.label Instance {key} state')
        print(f'i_{safename(key)}.warning 0:1')

    # With dirtyconfig the values are emitted together with the config
    if os.environ.get('MUNIN_CAP_DIRTYCONFIG') == '1':
        fetch(data)


def fetch(data=None):
    """Print values.

    ``data`` is an already parsed datafile dict; when omitted (or empty)
    the datafile is read here.
    """
    if not data:
        data = read_datafile()
    if not data:
        return

    print('multigraph keepalived_state')
    for key, value in data['vrrp_sync_group'].items():
        print(f'sg_{safename(key)}.value {state_as_number(value)}')
    for key, value in data['vrrp_instance'].items():
        print(f'i_{safename(key)}.value {state_as_number(value)}')


def main():
    """Dispatch on the first command line argument."""
    mode = sys.argv[1] if len(sys.argv) > 1 else None
    if mode == 'autoconf':
        print('yes' if read_datafile() else 'no (no keepalived running)')
    elif mode == 'config':
        config()
    else:
        fetch()


if __name__ == '__main__':
    main()