From e43e8825279507f7144847670f3bcc6b548cc9c1 Mon Sep 17 00:00:00 2001 From: "Kim B. Heino" Date: Mon, 11 Dec 2023 11:50:40 +0200 Subject: [PATCH] process_group: monitor process group age, cpu, memory and count Generic plugin to monitor configured processes. Multiple process groups can be configured with regexps. --- plugins/cpu/process_group | 205 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100755 plugins/cpu/process_group diff --git a/plugins/cpu/process_group b/plugins/cpu/process_group new file mode 100755 index 00000000..4b99fe85 --- /dev/null +++ b/plugins/cpu/process_group @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 + +"""Munin plugin to monitor process group process count, memory, cpu and age. + +=head1 NAME + +process_group - monitor process group process count, memory, cpu and age + +=head1 APPLICABLE SYSTEMS + +Linux systems. + +=head1 CONFIGURATION + +List of monitored process groups must be configured as regexps. Example: + + [process_group] + env.group1 php-fpm: pool + env.group2 nginx + +This will monitor two process groups: "php-fpm: pool" PHP application server +running any pool and "nginx" web server. Regexps must match "ps -eo command" +output. + +=head1 AUTHOR + +Kim B. Heino + +=head1 LICENSE + +GPLv2 + +=head1 MAGIC MARKERS + + #%# family=auto + #%# capabilities=autoconf + +=cut +""" + +import os +import re +import subprocess +import sys +import unicodedata + + +def safename(name): + """Return safe variable name.""" + # Convert ä->a as isalpha('ä') is true + value = unicodedata.normalize('NFKD', name) + value = value.encode('ASCII', 'ignore').decode('utf-8') + + # Remove non-alphanumeric chars + return ''.join(char.lower() if char.isalnum() else '_' for char in value) + + +def run_binary(arg): + """Run binary and return output.""" + try: + return subprocess.run(arg, stdout=subprocess.PIPE, check=False, + encoding='utf-8', errors='ignore').stdout + except FileNotFoundError: + return '' + + +def parse_config(): + """Get groups from environment variables / munin plugin config.""" + groups = [] + counter = 1 + while True: + group = os.getenv(f'group{counter}') + if not group: + break + groups.append(group) + counter += 1 + return groups + + +def parse_elapsed(text): + """Parse ps's elapsed field to seconds. + + 8-23:05:27 day, hour, min, sec + 21:16:53 hour, min, sec + 04:29 min, sec + """ + days = hours = minutes = seconds = '0' + if '-' in text: + days, text = text.split('-', 1) + if text.count(':') == 2: + hours, text = text.split(':', 1) + minutes, seconds = text.split(':') + return (int(days) * 86400 + + int(hours) * 3600 + + int(minutes) * 60 + + int(seconds)) + + +def collect_data(): + """Run ps and parse its output.""" + groups = parse_config() + values = {} + for group in groups: + values[group] = { + 'count': 0, + 'cpu': 0, + 'elapsed': 0, + 'rss': 0, + } + + lines = run_binary(['ps', '-eo', '%cpu,etime,rss,command']) + for line in lines.splitlines(): + cpu, elapsed, rss, command = line.split(None, 3) + for group in groups: + if re.search(group, command): + values[group]['count'] += 1 + values[group]['cpu'] += float(cpu) + values[group]['elapsed'] += parse_elapsed(elapsed) + values[group]['rss'] += int(rss) * 1024 + return values + + +def config(): + """Print plugin config.""" + groups = parse_config() + if not groups: + return + + print('multigraph process_group_age') + print('graph_title Process group average age') + print('graph_category processes') + print('graph_vlabel seconds') + print('graph_args --base 1000') + print('graph_scale no') + for group in groups: + print(f'{safename(group)}.label {group}') + + print('multigraph process_group_memory') + print('graph_title Process group average memory') + print('graph_category processes') + print('graph_vlabel bytes') + print('graph_args --base 1024') + for group in groups: + print(f'{safename(group)}.label {group}') + + print('multigraph process_group_cpu') + print('graph_title Process group CPU usage') + print('graph_category processes') + print('graph_vlabel %') + print('graph_args --base 1000') + print('graph_scale no') + for group in groups: + print(f'{safename(group)}.label {group}') + + print('multigraph process_group_count') + print('graph_title Process group process count') + print('graph_category processes') + print('graph_vlabel processes') + print('graph_args --base 1000') + print('graph_scale no') + for group in groups: + print(f'{safename(group)}.label {group}') + + if os.environ.get('MUNIN_CAP_DIRTYCONFIG') == '1': + fetch() + + +def fetch(): + """Print values.""" + data = collect_data() + if not data: + return + + print('multigraph process_group_age') + for group, values in data.items(): + if not values['count']: + value = 0 + else: + value = values['elapsed'] / values['count'] + print(f'{safename(group)}.value {value}') + + print('multigraph process_group_memory') + for group, values in data.items(): + if not values['count']: + value = 0 + else: + value = values['rss'] / values['count'] + print(f'{safename(group)}.value {value}') + + print('multigraph process_group_cpu') + for group, values in data.items(): + print(f'{safename(group)}.value {values["cpu"]}') + + print('multigraph process_group_count') + for group, values in data.items(): + print(f'{safename(group)}.value {values["count"]}') + + +if __name__ == '__main__': + if len(sys.argv) > 1 and sys.argv[1] == 'autoconf': + print('yes' if parse_config() else 'no (no groups configured)') + elif len(sys.argv) > 1 and sys.argv[1] == 'config': + config() + else: + fetch()