mirror of
https://github.com/munin-monitoring/contrib.git
synced 2025-07-21 18:41:03 +00:00
These services are running fine, so the extinfo would mostly just clutter the webui while making the extinfo of relevant (i.e. down) services harder to find.
156 lines
4.2 KiB
Bash
Executable file
156 lines
4.2 KiB
Bash
Executable file
#!/bin/bash
|
|
# -*- sh -*-
|
|
|
|
: << =cut
|
|
|
|
=head1 NAME
|
|
|
|
systemd - Plugin to monitor systemd units state
|
|
|
|
=head1 APPLICABLE SYSTEMS
|
|
|
|
Linux systems with systemd installed.
|
|
|
|
=head1 CONFIGURATION
|
|
|
|
No configuration is required for this plugin. You may optionally pass warning and critical values for any
|
|
of the possible states (active, reloading, inactive, failed, activating, deactivating), and you may
|
|
additionally pass both global and state-specific include/exclude filters to include only units you care
|
|
about and/or exclude units you don't care about.
|
|
|
|
(Note that for failed units, default warning and critical values are set to 0 and 10, respectively. No other
|
|
states have default levels set.)
|
|
|
|
Value calculations for each state are made using the following algorithm (all filters performed using
|
|
C<grep -E>):
|
|
|
|
1. Global include rules are applied on the output of C<systemctl list-units --all --no-legend>;
|
|
2. Global exclude rules are then applied to the result of that;
|
|
3. Then, for each state, this global output is further filtered by include, then exclude rules for the state;
|
|
4. Then the result is filtered for the given state and the remaining units counted and listed.
|
|
|
|
An example configuration might be something like this:
|
|
|
|
=over 2
|
|
|
|
[systemd_units]
|
|
env.failed_warning 0
|
|
env.failed_critical 5
|
|
env.inactive_warning 10
|
|
env.inactive_critical 20
|
|
env.exclude boring
|
|
env.inactive_exclude sleepy
|
|
env.silence_active_extinfo 1
|
|
|
|
=back
|
|
|
|
In the example above, we've overridden the default warning and critical levels for failed units, added warning
|
|
and critical levels for inactive units, then filtered out boring units from all results and filtered out sleepy
|
|
units from results for the inactive state. In addition, the extra info (the list of matching unit names) is
suppressed for active units, so that units in other states are easier to find in the web UI. (By default, the
extra info is displayed for every state.)
|
|
|
|
=head1 AUTHOR
|
|
|
|
Olivier Mehani <shtrom+munin@ssji.net>
|
|
Kael Shipman <kael.shipman@gmail.com>
|
|
|
|
=head1 LICENSE
|
|
|
|
GPLv2
|
|
|
|
=head1 MAGIC MARKERS
|
|
|
|
#%# family=auto
|
|
#%# capabilities=autoconf
|
|
|
|
=cut
|
|
|
|
# Load the shared Munin shell helpers. print_warning and print_critical, which
# config() calls, are not defined in this file and are expected to come from here.
. "$MUNIN_LIBDIR/plugins/plugin.sh"
|
|
|
|
# Default thresholds for the "failed" state; any value already present in the
# environment (e.g. from the plugin configuration) takes precedence.
failed_warning="${failed_warning:-0}"
failed_critical="${failed_critical:-10}"

# When set to anything other than "0", fetch() omits the extinfo line for the
# "active" state.
silence_active_extinfo="${silence_active_extinfo:-0}"

# Unit states that are graphed, in output order. Kept as a whitespace-separated
# word list because config() and fetch() iterate over it with word splitting.
states="active reloading inactive failed activating deactivating"

# Global filters (extended regular expressions for grep -E) applied to the
# systemctl listing. The defaults keep every line: ".*" matches anything and
# "^$" only excludes empty lines.
include="${include:-.*}"
exclude="${exclude:-^$}"
|
|
|
|
# Munin "autoconf" handler: report whether this plugin can work on this host.
# Outputs: "yes" when systemctl is available and a trial query succeeds,
#          otherwise "no (<reason>)".
autoconf() {
    # `command -v` is a shell builtin; `which` is an optional external tool
    # whose absence would wrongly produce a "no" answer.
    if command -v systemctl >/dev/null 2>&1 \
        && systemctl --state=failed --no-pager --no-legend >/dev/null 2>&1; then
        echo yes
    else
        echo "no (No systemctl or error running it)"
    fi
}
|
|
|
|
# Munin "config" handler: print the graph definition followed by, for every
# state in $states, its label, draw style and threshold lines.
# Globals:  states (read)
# Outputs:  Munin configuration on stdout
config() {
    local state

    cat << EOF
graph_title Systemd units state
graph_args -l 0
graph_category system
graph_scale no
graph_vlabel units
EOF

    # $states is intentionally unquoted: it is a whitespace-separated word list.
    for state in $states; do
        echo "$state.label $state"
        echo "$state.draw AREASTACK"
        # Helpers that are not defined in this file (expected from the sourced
        # plugin.sh); presumably they emit the warning/critical thresholds
        # configured for the given field.
        print_warning "$state"
        print_critical "$state"
    done
}
|
|
|
|
# Munin "fetch" handler: for every state in $states print "<state>.value N"
# and, unless suppressed, "<state>.extinfo <unit names>".
# Globals:  states, include, exclude, silence_active_extinfo (read);
#           optional <state>_include / <state>_exclude (read, via indirection)
# Outputs:  Munin values on stdout
fetch() {
    local state state_include_var state_include state_exclude_var state_exclude
    local global_unit_list state_unit_list count extinfo

    # Get all units, filtering by the global include/exclude rules, and reduce
    # each line to "<unit> <active-state>".
    # --plain asks systemctl to omit the status bullet it otherwise prints in
    # front of some units (e.g. failed ones); that extra column would shift
    # the awk fields. As a fallback, a first field without a dot cannot be a
    # unit name (unit names carry a ".type" suffix), so it is skipped.
    global_unit_list=$(systemctl --no-pager --no-legend --plain --all \
        | grep -E -- "$include" \
        | grep -Ev -- "$exclude" \
        | awk '{
            if ($1 ~ /\./) { print $1, $3 } else { print $2, $4 }
        }')

    # $states is intentionally unquoted: it is a whitespace-separated word list.
    for state in $states; do
        # Get state-specific include/excludes, if present.
        state_include_var="${state}_include"
        state_include="${!state_include_var}"
        state_exclude_var="${state}_exclude"
        state_exclude="${!state_exclude_var}"
        state_unit_list="$global_unit_list"

        # Apply the state-specific filters to the "<unit> <state>" lines.
        if [ -n "$state_include" ]; then
            state_unit_list=$(printf '%s\n' "$state_unit_list" | grep -E -- "$state_include")
        fi
        if [ -n "$state_exclude" ]; then
            state_unit_list=$(printf '%s\n' "$state_unit_list" | grep -Ev -- "$state_exclude")
        fi

        # Count and list the units in exactly this state. The state field is
        # compared as a whole: a suffix match would count "inactive" units as
        # "active" and "deactivating" units as "activating".
        count=$(printf '%s\n' "$state_unit_list" \
            | awk -v want="$state" '$2 == want { n++ } END { print n + 0 }')
        echo "$state.value $count"

        # Space-terminated list of the matching unit names.
        extinfo=$(printf '%s\n' "$state_unit_list" \
            | awk -v want="$state" '$2 == want { printf "%s ", $1 }')

        # Emit the unit list unless it is empty, or it belongs to the "active"
        # state while silence_active_extinfo is enabled.
        if [ -n "$extinfo" ] \
            && { [ "$silence_active_extinfo" = "0" ] || [ "$state" != "active" ]; }; then
            echo "$state.extinfo" "$extinfo"
        fi
    done
}
|
|
|
|
# Entry point: dispatch on the first argument passed to the plugin.
# "autoconf" and "config" select their handlers; anything else, including no
# argument at all, reports the current values.
case "${1:-}" in
    autoconf)
        autoconf
        ;;
    config)
        config
        ;;
    *)
        fetch
        ;;
esac
|