#!/bin/bash
# -*- sh -*-

set -e

: << =cut

=head1 NAME

cpu_by_group - Monitors cpu time for all processes on a system and groups them into graphs by either cgroup, system processes or kernel threads

=head1 DESCRIPTION

Works similarly to good ol' cpu_by_process, but groups processes by either
their cgroup, system processes or kernel threads to produce smaller graphs.

For docker projects / containers the processes are grouped by either their
docker compose project name or container name.
For user cgroups the user id will be resolved to the corresponding user name.

The plugin keeps track of previously running processes in MUNIN_STATEFILE

=head2 EXAMPLE GRAPHS

There are 3 example graphs which were all automatically generated by this plugin.

- System-day -> system processes (processes without a cgroup)
- KThread-day -> kernel threads
- caddy-day -> A docker compose project named caddy, with a single container named caddy and a single caddy process

=head1 CONFIGURATION

For full docker support, ensure that munin can access docker. F.e. run munin
with the docker group:

  [cpu_by_group]
  group docker

=head1 REQUIREMENTS

- bash v4.3+
- docker access rights (if using docker)

=head1 AUTHOR

Copyright (C) 2025 pimlie

=head1 LICENSE

MIT

=head1 MAGIC MARKERS

 #%# family=auto
 #%# capabilities=autoconf

=cut

if [ -n "$MUNIN_LIBDIR" ]; then
  # shellcheck source=/dev/null
  . "$MUNIN_LIBDIR/plugins/plugin.sh"
fi

PLUGIN_BASE="$(basename "$0")"
PLUGIN_CACHE="$MUNIN_STATEFILE"

# Check if docker can be used.
# Outputs: "1" when the docker binary exists and `docker ps` succeeds, else "0"
# Returns: 0 when docker is usable, 1 otherwise
function can_use_docker {
  if command -v docker >/dev/null && docker ps >/dev/null 2>&1; then
    echo 1
    return 0
  fi

  echo 0
  return 1
}

# is a bool, 1=true
# can_use_docker returns non-zero when docker is unavailable; without the
# guard `set -e` would abort the whole plugin on hosts without docker.
HAS_DOCKER="$(can_use_docker)" || true

# Left trim white spaces
# Arguments: the string to trim (all arguments are joined)
# Outputs:   the string without leading whitespace
function ltrim {
  local var="$*"

  # remove leading whitespace characters
  var="${var#"${var%%[![:space:]]*}"}"
  printf '%s\n' "$var"
}

#
# Create a munin variable safe & human readable cgroup name
#
# Supported human readable names:
# - 'System' for processes outside a *.scope cgroup, 'KThread' for kernel threads
# - 'User-$user_name' for isolated user processes
# - For docker containers
#   - Group by the com.docker.compose.project label (the docker project name) if applicable
#   - Add container name, either as group if not docker compose otherwise as graph prefix
#
# Arguments:
#   $1 - name of the variable that receives the result (nameref)
#   $2 - content of /proc/<pid>/cgroup
#   $3 - pid, used to tell kernel threads apart from system processes
# Globals: HAS_DOCKER (read), docker_cache (read/written)
#
declare -A docker_cache
function safe_cgroup_name {
  local -n local_cgroup_name=$1
  local cgroup="$2"
  local pid="${3:-}"
  local -r pf_kthread=0x00200000 # PF_KTHREAD bit of the flags field
  local stat_line flags user_id user docker_id docker_data
  local image_title compose_project
  local -a stat_fields docker_names

  if [[ "$cgroup" != *.scope ]] || [[ "$cgroup" == */init.scope ]]; then
    # if no cgroup scope, just list as system
    cgroup="System"

    # Unless it's a kernel thread
    if [ -n "$pid" ] && [ -r "/proc/$pid/stat" ]; then
      stat_line=""
      # The process may exit between the -r test and the read; an empty line
      # then simply means "not a kernel thread". read -d '' always returns
      # non-zero at EOF, hence the `|| true`.
      { IFS= read -r -d '' stat_line < "/proc/$pid/stat"; } 2>/dev/null || true

      # The comm field is wrapped in parentheses and may itself contain spaces
      # or parentheses, so only split what follows the last ") ".
      stat_line="${stat_line##*) }"
      IFS=' ' read -ra stat_fields <<< "$stat_line" || true

      # After "pid (comm) " the fields are: state ppid pgrp session tty_nr
      # tpgid flags -> flags sits at index 6
      flags="${stat_fields[6]:-0}"
      if (( (flags & pf_kthread) == pf_kthread )); then
        cgroup="KThread"
      fi
    fi
  elif [[ "$cgroup" == *"/user.slice/user-"* ]]; then
    # Extract user id and use user name as cgroup name
    user_id="${cgroup#*/user.slice/user-}"
    user_id="${user_id%%.*}"

    # Fall back to the numeric id when the user cannot be resolved instead of
    # letting `set -e` abort the whole run
    user="$(id -nu "$user_id" 2>/dev/null)" || user="$user_id"
    cgroup="User-${user}"
  elif [[ "$HAS_DOCKER" == 1 && "$cgroup" == *"/docker-"* ]]; then
    # Extract docker container id and use either compose project otherwise
    # container name as cgroup name
    docker_id="${cgroup#*/docker-}"
    docker_id="${docker_id%%.*}"

    docker_data=""
    if [ -n "$docker_id" ]; then
      docker_data="${docker_cache["$docker_id"]:-}"
      if [ -z "$docker_data" ]; then
        # The container may have been removed in the meantime; do not abort
        docker_data="$(docker inspect --format='{{ .Name }} {{ index .Config.Labels "com.docker.compose.project" }}' "$docker_id")" || docker_data=""
        docker_cache["$docker_id"]="$docker_data"
      fi
    fi

    IFS=' ' read -ra docker_names <<< "$docker_data" || true
    image_title="${docker_names[0]##*/}"
    compose_project="${docker_names[1]:-}"

    if [ -n "$compose_project" ]; then
      cgroup="$compose_project.$image_title"
    elif [ -n "$image_title" ]; then
      cgroup="$image_title"
    else
      # Container could not be resolved: use the scope name (docker-<id>)
      # rather than the raw cgroup path
      cgroup="${cgroup##*/}"
      cgroup="${cgroup%%.*}"
    fi
  else
    cgroup="${cgroup##*/}"
    cgroup="${cgroup%%.*}"
  fi

  # shellcheck disable=SC2034
  local_cgroup_name="${cgroup//_/-}"
}

# Format process name as munin safe variable
# Arguments:
#   $1 - name of the variable that receives the result (nameref)
#   $2 - raw process name
function safe_proc_name {
  local -n local_name=$1
  local name="$2"

  name="${name%%/*}"               # Remove everything after a /
  name="${name%.}"                 # Remove trailing dot
  name="${name//[^a-zA-Z0-9]/_}"   # Make Munin var safe

  # shellcheck disable=SC2034
  local_name="$name"
}

# Format graph name as munin safe variable
# Arguments: $1 - graph name, defaults to 'cputime' when unset or empty
# Outputs:   the name with pipes and dots replaced by underscores
function safe_graph_name {
  local name="${1:-cputime}"  # Use cputime as default value if unset

  name="${name//|/.}"         # Replace any pipes to dots
  printf '%s\n' "${name//./_}" # Replace dots with underscores
}

# Calculate process time in seconds
# Arguments:
#   $1 - name of the variable that receives the seconds (nameref)
#   $2 - cpu time in ([days]-)[hour]:[min]:[sec] format
# Exits with status 1 when $2 does not have three colon separated parts.
function calc_proc_time {
  local -n local_proc_time=$1
  local -a time_parts day_hour
  local day=0
  local hour

  IFS=":" read -ra time_parts <<< "$2"
  if [ "${#time_parts[@]}" -ne 3 ]; then
    echo "Expected time to be in ([days]-)[hour]:[min]:[sec] format, got '$2'" >&2
    exit 1
  fi

  hour="${time_parts[0]}"
  IFS="-" read -ra day_hour <<< "${time_parts[0]}"
  if [ "${#day_hour[@]}" -gt 1 ]; then
    day="${day_hour[0]}"
    hour="${day_hour[1]}"
  fi

  # 10# forces base 10 so zero padded values such as 08 are not read as octal
  # shellcheck disable=SC2034
  local_proc_time="$(( (10#$day * 24 + 10#$hour) * 3600 + 10#${time_parts[1]} * 60 + 10#${time_parts[2]} ))"
}

# Get array of all previous & current running processes
# Arguments: $1 - name of an associative array (nameref); keys are
#                 '<group>|<graph name>', values the summed cpu seconds.
#                 Names found in the cache file are pre-seeded with 0.
# Globals:   PLUGIN_CACHE (read)
function get_processes {
  local -n procs=$1
  local process proc_id cgroup_file cgroup_name proc_name proc_time
  local breadcrumb multi_name multi_graph_name procs_key cur_value
  local -a proc

  if [ -n "$PLUGIN_CACHE" ] && [ -r "$PLUGIN_CACHE" ] && [ -s "$PLUGIN_CACHE" ]; then
    while read -r process || [ -n "$process" ]; do
      # An empty key is not a valid associative array subscript
      if [ -n "$process" ]; then
        procs["$process"]=0
      fi
    done < "$PLUGIN_CACHE"
  fi

  # ps output is read on fd 3 so commands run inside the loop cannot consume it
  while IFS=' ' read -r -u 3 -a proc; do
    if [ "${#proc[@]}" -lt 3 ]; then
      continue
    fi

    # Create dot separated cgroup / process name breadcrumb
    proc_id="${proc[1]}"
    cgroup_file="/proc/$proc_id/cgroup"

    # read cgroup directly from /proc fs, as ps is bad for parsing multiple
    # variable width columns and trimming whitespace is quite slow in bash
    cgroup_name=""
    if [ -r "$cgroup_file" ]; then
      # The process may be gone by the time the file is read
      { cgroup_name="$(< "$cgroup_file")"; } 2>/dev/null || cgroup_name=""
    fi

    proc_name="${proc[2]}"
    proc_time=0

    safe_cgroup_name cgroup_name "$cgroup_name" "$proc_id"
    safe_proc_name proc_name "$proc_name"

    # Use everything before the first dot as the cgroup name, and everything
    # after the first dot as process name.
    # This ensures that we can also use prefixes within cgroup's, fe
    # for docker containers we want the compose projects as cgroup name
    # but still prefix individual processes with their container name
    breadcrumb="${cgroup_name}.${proc_name}"
    multi_name="${breadcrumb%%.*}"
    multi_graph_name="${breadcrumb#*.}"

    calc_proc_time proc_time "${proc[0]}"

    if [ "$proc_time" -gt 0 ]; then
      procs_key="${multi_name}|${multi_graph_name}"
      cur_value="${procs["$procs_key"]:-0}"
      procs["$procs_key"]="$(( cur_value + proc_time ))"
    fi
  done 3< <(ps -eo time,pid,comm h)
}

# Write the given names to the cache file, one per line
# Arguments: $1 - name of an array holding the names (nameref)
# Globals:   PLUGIN_CACHE (read)
function cache_processes {
  local -n proc_names=$1

  if [ -n "$PLUGIN_CACHE" ] && [ -w "$(dirname "$PLUGIN_CACHE")" ]; then
    # tr keeps the one-name-per-line layout even when an element holds
    # several space separated names
    printf '%s\n' "${proc_names[@]}" | tr ' ' '\n' > "$PLUGIN_CACHE"
  fi
}

# Emit multigraph header
# Arguments: $1 - optional sub graph name; dashes are replaced by underscores
# Globals:   PLUGIN_BASE (read)
function emit_multigraph_base {
  if [ -z "$1" ]; then
    echo "multigraph $PLUGIN_BASE"
  else
    echo "multigraph ${PLUGIN_BASE}_${1//-/_}"
  fi
}

# Emit base graph config
# NOTE(review): the definition of emit_graph_base is cut off at this point in
# the reviewed text. The visible fragment is preserved verbatim in the comment
# below and has to be re-joined with its continuation:
#   function emit_graph_base { cat <