1
0
Fork 0
mirror of https://github.com/munin-monitoring/contrib.git synced 2025-08-01 13:53:51 +00:00

Modify nvidia_gpu_by_user plugin

This commit is contained in:
h-takano 2018-09-26 08:40:50 +09:00
parent 614e7ed004
commit 9b9721a142

View file

@ -6,13 +6,24 @@
=head1 NAME
gpubyuser - Plugin to monitor GPU memory usage by user
nvidia_gpu_by_user - Plugin to monitor GPU memory usage by user.
=head1 CONFIGURATION
Add this to node configuration file:
[nvidia_gpu_by_user]
env.smiexec - Location of nvidia-smi executable.
env.gpuusers - Space-separated list of usernames to monitor.
=head1 USAGE
Example:
[nvidia_gpu_by_user]
env.smiexec /path/to/nvidia-smi
env.gpuusers root hideki
If env.gpuusers is set, the graph always shows the listed users.
Otherwise, the graph shows only the users that have been using GPUs.
=head1 AUTHOR
@ -32,11 +43,15 @@ GPLv2
EOF
. "$MUNIN_LIBDIR/plugins/plugin.sh"
# . "$MUNIN_LIBDIR/plugins/plugin.sh"
. /usr/share/munin/plugins/plugin.sh
# Path to the nvidia-smi executable: taken from env.smiexec when it is set
# and non-empty, otherwise the default /usr/bin/nvidia-smi is used
nvSmiExec=${smiexec:-'/usr/bin/nvidia-smi'}
# User list taken from env.gpuusers; falls back to an empty string when the
# variable is unset or empty
gpuUSERS=${gpuusers:-""}
# Check if autoconf was requested
if [ "$1" = "autoconf" ]; then
# Autoconf only returns yes if nvidia-smi exists and is executable
@ -49,8 +64,6 @@ if [ "$1" = "autoconf" ]; then
fi
fi
gpuUSERS=$(clean_fieldname "$(ls /home)" | tr "\n" " ")
# GPU usage
smiOutput=$("$nvSmiExec" -q)
smiInfo=$(echo "$smiOutput" | grep -A 3 -E "(Product Name|GPU UUID|Process ID|FB Memory Usage)" | grep -E "(Product Name|GPU UUID|Process ID|Total|Used GPU Memory)")
@ -66,6 +79,7 @@ echo "$smiInfo" | \
BEGIN {
n=-1;
split("", gpu);
stderr="/dev/stderr"
}
$0 ~ "^Product Name" {
@ -84,7 +98,7 @@ $0 ~ "^Total" {
}
$0 ~ "^Process ID" {
"ps -axo pid,user | sed \"s/^ *//g\" | grep \"^"$2" \" 2>/dev/null | cut -d\" \" -f 2 | sed -e \"s/^[^A-Za-z_]/_/\" -e \"s/[^A-Za-z0-9_]/_/g\" -e \"s/^root$/__root/\" | tr \"\n\" \" \" | tr -d \" \"" | getline tmpid
"ps -axo pid,user | sed \"s/^ *//g\" | grep \"^"$2" \" 2>/dev/null | cut -d\" \" -f 2 | sed -e \"s/^[^A-Za-z_]/_/\" -e \"s/[^A-Za-z0-9_]/_/g\" | tr \"\n\" \" \" | tr -d \" \"" | getline tmpid
if (tmpid == "") tmpid = "other";
m = getUserIdxInGpu(n, tmpid);
gpu["user", n, m] = tmpid;
@ -99,8 +113,8 @@ $0 ~ "^Used GPU Memory" {
END {
if (n < 0) {
print "No NVIDIA GPUs detected. Exiting."
exit;
print "No NVIDIA GPUs detected. Exiting." > stderr;
exit 1;
}
@ -236,5 +250,4 @@ function getUserCountInGpu(_n) {
}
return j;
}
'