mirror of
https://github.com/munin-monitoring/contrib.git
synced 2025-08-02 06:08:23 +00:00
Modify nvidia_gpu_by_user plugin
This commit is contained in:
parent
614e7ed004
commit
9b9721a142
1 changed files with 21 additions and 8 deletions
|
@ -6,13 +6,24 @@
|
||||||
|
|
||||||
=head1 NAME
|
=head1 NAME
|
||||||
|
|
||||||
gpubyuser - Plugin to monitor GPU memory usage by user
|
nvidia_gpu_by_user - Plugin to monitor GPU memory usage by user.
|
||||||
|
|
||||||
=head1 CONFIGURATION
|
=head1 CONFIGURATION
|
||||||
|
|
||||||
Add this to node configuration file:
|
Add this to node configuration file:
|
||||||
[nvidia_gpu_by_user]
|
[nvidia_gpu_by_user]
|
||||||
env.smiexec - Location of nvidia-smi executable.
|
env.smiexec - Location of nvidia-smi executable.
|
||||||
|
env.gpuusers - Space-separated list of usernames to monitor.
|
||||||
|
|
||||||
|
=head1 USAGE
|
||||||
|
|
||||||
|
Example:
|
||||||
|
[nvidia_gpu_by_user]
|
||||||
|
env.smiexec /path/to/nvidia-smi
|
||||||
|
env.gpuusers root hideki
|
||||||
|
|
||||||
|
If env.gpuusers is set, the graph always shows the listed users.
|
||||||
|
Otherwise, the graph shows only the users that have been using GPUs.
|
||||||
|
|
||||||
=head1 AUTHOR
|
=head1 AUTHOR
|
||||||
|
|
||||||
|
@ -32,11 +43,15 @@ GPLv2
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
. "$MUNIN_LIBDIR/plugins/plugin.sh"
|
# . "$MUNIN_LIBDIR/plugins/plugin.sh"
|
||||||
|
. /usr/share/munin/plugins/plugin.sh
|
||||||
|
|
||||||
# Get location of nvidia-smi executable or use default
|
# Get location of nvidia-smi executable or use default
|
||||||
nvSmiExec=${smiexec:-'/usr/bin/nvidia-smi'}
|
nvSmiExec=${smiexec:-'/usr/bin/nvidia-smi'}
|
||||||
|
|
||||||
|
# Get gpuusers
|
||||||
|
gpuUSERS=${gpuusers:-""}
|
||||||
|
|
||||||
# Check if autoconf was requested
|
# Check if autoconf was requested
|
||||||
if [ "$1" = "autoconf" ]; then
|
if [ "$1" = "autoconf" ]; then
|
||||||
# Autoconf only returns yes if nvidia-smi exists and is executable
|
# Autoconf only returns yes if nvidia-smi exists and is executable
|
||||||
|
@ -49,8 +64,6 @@ if [ "$1" = "autoconf" ]; then
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
gpuUSERS=$(clean_fieldname "$(ls /home)" | tr "\n" " ")
|
|
||||||
|
|
||||||
# GPU usage
|
# GPU usage
|
||||||
smiOutput=$("$nvSmiExec" -q)
|
smiOutput=$("$nvSmiExec" -q)
|
||||||
smiInfo=$(echo "$smiOutput" | grep -A 3 -E "(Product Name|GPU UUID|Process ID|FB Memory Usage)" | grep -E "(Product Name|GPU UUID|Process ID|Total|Used GPU Memory)")
|
smiInfo=$(echo "$smiOutput" | grep -A 3 -E "(Product Name|GPU UUID|Process ID|FB Memory Usage)" | grep -E "(Product Name|GPU UUID|Process ID|Total|Used GPU Memory)")
|
||||||
|
@ -66,6 +79,7 @@ echo "$smiInfo" | \
|
||||||
BEGIN {
|
BEGIN {
|
||||||
n=-1;
|
n=-1;
|
||||||
split("", gpu);
|
split("", gpu);
|
||||||
|
stderr="/dev/stderr"
|
||||||
}
|
}
|
||||||
|
|
||||||
$0 ~ "^Product Name" {
|
$0 ~ "^Product Name" {
|
||||||
|
@ -84,7 +98,7 @@ $0 ~ "^Total" {
|
||||||
}
|
}
|
||||||
|
|
||||||
$0 ~ "^Process ID" {
|
$0 ~ "^Process ID" {
|
||||||
"ps -axo pid,user | sed \"s/^ *//g\" | grep \"^"$2" \" 2>/dev/null | cut -d\" \" -f 2 | sed -e \"s/^[^A-Za-z_]/_/\" -e \"s/[^A-Za-z0-9_]/_/g\" -e \"s/^root$/__root/\" | tr \"\n\" \" \" | tr -d \" \"" | getline tmpid
|
"ps -axo pid,user | sed \"s/^ *//g\" | grep \"^"$2" \" 2>/dev/null | cut -d\" \" -f 2 | sed -e \"s/^[^A-Za-z_]/_/\" -e \"s/[^A-Za-z0-9_]/_/g\" | tr \"\n\" \" \" | tr -d \" \"" | getline tmpid
|
||||||
if (tmpid == "") tmpid = "other";
|
if (tmpid == "") tmpid = "other";
|
||||||
m = getUserIdxInGpu(n, tmpid);
|
m = getUserIdxInGpu(n, tmpid);
|
||||||
gpu["user", n, m] = tmpid;
|
gpu["user", n, m] = tmpid;
|
||||||
|
@ -99,8 +113,8 @@ $0 ~ "^Used GPU Memory" {
|
||||||
END {
|
END {
|
||||||
if (n < 0) {
|
if (n < 0) {
|
||||||
|
|
||||||
print "No NVIDIA GPUs detected. Exiting."
|
print "No NVIDIA GPUs detected. Exiting." > stderr;
|
||||||
exit;
|
exit 1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -236,5 +250,4 @@ function getUserCountInGpu(_n) {
|
||||||
}
|
}
|
||||||
return j;
|
return j;
|
||||||
}
|
}
|
||||||
|
|
||||||
'
|
'
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue