1
0
Fork 0
mirror of https://github.com/munin-monitoring/contrib.git synced 2025-07-22 02:51:03 +00:00

Configurable warning and critical temperatures for GPUs

This commit is contained in:
Nuno Fachada 2013-11-12 11:29:12 +00:00 committed by Steve Schnepp
parent 758ca724a0
commit 10b1de81bb
2 changed files with 10 additions and 9 deletions

View file

@@ -17,6 +17,8 @@ This plugin uses the following configuration variables:
[nvidia_gpu_*]
env.smiexec - Location of nvidia-smi executable.
env.warning - Warning temperature
env.critical - Critical temperature
=head2 DEFAULT CONFIGURATION
@@ -101,8 +103,8 @@ if [ "$1" = "config" ]; then
while [ $nGpusCounter -lt $nGpus ]
do
gpuName=`echo "$nGpusOutput" | sed -n $(( $nGpusCounter + 1 ))p | cut -d \( -f 1`
echo "temp${nGpusCounter}.warning 75"
echo "temp${nGpusCounter}.critical 95"
echo "temp${nGpusCounter}.warning ${warning:-75}"
echo "temp${nGpusCounter}.critical ${critical:-95}"
echo "temp${nGpusCounter}.info Temperature information for $gpuName"
: $(( nGpusCounter = $nGpusCounter + 1 ))
done
@@ -205,8 +207,6 @@ do
done
# TODO Follow multigraph suggestion from Flameeyes to look into multigraph plugins http://munin-monitoring.org/wiki/MultigraphSampleOutput, in order to reduce the amount of round trips to get the data.
# TODO Put warning and critical as vars in config with sensible defaults
# TODO Nvidia only: Add unsupported output options from nvidia-smi for those who have that option (how to test?). Test if they are supported and put them in suggest (or not) in case they are supported (or not)