mirror of
https://github.com/munin-monitoring/contrib.git
synced 2025-07-21 18:41:03 +00:00
Configurable warning and critical temperatures for GPUs
This commit is contained in:
parent
758ca724a0
commit
10b1de81bb
2 changed files with 10 additions and 9 deletions
|
@ -9,7 +9,7 @@ amd_gpu_ - Wildcard plugin to monitor AMD GPUs. Uses aticonfig utility,
|
|||
usually bundled with AMD GPU driver, to obtain information. To use this
|
||||
plugin you have to make sure aticonfig will run without an active X
|
||||
server (i.e. without anyone being logged in via the GUI). For more
|
||||
information on this visit this link:
|
||||
information about this issue, visit the link below:
|
||||
http://www.mayankdaga.com/running-opencl-applications-remotely-on-amd-gpus/
|
||||
|
||||
=head1 CONFIGURATION
|
||||
|
@ -20,8 +20,10 @@ value to monitor.
|
|||
This plugin uses the following configuration variables:
|
||||
|
||||
[amd_gpu_*]
|
||||
env.aticonfexec - Location of aticonfig executable.
|
||||
user root
|
||||
env.aticonfexec - Location of aticonfig executable.
|
||||
env.warning - Warning temperature threshold (defaults to 75 if unset)
|
||||
env.critical - Critical temperature threshold (defaults to 95 if unset)
|
||||
|
||||
=head2 DEFAULT CONFIGURATION
|
||||
|
||||
|
@ -105,8 +107,8 @@ if [ "$1" = "config" ]; then
|
|||
while [ $nGpusCounter -lt $nGpus ]
|
||||
do
|
||||
gpuName=`echo "$nGpusOutput" | grep "* 0" | cut -f 1,3 --complement -d " "`
|
||||
echo "temp${nGpusCounter}.warning 75"
|
||||
echo "temp${nGpusCounter}.critical 95"
|
||||
echo "temp${nGpusCounter}.warning ${warning:-75}"
|
||||
echo "temp${nGpusCounter}.critical ${critical:-95}"
|
||||
echo "temp${nGpusCounter}.info Temperature information for $gpuName"
|
||||
echo "temp${nGpusCounter}.label Temperature ($gpuName)"
|
||||
: $(( nGpusCounter = $nGpusCounter + 1 ))
|
||||
|
@ -232,7 +234,6 @@ do
|
|||
done
|
||||
|
||||
# TODO Follow Flameeyes' suggestion to look into multigraph plugins (http://munin-monitoring.org/wiki/MultigraphSampleOutput) in order to reduce the number of round trips needed to get the data.
|
||||
# TODO Put warning and critical as vars in config with sensible defaults
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@ This plugin uses the following configuration variables:
|
|||
|
||||
[nvidia_gpu_*]
|
||||
env.smiexec - Location of nvidia-smi executable.
|
||||
env.warning - Warning temperature threshold (defaults to 75 if unset)
|
||||
env.critical - Critical temperature threshold (defaults to 95 if unset)
|
||||
|
||||
=head2 DEFAULT CONFIGURATION
|
||||
|
||||
|
@ -101,8 +103,8 @@ if [ "$1" = "config" ]; then
|
|||
while [ $nGpusCounter -lt $nGpus ]
|
||||
do
|
||||
gpuName=`echo "$nGpusOutput" | sed -n $(( $nGpusCounter + 1 ))p | cut -d \( -f 1`
|
||||
echo "temp${nGpusCounter}.warning 75"
|
||||
echo "temp${nGpusCounter}.critical 95"
|
||||
echo "temp${nGpusCounter}.warning ${warning:-75}"
|
||||
echo "temp${nGpusCounter}.critical ${critical:-95}"
|
||||
echo "temp${nGpusCounter}.info Temperature information for $gpuName"
|
||||
: $(( nGpusCounter = $nGpusCounter + 1 ))
|
||||
done
|
||||
|
@ -205,8 +207,6 @@ do
|
|||
done
|
||||
|
||||
# TODO Follow Flameeyes' suggestion to look into multigraph plugins (http://munin-monitoring.org/wiki/MultigraphSampleOutput) in order to reduce the number of round trips needed to get the data.
|
||||
# TODO Put warning and critical as vars in config with sensible defaults
|
||||
|
||||
# TODO Nvidia only: Add the nvidia-smi output options that this plugin does not yet support, for those who have them available (how to test?). Test whether each option is supported and include it in suggest only if it is.
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue