mirror of
https://github.com/munin-monitoring/contrib.git
synced 2025-07-22 14:16:00 +00:00
commit
04e9d5bfa1
2 changed files with 39 additions and 10 deletions
|
@ -9,7 +9,7 @@ amd_gpu_ - Wildcard plugin to monitor AMD GPUs. Uses aticonfig utility,
|
||||||
usually bundled with AMD GPU driver, to obtain information. To use this
|
usually bundled with AMD GPU driver, to obtain information. To use this
|
||||||
plugin you have to make sure aticonfig will run without an active X
|
plugin you have to make sure aticonfig will run without an active X
|
||||||
server (i.e. without anyone being logged in via the GUI). For more
|
server (i.e. without anyone being logged in via the GUI). For more
|
||||||
information on this visit this link:
|
information about this issue visit the link below:
|
||||||
http://www.mayankdaga.com/running-opencl-applications-remotely-on-amd-gpus/
|
http://www.mayankdaga.com/running-opencl-applications-remotely-on-amd-gpus/
|
||||||
|
|
||||||
=head1 CONFIGURATION
|
=head1 CONFIGURATION
|
||||||
|
@ -20,12 +20,15 @@ value to monitor.
|
||||||
This plugin uses the following configuration variables:
|
This plugin uses the following configuration variables:
|
||||||
|
|
||||||
[amd_gpu_*]
|
[amd_gpu_*]
|
||||||
env.aticonfexec - Location of aticonfig executable.
|
|
||||||
user root
|
user root
|
||||||
|
env.aticonfexec - Location of aticonfig executable.
|
||||||
|
env.warning - Warning temperature
|
||||||
|
env.critical - Critical temperature
|
||||||
|
|
||||||
=head2 DEFAULT CONFIGURATION
|
=head2 DEFAULT CONFIGURATION
|
||||||
|
|
||||||
The default configuration is to set "env.aticonfexec" to /usr/bin/aticonfig.
|
The default configuration is to set "env.aticonfexec" to /usr/bin/aticonfig and
|
||||||
|
assume warning and critical temperatures of 75 and 95 degrees Celsius, respectively.
|
||||||
|
|
||||||
=head2 EXAMPLE WILDCARD USAGE
|
=head2 EXAMPLE WILDCARD USAGE
|
||||||
|
|
||||||
|
@ -33,6 +36,16 @@ C<ln -s /usr/share/munin/plugins/amd_gpu_ /etc/munin/plugins/amd_gpu_temp>
|
||||||
|
|
||||||
...will monitor the temperature of available AMD GPUs.
|
...will monitor the temperature of available AMD GPUs.
|
||||||
|
|
||||||
|
=head1 TODO
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item *
|
||||||
|
|
||||||
|
Use multigraphs for multiple GPUs (http://munin-monitoring.org/wiki/MultigraphSampleOutput).
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
=head1 AUTHOR
|
=head1 AUTHOR
|
||||||
|
|
||||||
Nuno Fachada
|
Nuno Fachada
|
||||||
|
@ -104,8 +117,8 @@ if [ "$1" = "config" ]; then
|
||||||
while [ $nGpusCounter -lt $nGpus ]
|
while [ $nGpusCounter -lt $nGpus ]
|
||||||
do
|
do
|
||||||
gpuName=`echo "$nGpusOutput" | grep "* 0" | cut -f 1,3 --complement -d " "`
|
gpuName=`echo "$nGpusOutput" | grep "* 0" | cut -f 1,3 --complement -d " "`
|
||||||
echo "temp${nGpusCounter}.warning 75"
|
echo "temp${nGpusCounter}.warning ${warning:-75}"
|
||||||
echo "temp${nGpusCounter}.critical 95"
|
echo "temp${nGpusCounter}.critical ${critical:-95}"
|
||||||
echo "temp${nGpusCounter}.info Temperature information for $gpuName"
|
echo "temp${nGpusCounter}.info Temperature information for $gpuName"
|
||||||
echo "temp${nGpusCounter}.label Temperature ($gpuName)"
|
echo "temp${nGpusCounter}.label Temperature ($gpuName)"
|
||||||
: $(( nGpusCounter = $nGpusCounter + 1 ))
|
: $(( nGpusCounter = $nGpusCounter + 1 ))
|
||||||
|
@ -231,5 +244,3 @@ do
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -17,10 +17,13 @@ This plugin uses the following configuration variables:
|
||||||
|
|
||||||
[nvidia_gpu_*]
|
[nvidia_gpu_*]
|
||||||
env.smiexec - Location of nvidia-smi executable.
|
env.smiexec - Location of nvidia-smi executable.
|
||||||
|
env.warning - Warning temperature
|
||||||
|
env.critical - Critical temperature
|
||||||
|
|
||||||
=head2 DEFAULT CONFIGURATION
|
=head2 DEFAULT CONFIGURATION
|
||||||
|
|
||||||
The default configuration is to set "env.smiexec" to /usr/bin/nvidia-smi.
|
The default configuration is to set "env.smiexec" to /usr/bin/nvidia-smi and
|
||||||
|
assume warning and critical temperatures of 75 and 95 degrees Celsius, respectively.
|
||||||
|
|
||||||
=head2 EXAMPLE WILDCARD USAGE
|
=head2 EXAMPLE WILDCARD USAGE
|
||||||
|
|
||||||
|
@ -28,6 +31,21 @@ C<ln -s /usr/share/munin/plugins/nvidia_gpu_ /etc/munin/plugins/nvidia_gpu_temp>
|
||||||
|
|
||||||
...will monitor the temperature of available GPUs.
|
...will monitor the temperature of available GPUs.
|
||||||
|
|
||||||
|
=head1 TODO
|
||||||
|
|
||||||
|
=over 4
|
||||||
|
|
||||||
|
=item *
|
||||||
|
|
||||||
|
Add support for specific professional GPU features such as number of compute
|
||||||
|
processes, clocks, power draw, utilization, and so on.
|
||||||
|
|
||||||
|
=item *
|
||||||
|
|
||||||
|
Use multigraphs for multiple GPUs (http://munin-monitoring.org/wiki/MultigraphSampleOutput).
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
=head1 AUTHOR
|
=head1 AUTHOR
|
||||||
|
|
||||||
Nuno Fachada
|
Nuno Fachada
|
||||||
|
@ -101,8 +119,8 @@ if [ "$1" = "config" ]; then
|
||||||
while [ $nGpusCounter -lt $nGpus ]
|
while [ $nGpusCounter -lt $nGpus ]
|
||||||
do
|
do
|
||||||
gpuName=`echo "$nGpusOutput" | sed -n $(( $nGpusCounter + 1 ))p | cut -d \( -f 1`
|
gpuName=`echo "$nGpusOutput" | sed -n $(( $nGpusCounter + 1 ))p | cut -d \( -f 1`
|
||||||
echo "temp${nGpusCounter}.warning 75"
|
echo "temp${nGpusCounter}.warning ${warning:-75}"
|
||||||
echo "temp${nGpusCounter}.critical 95"
|
echo "temp${nGpusCounter}.critical ${critical:-95}"
|
||||||
echo "temp${nGpusCounter}.info Temperature information for $gpuName"
|
echo "temp${nGpusCounter}.info Temperature information for $gpuName"
|
||||||
: $(( nGpusCounter = $nGpusCounter + 1 ))
|
: $(( nGpusCounter = $nGpusCounter + 1 ))
|
||||||
done
|
done
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue