mirror of
https://github.com/munin-monitoring/contrib.git
synced 2025-07-25 02:18:08 +00:00
Initial version
This commit is contained in:
parent
11d9b90be7
commit
938d414337
1 changed files with 137 additions and 0 deletions
137
plugins/other/nvidia_smi_
Executable file
137
plugins/other/nvidia_smi_
Executable file
|
@ -0,0 +1,137 @@
|
|||
#!/usr/bin/perl -w
|
||||
# -*- perl -*-
|
||||
#
|
||||
# Script to monitor NVIDIA Graphics Card.
|
||||
#
|
||||
# Parameters understood:
|
||||
#
|
||||
# config (required)
|
||||
# autoconf (optional - used by munin-config)
|
||||
#
|
||||
# Magic markers (optional - used by munin-config and installation
|
||||
# scripts):
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf suggest
|
||||
|
||||
use strict;
|
||||
use XML::Simple;
|
||||
|
||||
my $nvidia_smi = $ENV{nvidia_smi} || "/usr/bin/nvidia-smi";
|
||||
|
||||
|
||||
## Munin autoconf method.
|
||||
if (exists $ARGV[0] and $ARGV[0] eq "autoconf" ) {
|
||||
if (! (-e $nvidia_smi)){
|
||||
printf "no (file $nvidia_smi does not exists)\n";
|
||||
exit 0;
|
||||
}
|
||||
# Now see if "nvidia-smi" can run
|
||||
if (! (-x $nvidia_smi)){
|
||||
printf "no (file $nvidia_smi exists, but not executable)\n";
|
||||
exit 0;
|
||||
}
|
||||
|
||||
my $text = `$nvidia_smi -a 2>/dev/null | grep GPU`;
|
||||
if ($?) {
|
||||
print "no (No GPUs found. Check '$nvidia_smi -a' output)\n";
|
||||
exit 0;
|
||||
}
|
||||
|
||||
print "yes\n";
|
||||
exit 0;
|
||||
}
|
||||
|
||||
|
||||
## Munin suggest method.
|
||||
if (defined $ARGV[0] and $ARGV[0] eq 'suggest') {
|
||||
# FIXME: SHould be done in pure-perl
|
||||
my $gpus = `$nvidia_smi -a | egrep ^GPU | sed -e "s/ //g" | sed -e "s/://g" | tr [:upper:] [:lower:]`;
|
||||
print $gpus if defined $gpus; #FIXME
|
||||
exit 0;
|
||||
}
|
||||
|
||||
$0 =~ /nvidia_smi_gpu(.+)*$/;
|
||||
my $gpu_id = $1;
|
||||
exit 2 unless defined $gpu_id;
|
||||
|
||||
# Get XML with sensor values for GPU with particular ID
|
||||
my $data = `$nvidia_smi -g $gpu_id -x` or die "Could not run $nvidia_smi: $!\n";
|
||||
|
||||
# Parse XML into easy accessable hash-tree
|
||||
my $ref = XMLin($data);
|
||||
my %gpu = (); # Will contain values cleaned form percent and Celsius signs
|
||||
|
||||
if ( exists $ref->{gpu}->{temp} ){
|
||||
$ref->{gpu}->{temp} =~ /^(.+) C$/;
|
||||
$gpu{temp} = $1;
|
||||
}
|
||||
|
||||
if ( exists $ref->{gpu}->{fan_speed} ){
|
||||
$ref->{gpu}->{fan_speed} =~ /^(.+)\%$/;
|
||||
$gpu{fan} = $1;
|
||||
}
|
||||
|
||||
if ( exists $ref->{gpu}->{utilization}->{gpu_util} ){
|
||||
$ref->{gpu}->{utilization}->{gpu_util} =~ /^(.+)\%$/;
|
||||
$gpu{util} = $1;
|
||||
}
|
||||
|
||||
if ( exists $ref->{gpu}->{utilization}->{memory_util} ){
|
||||
$ref->{gpu}->{utilization}->{memory_util} =~ /^(.+)\%$/;
|
||||
$gpu{mem} = $1;
|
||||
}
|
||||
|
||||
$gpu{model} = $ref->{gpu}->{prod_name} if exists $ref->{gpu}->{prod_name};
|
||||
$gpu{driver} = $ref->{driver_version} if exists $ref->{driver_version};
|
||||
|
||||
my $card_model = $gpu{model} || "<undetermined>";
|
||||
my $driver_version = $gpu{driver} || "<undetermined>";
|
||||
|
||||
## Munin config method.
|
||||
if (exists $ARGV[0] and $ARGV[0] eq "config") {
|
||||
print "graph_title $card_model sensors\n";
|
||||
print "graph_args --base 1000\n";
|
||||
print "graph_args --upper-limit 100 -l 0\n";
|
||||
print "graph_category sensors\n";
|
||||
print "graph_vlabel % or C\n";
|
||||
print "graph_info This graph shows information about your $card_model graphics card running driver version $driver_version.\n";
|
||||
|
||||
if (exists $gpu{temp}) {
|
||||
print "gpu_temp.label GPU Temperature (C)\n";
|
||||
print "gpu_temp.info GPU temperature sensor\n";
|
||||
print "gpu_temp.draw LINE2\n";
|
||||
print "gpu_temp.warning :80\n";
|
||||
print "gpu_temp.critical :100\n";
|
||||
}
|
||||
|
||||
if (exists $gpu{mem}) {
|
||||
print "gpu_mem.label Memory consumption (%)\n";
|
||||
print "gpu_mem.info How much of on-board memory is used\n";
|
||||
print "gpu_mem.draw LINE2\n";
|
||||
print "gpu_mem.warning :85\n";
|
||||
print "gpu_mem.critical :95\n";
|
||||
}
|
||||
|
||||
if (exists $gpu{util}) {
|
||||
print "gpu_util.label GPU Utilization (%)\n";
|
||||
print "gpu_util.info How much computational resourses are used\n";
|
||||
print "gpu_util.draw LINE2\n";
|
||||
}
|
||||
|
||||
if (exists $gpu{fan}) {
|
||||
print "gpu_fan.label Fan Speed (%)\n";
|
||||
print "gpu_fan.info Fan RPM in precent of maximum\n";
|
||||
print "gpu_fan.draw LINE2\n";
|
||||
print "gpu_fan.warning :80\n";
|
||||
print "gpu_fan.critical :95\n";
|
||||
}
|
||||
|
||||
exit 0;
|
||||
}
|
||||
|
||||
|
||||
print "gpu_temp.value ",$gpu{temp},"\n" if exists $gpu{temp};
|
||||
print "gpu_mem.value ", $gpu{mem}, "\n" if exists $gpu{mem};
|
||||
print "gpu_util.value ",$gpu{util},"\n" if exists $gpu{util};
|
||||
print "gpu_fan.value ", $gpu{fan}, "\n" if exists $gpu{fan};
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue