diff --git a/plugins/system/cpu_linux_multi b/plugins/system/cpu_linux_multi new file mode 100755 index 00000000..4f2f10b4 --- /dev/null +++ b/plugins/system/cpu_linux_multi @@ -0,0 +1,387 @@ +#! /usr/bin/perl +######################################################################## +# # +# WARNING WARNING WARNING WARNING WARNING WARNING # +# # +# This plugin does not work properly with multiple masters # +# # +######################################################################## +# +# +# multigraph, supersampling, extended cpu information +# +# require: mpstat (to actually collect the data) +# require linux /proc +# (sorry, quick and dirty: the number of cpus is retrieved from /proc/cpuinfo) +# +# +# ENV (default): +# MUNIN_PLUGSTATE - pid and cache files go there +# +# ENV (user defined): +# MUNIN_UPDATERATE - rate at which to update (default: 1s) +# MUNIN_CACHEFLUSH_RATE - flush data every N batch (default: 1) +# MUNIN_MPSTAT - binary to use as mpstat +# +# increase cache flush rate if you have i/o performance issues +# warning: increasing the flush rate too much might cause partial writes, and loss +# of data. 0 to disable flush +# +# +# Parent graph: cpu usage per core/thread +# child graph(1): detailed cpu usage overall +# child graph(n): detailed cpu usage per thread +# +# Known bugs: +# +# Multi-Master +# If there are many masters, the data is only sent once. Each master will +# only have part of the data. +# +# Everlasting +# The daemon is launched on first config/fetch. A touch of the pidfile is +# done on every following config/fetch. The daemon should check if the +# pidfile is recent (configurable) enough, and stop itself if not. +# +# Graph Order +# There is currently (2.0.6) no way to order childgraphs.
+#
+# RRD file
+#   The master currently (2.0.6) generates rrd files for aggregate values, and
+#   complains that no data is provided for them (but the graph still works
+#   fine)
+
+#%# family=auto
+#%# capabilities=autoconf
+
+use strict;
+use warnings;
+use IO::Handle;    # explicit, so $fh->flush() also works on perls older than 5.14
+
+# Plugin name = basename of the script; used in the pid/cache file names
+# and in the process title of the collector.
+my $plugin = $0;
+$plugin =~ s/.*\///;
+
+# order to display
+my $fields_order = [
+    'sys',
+    'usr',
+    'nice',
+    'idle',
+    'iowait',
+    'irq',
+    'soft',
+    'steal',
+    'guest',
+];
+# order is the order given by mpstat
+my $fields_info = [
+    {
+        name  => 'usr',
+        label => 'usr',
+        info  => "%s time spent in normal programs and daemons",
+    }, {
+        name  => 'nice',
+        label => 'nice',
+        info  => "%s time spent in nice(1)d programs and daemons",
+    }, {
+        name  => 'sys',
+        label => 'sys',
+        info  => "%s time spent in kernel system activity",
+    }, {
+        name  => 'iowait',
+        label => 'iowait',
+        info  => "%s time spent waiting for blocking I/O operations",
+    }, {
+        name  => 'irq',
+        label => 'irq',
+        info  => "%s time spent handling interrupts",
+    }, {
+        name  => 'soft',
+        label => 'soft',
+        info  => "%s time spent handling software interrupts",
+    }, {
+        name  => 'steal',
+        label => 'steal',
+        info  => "%s time spent elsewhere (stolen from us)",
+    }, {
+        name  => 'guest',
+        label => 'guest',
+        info  => "%s time spent in a guest operating system",
+    }, {
+        name  => 'idle',
+        label => 'idle',
+        info  => "%s time spent idling (waiting to get something to do)",
+    }
+];
+
+# mpstat sampling interval (MUNIN_UPDATERATE, positive integer); an invalid
+# value is reported on STDERR and the default is kept
+my $update_rate = 1;
+if (defined $ENV{MUNIN_UPDATERATE}) {
+    if ($ENV{MUNIN_UPDATERATE} =~ /^[1-9][0-9]*$/) {
+        $update_rate = int($ENV{MUNIN_UPDATERATE});
+    } else {
+        print STDERR "Invalid update_rate: $ENV{MUNIN_UPDATERATE}\n";
+    }
+}
+
+# flush the cache file every N lines written to it (MUNIN_CACHEFLUSH_RATE);
+# 0 disables the explicit flush
+my $flush_interval = 1;
+if (defined $ENV{MUNIN_CACHEFLUSH_RATE}) {
+    if ($ENV{MUNIN_CACHEFLUSH_RATE} =~ /^[0-9]+$/) {
+        $flush_interval = int($ENV{MUNIN_CACHEFLUSH_RATE});
+    } else {
+        print STDERR "Invalid flush rate: $ENV{MUNIN_CACHEFLUSH_RATE}\n";
+    }
+}
+
+# mpstat command (MUNIN_MPSTAT). The value may carry arguments after the
+# binary (run_autoconf() strips them the same way), so only the part before
+# the first space is checked on disk. When that file does not exist the
+# default command is kept.
+my $mpstat = "mpstat";
+if (defined $ENV{MUNIN_MPSTAT}) {
+    (my $mpstat_bin = $ENV{MUNIN_MPSTAT}) =~ s/ .*//;
+    if (-f $mpstat_bin) {
+        $mpstat = $ENV{MUNIN_MPSTAT};
+    } else {
+        print STDERR "MUNIN_MPSTAT: file not found: $ENV{MUNIN_MPSTAT}\n";
+    }
+}
+
+# state files, both under $MUNIN_PLUGSTATE
+sub pidfile()   { "$ENV{MUNIN_PLUGSTATE}/munin.$plugin.pid" }
+sub cachefile() { "$ENV{MUNIN_PLUGSTATE}/munin.$plugin.cache" }
+
+# fixed strings used when describing the graphs
+sub graph_section()   { "system:cpu" };
+sub graph_name()      { "cpu_extended_multi_1s" };
+sub graph_title()     { "CPU usage" };
+sub graph_title_all() { "Overall CPU usage" };
+sub graph_title_n($)  { "CPU#" . shift . " usage" };
+# process title of the collector
+sub acquire_name()    { "<$plugin> collecting information" }
+
+my $cpu_count_cache = undef;
+# Number of "processor" entries in /proc/cpuinfo. The file is read on the
+# first call only; later calls return the cached value. Returns 0 when
+# /proc/cpuinfo cannot be opened.
+sub cpu_count() {
+    if (not defined $cpu_count_cache) {
+        $cpu_count_cache = 0;
+        if (open my $cpuinfo, '<', '/proc/cpuinfo') {
+            # grep in scalar context yields the number of matching lines
+            $cpu_count_cache = grep { /^processor/ } <$cpuinfo>;
+            close $cpuinfo;
+        }
+    }
+    return $cpu_count_cache;
+}
+
+# Returns 1 when the pid recorded in the pidfile can be signalled, else 0.
+# A pidfile that is unreadable, empty, malformed, or that names a process
+# we cannot signal is removed.
+sub is_running() {
+    return 0 unless -f pidfile();
+
+    my $pid;
+    if (open my $pid_fh, '<', pidfile()) {
+        $pid = <$pid_fh>;
+        close $pid_fh;
+        chomp $pid if defined $pid;    # empty file: nothing to chomp
+    }
+    # signal 0 delivers nothing; it only reports whether the process can be
+    # signalled
+    if (defined $pid and $pid =~ /^[1-9][0-9]*$/ and kill 0, $pid) {
+        return 1;
+    }
+    # stale pidfile: drop it so that run_daemon() starts a new collector
+    unlink(pidfile());
+    return 0;
+}
+
+
+# FIXME: should also trap kill sigint and sigterm
+# FIXME: check pidfile got touched recently
+#
+# Collector loop. Writes our pid to the pidfile, then appends one line per
+# per-cpu mpstat row to the cache file as "<cpu> <epoch> <remaining mpstat
+# columns>". Dies when a file or the mpstat pipe cannot be opened. Returns
+# only once mpstat's output ends; the pid and cache files are then removed.
+sub acquire() {
+    $0 = acquire_name();
+    open my $pid_fh, '>', pidfile() or die "open: @{[ pidfile() ]}: $!\n";
+    print {$pid_fh} $$, "\n";
+    close $pid_fh;
+    open my $cache, '>>', cachefile() or die "open: @{[ cachefile() ]}: $!\n";
+    # single-string form on purpose: $mpstat may contain arguments
+    open my $mpstat_out, '-|', "$mpstat -P ALL $update_rate" or
+        die "open mpstat|: $!\n";
+    my $flush_count = 0;
+    while (my $sample = <$mpstat_out>) {
+        chomp $sample;
+        my @field = split ' ', $sample;
+        # keep only rows whose second column is "all" or a cpu number;
+        # blank lines and header rows are skipped
+        # NOTE(review): if mpstat prints a 12-hour clock the second column
+        # is presumably AM/PM and every row is skipped - TODO confirm and
+        # force a 24-hour time format if so
+        next unless defined $field[1] and $field[1] =~ /^(all|[0-9]+)$/;
+        # replace mpstat's time stamp by the cpu id, and the cpu column by
+        # the epoch time of the sample
+        $field[0] = $field[1];
+        $field[1] = time();
+        print {$cache} join(" ", @field), "\n";
+        if ($flush_interval and $flush_interval == ++$flush_count) {
+            $cache->flush();
+            $flush_count = 0;
+        }
+    }
+    close $mpstat_out;
+    close $cache;
+    unlink(pidfile());
+    unlink(cachefile());
+}
+
+# Keeps the collector alive: when one is already running the pidfile's
+# timestamps are refreshed, otherwise a child is forked to run acquire().
+sub run_daemon() {
+    if (is_running()) {
+        my $atime;
+        my $mtime;
+
$atime = $mtime = time;
+        utime $atime, $mtime, pidfile();
+    } else {
+        # NOTE(review): fork() returns undef on failure and undef == 0 is
+        # true (with a warning), so a failed fork sends the calling process
+        # itself into the collector branch below.
+        if (0 == fork()) {
+            # child: re-point the standard streams at /dev/null, then
+            # collect until mpstat ends
+            close(STDIN);
+            close(STDOUT);
+            close(STDERR);
+            open STDIN, "<", "/dev/null";
+            open STDOUT, ">", "/dev/null";
+            open STDERR, ">", "/dev/null";
+            acquire();
+            exit(0);
+        }
+    }
+}
+
+
+# autoconf: prints "yes" when which(1) outputs a path for the mpstat
+# binary, "no" otherwise.
+sub run_autoconf() {
+    # in case we have specified args, check the file before that
+    my $file = $mpstat;
+    $file =~ s/ .*//;
+    my $path = `which "$file"`;
+    if ($path) {
+        print "yes\n";
+    } else {
+        print "no\n";
+    }
+}
+
+# show_config(i, name, title): prints the configuration of one graph.
+# $name is substituted into each field's info text.
+# NOTE(review): in this copy the text between "print <" and "{name} eq" is
+# missing (presumably a heredoc and the head of a loop over @$fields_info
+# that declares $field and $style) - restore it from the upstream plugin
+# before applying.
+sub show_config($$$) {
+    my $i = shift;
+    my $name = shift;
+    my $title = shift;
+    # fields are listed in display order
+    my $graph_order = "graph_order";
+    for my $field (@$fields_order) {
+        $graph_order .= " $field";
+    }
+    print <{name} eq $fields_order->[0]) {
+            $style = "AREA";
+        }
+        print <{name}.label $field->{label}
+$field->{name}.draw $style
+$field->{name}.info @{[ sprintf($field->{info}, $name) ]}
+$field->{name}.min 0
+$field->{name}.cdef $field->{name}
+EOF
+    }
+}
+
+# config: makes sure the collector runs, then builds the parent graph's
+# order from the idle field of every cpu.
+# NOTE(review): everything between "print <" and "{name}.value" below is
+# missing in this copy; the end of run_config() and the head of the helper
+# that run_fetch() calls as fetch_showline() are not visible.
+sub run_config() {
+    run_daemon();
+    my $cpus = cpu_count();
+    my $graph_order = "graph_order";
+    my $sub_order = "order cpuall";
+    for (my $i = 0; $i < $cpus; ++$i) {
+        $graph_order .= " use$i=@{[ graph_name() ]}.cpu$i.idle";
+        $sub_order .= " cpu$i";
+    }
+# none of those seems to have any effect
+#domain_$sub_order
+#node_$sub_order
+#graph_$sub_order
+#service_$sub_order
+#category_$sub_order
+#group_$sub_order
+
+    print <{name}.value $line->[1]:$line->[$n]
+EOF
+        ++$n;
+    }
+}
+# fetch: makes sure the collector runs, reads the cached lines grouped by
+# their first field, empties the cache file, then prints the samples.
+# NOTE(review): the condition of the read loop is empty in this copy
+# (presumably a readline on CACHE was stripped), and the text between
+# "print <" and "{$cpu}})" is missing as well.
+sub run_fetch() {
+    run_daemon();
+    if (open CACHE, "+<", cachefile()) {
+        my $cpus = {};
+        while () {
+            chomp;
+            my $field = [];
+            @$field = split(/ /);
+            if (not defined $cpus->{$field->[0]}) {
+                $cpus->{$field->[0]} = [];
+            }
+            push @{$cpus->{$field->[0]}}, $field;
+        }
+        # finished reading ? truncate it right away
+        # NOTE(review): lines that acquire() appends between the end of the
+        # read loop and this truncate are discarded.
+        truncate CACHE, 0;
+        close CACHE;
+        foreach my $cpu (keys %$cpus) {
+            print <{$cpu}}) {
+                fetch_showline($line);
+            }
+        }
+    }
+}
+
+# command dispatch: the first argument selects the action, default is fetch
+my $cmd = 'fetch';
+if (defined $ARGV[0]) {
+    $cmd = $ARGV[0];
+}
+if ('fetch' eq $cmd) {
+    run_fetch();
+} elsif ('config' eq $cmd) {
+    run_config();
+} elsif ('autoconf' eq $cmd) {
+    run_autoconf();
+} elsif ('daemon' eq $cmd) {
+    run_daemon();
+} else {
+    print STDERR <