From 8b1e467b7cd5ce035a7c163d6fbc2b8faa954fd8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Nov 2012 08:52:06 +0000 Subject: [PATCH 1/7] Improve the HTTP plugin by allowing configuring a proxy and friendly name per URL. Note that the configuration file syntax has changed. You need one "url" variable per URL now, numbered starting with 1, e.g. url1, url2, url3. Each one can be configured by suffixing its variable, e.g. url1_proxy, url2_proxy, etc. --- plugins/http/http_request_time | 82 +++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 27 deletions(-) diff --git a/plugins/http/http_request_time b/plugins/http/http_request_time index 509db0d8..a5852a16 100755 --- a/plugins/http/http_request_time +++ b/plugins/http/http_request_time @@ -10,7 +10,12 @@ =head1 CONFIGURATION [http_request_time] - env.url http://127.0.0.1/1 http://127.0.0.1/2 http://127.0.0.1/3 + env.url1 http://127.0.0.1/1 + env.url2 http://127.0.0.1/2 + env.url3 http://www.example.com + env.url3_name some_munin_internal_name + env.url3_label Some random page on our website + env.url3_proxy http://firewall:3128 =head1 MAGIC MARKERS @@ -41,7 +46,6 @@ sub clean { return $surl; }; - if (! eval "require LWP::UserAgent;") { $ret = "LWP::UserAgent not found"; @@ -50,12 +54,19 @@ if (! 
eval "require LWP::UserAgent;") } } -my $URL = $ENV{'url'}?$ENV{'url'}:"http://127.0.0.1/"; my %URLS; -foreach $_ (split(/ /,$URL)){ - $URLS{$_}={ - url=>$_, - surl=>clean($_), + +for (my $i = 1; $ENV{"url$i"}; $i++) +{ + my $url = $ENV{"url$i"}; + my $proxy = $ENV{"url${i}_proxy"}; + my $name = $ENV{"url${i}_name"} || clean($url); + my $label = $ENV{"url${i}_label"} || $url; + + $URLS{$name}={ + url=>$url, + proxy=>$proxy, + label=>$label, time=>'U' }; } @@ -94,50 +105,67 @@ if ( defined $ARGV[0] and $ARGV[0] eq "config" ) print "graph_category other\n"; my @go; - foreach my $url (values %URLS) { - print "$$url{'surl'}.label $$url{'url'}\n"; - print "$$url{'surl'}.info The response time of a single request\n"; - print "$$url{'surl'}.min 0\n"; - print "$$url{'surl'}.draw LINE1\n"; - push(@go,$$url{'surl'}); + foreach my $name (keys %URLS) { + my $url = $URLS{$name}; + print "$name.label $$url{'label'}\n"; + print "$name.info The response time of a single request\n"; + print "$name.min 0\n"; + print "$name.draw LINE1\n"; + push(@go, $name); } # multigraphs - foreach my $url (values %URLS) { - print "\nmultigraph http_request_time.$$url{'surl'}\n"; + foreach my $name (keys %URLS) { + my $url = $URLS{$name}; + print "\nmultigraph http_request_time.$name\n"; print "graph_title $$url{'url'}\n"; print "graph_args --base 1000\n"; print "graph_vlabel response time in ms\n"; print "graph_category other\n"; - print "$$url{'surl'}.label $$url{'url'}\n"; - print "$$url{'surl'}.info The response time of a single request\n"; - print "$$url{'surl'}.min 0\n"; - print "$$url{'surl'}.draw LINE1\n"; + print "$name.label $$url{'label'}\n"; + print "$name.info The response time of a single request\n"; + print "$name.min 0\n"; + print "$name.draw LINE1\n"; } exit 0; } my $ua = LWP::UserAgent->new(timeout => 15); +foreach my $name (keys %URLS) { + my $url = $URLS{$name}; -foreach my $url (values %URLS) { - my $t1=[gettimeofday]; + if ($url->{proxy}) { + $ua->proxy(['http', 'ftp'], 
$url->{proxy}); + } + else { + $ua->proxy(['http', 'ftp'], undef); + } + + # warm up my $response = $ua->request(HTTP::Request->new('GET',$$url{'url'})); + + # timed run + my $t1=[gettimeofday]; + $response = $ua->request(HTTP::Request->new('GET',$$url{'url'})); my $t2=[gettimeofday]; + if ($response->is_success) { $$url{'time'}=sprintf("%d",tv_interval($t1,$t2)*1000); }; }; print("multigraph http_request_time\n"); -foreach my $url (values %URLS) { - print("$$url{'surl'}.value $$url{'time'}\n"); -} -foreach my $url (values %URLS) { - print("\nmultigraph http_request_time.$$url{'surl'}\n"); - print("$$url{'surl'}.value $$url{'time'}\n"); +foreach my $name (keys %URLS) { + my $url = $URLS{$name}; + print("$name.value $$url{'time'}\n"); } +foreach my $name (keys %URLS) { + my $url = $URLS{$name}; + print("\nmultigraph http_request_time.$name\n"); + print("$name.value $$url{'time'}\n"); +} # vim:syntax=perl From 55aa46a3a03ef47dbe6bd106ce656f53e2d33f84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Nov 2012 09:31:37 +0000 Subject: [PATCH 2/7] Add our "all processes" monitoring plugin, cleaned up, and a page fault plugin. --- plugins/system/cpu_by_process | 106 +++++++++++++++++++++++++++ plugins/system/pagefaults_by_process | 105 ++++++++++++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100755 plugins/system/cpu_by_process create mode 100755 plugins/system/pagefaults_by_process diff --git a/plugins/system/cpu_by_process b/plugins/system/cpu_by_process new file mode 100755 index 00000000..f299a087 --- /dev/null +++ b/plugins/system/cpu_by_process @@ -0,0 +1,106 @@ +#!/usr/bin/perl +# +# Copyright 2012 Chris Wilson +# Copyright 2006 Holger Levsen +# +# This plugin monitors ALL processes on a system. No exceptions. It can +# produce very big graphs! 
But if you want to know where your CPU time +# is going without knowing what to monitor in advance, this can help; +# or in addition to one of the more specific CPU plugins to monitor +# just Apache or MySQL, for example. +# +# It's not obvious what the graph heights actually mean, even to me. +# Each counter is a DERIVE (difference since the last counter reading) +# of the CPU time usage (in seconds) accounted to each process, summed +# by the process name, so all Apache and all MySQL processes are grouped +# together. Processes with no CPU usage at all are ignored. Processes +# that die may not appear on the graph, and anyway their last chunk of +# CPU usage before they died is lost. You could modify this plugin to +# read SAR/psacct records if you care about that. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; version 2 dated June, +# 1991. + +#scriptname=`basename $0` +#vsname=`echo $scriptname | perl -ne '/^vserver_proc_VM_(.*)/ and print $1'` + +#if [ "$1" = "suggest" ]; then +# ls -1 /etc/vservers +# exit 0 +#elif [ -z "$vsname" ]; then +# echo "Must be used with a vserver name; try '$0 suggest'" >&2 +# exit 2 +#fi + +use strict; +use warnings; + +my $cmd = "ps -eo time,comm h"; +open PS, "$cmd|" or die "Failed to run ps command: $cmd: $!"; + +# my $header_line = ; +my %total_cpu_by_process; + +while () +{ + my @fields = split; + my $cputime = $fields[0]; + my $process = $fields[1]; + + # remove any / and everything after it from the process name, + # e.g. kworker/0:2 -> kworker + $process =~ s|/.*||; + + # remove any . at the end of the name (why does this appear?) 
+ # $process =~ s|\.$||; + + # change any symbol that's not allowed in a munin variable name to _ + $process =~ tr|a-zA-Z0-9|_|c; + + my @times = split /:/, $cputime; + $cputime = (($times[0] * 60) + $times[1]) * 60 + $times[2]; + $total_cpu_by_process{$process} += $cputime; +} + +foreach my $process (keys %total_cpu_by_process) +{ + # remove all processes with 0 cpu time + if (not $total_cpu_by_process{$process}) + { + delete $total_cpu_by_process{$process}; + } +} + +close(PS); + +if (@ARGV and $ARGV[1] == "config") +{ + print <&2 +# exit 2 +#fi + +use strict; +use warnings; + +my $cmd = "ps -eo maj_flt,comm h"; +open PS, "$cmd|" or die "Failed to run ps command: $cmd: $!"; + +# my $header_line = ; +my %total_by_process; + +while () +{ + my @fields = split; + my $value = $fields[0]; + my $process = $fields[1]; + + # remove any / and everything after it from the process name, + # e.g. kworker/0:2 -> kworker + $process =~ s|/.*||; + + # remove any . at the end of the name (why does this appear?) + # $process =~ s|\.$||; + + # change any symbol that's not allowed in a munin variable name to _ + $process =~ tr|a-zA-Z0-9|_|c; + + $total_by_process{$process} += $value; +} + +foreach my $process (keys %total_by_process) +{ + # remove all processes with 0 faults + if (not $total_by_process{$process}) + { + delete $total_by_process{$process}; + } +} + +close(PS); + +if (@ARGV and $ARGV[1] == "config") +{ + print < Date: Thu, 1 Nov 2012 09:32:21 +0000 Subject: [PATCH 3/7] These plugins should be executable. 
--- plugins/system/membyuser | 0 plugins/system/memory_by_process | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 plugins/system/membyuser mode change 100644 => 100755 plugins/system/memory_by_process diff --git a/plugins/system/membyuser b/plugins/system/membyuser old mode 100644 new mode 100755 diff --git a/plugins/system/memory_by_process b/plugins/system/memory_by_process old mode 100644 new mode 100755 From c00710c083e6c3b4b5264c8bf1d207c9805f28db Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Nov 2012 09:55:47 +0000 Subject: [PATCH 4/7] Fix process labels. --- plugins/system/cpu_by_process | 2 +- plugins/system/pagefaults_by_process | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/system/cpu_by_process b/plugins/system/cpu_by_process index f299a087..f452aa6f 100755 --- a/plugins/system/cpu_by_process +++ b/plugins/system/cpu_by_process @@ -89,7 +89,7 @@ END sub draw() { return $stack++ ? "STACK" : "AREA" } print map { - "$_.label $total_cpu_by_process{$_}\n" . + "$_.label $_\n" . "$_.min 0\n" . "$_.type DERIVE\n" . "$_.draw " . draw() . "\n" diff --git a/plugins/system/pagefaults_by_process b/plugins/system/pagefaults_by_process index 89ad2b79..a1f7f10e 100755 --- a/plugins/system/pagefaults_by_process +++ b/plugins/system/pagefaults_by_process @@ -88,7 +88,7 @@ END sub draw() { return $stack++ ? "STACK" : "AREA" } print map { - "$_.label $total_by_process{$_}\n" . + "$_.label $_\n" . "$_.min 0\n" . "$_.type DERIVE\n" . "$_.draw " . draw() . "\n" From fcbc8edbf8b36b0675e19b705f7a58e34fe95143 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 1 Nov 2012 10:29:10 +0000 Subject: [PATCH 5/7] Fix string comparison. 
--- plugins/system/cpu_by_process | 2 +- plugins/system/pagefaults_by_process | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/system/cpu_by_process b/plugins/system/cpu_by_process index f452aa6f..504ff309 100755 --- a/plugins/system/cpu_by_process +++ b/plugins/system/cpu_by_process @@ -75,7 +75,7 @@ foreach my $process (keys %total_cpu_by_process) close(PS); -if (@ARGV and $ARGV[1] == "config") +if (@ARGV and $ARGV[0] eq "config") { print < Date: Wed, 7 Nov 2012 15:31:10 +0000 Subject: [PATCH 6/7] Add a plugin for adding up any number column output by ps. --- plugins/system/total_by_process_ | 118 +++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100755 plugins/system/total_by_process_ diff --git a/plugins/system/total_by_process_ b/plugins/system/total_by_process_ new file mode 100755 index 00000000..5e3b1a1c --- /dev/null +++ b/plugins/system/total_by_process_ @@ -0,0 +1,118 @@ +#!/usr/bin/perl +# +# Copyright 2012 Chris Wilson +# Copyright 2006 Holger Levsen +# +# This plugin monitors ALL processes on a system. No exceptions. It can +# produce very big graphs! But if you want to know which processes are +# killing your system by page faulting, without knowing what to monitor +# in advance, this can help; or in addition to one of the more specific +# plugins to monitor just Apache or MySQL, for example. +# +# Each counter is a DERIVE (difference since the last counter reading) +# of the number of major page faults, usually 4k each, read in by a +# process. Memory mapped files probably contribute to this. The process +# cannot continue until the page fault is served, so this is a +# high-priority read that usually indicates memory starvation. +# Processes with no page faults at all are ignored. Processes +# that die may not appear on the graph, and anyway their last chunk of +# CPU usage before they died is lost. You could modify this plugin to +# read SAR/psacct records if you care about that. 
+# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; version 2 dated June, +# 1991. + +use strict; +use warnings; + +my $scriptname = $0; +$scriptname =~ s|.*/||; +my $fieldname = ($scriptname =~ /^total_by_process_(.*)_(.*)/) ? $1 : undef; +my $fieldtype = ($scriptname =~ /^total_by_process_(.*)_(.*)/) ? $2 : undef; + +if (@ARGV and $ARGV[0] eq "suggest") +{ + system("ps L | cut -d' ' -f1"); + exit(0); +} + +if (!$fieldname) +{ + print STDERR "Must be used with a PS format specifier name; try '$0 suggest'\n"; + exit(2); +} + +unless ($fieldtype =~ /^(GAUGE|DERIVE)$/) +{ + print STDERR "Unknown field type $fieldtype: should be GAUGE or DERIVE\n"; + exit(2); +} + +my $cmd = "ps -eo $fieldname,comm h"; +open PS, "$cmd|" or die "Failed to run ps command: $cmd: $!"; + +# my $header_line = <PS>; +my %total_by_process; + +while (<PS>) +{ + my @fields = split; + my $value = $fields[0]; + my $process = $fields[1]; + + # remove any / and everything after it from the process name, + # e.g. kworker/0:2 -> kworker + $process =~ s|/.*||; + + # remove any . at the end of the name (why does this appear?) + # $process =~ s|\.$||; + + # change any symbol that's not allowed in a munin variable name to _ + $process =~ tr|a-zA-Z0-9|_|c; + + $total_by_process{$process} += $value; +} + +foreach my $process (keys %total_by_process) +{ + # remove all processes whose total is 0 + if (not $total_by_process{$process}) + { + delete $total_by_process{$process}; + } +} + +close(PS); + +if (@ARGV and $ARGV[0] eq "config") +{ + print < Date: Wed, 7 Nov 2012 15:33:11 +0000 Subject: [PATCH 7/7] Improve the left-side label of the graph. 
--- plugins/system/total_by_process_ | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/system/total_by_process_ b/plugins/system/total_by_process_ index 5e3b1a1c..a2da8fc0 100755 --- a/plugins/system/total_by_process_ +++ b/plugins/system/total_by_process_ @@ -92,6 +92,7 @@ if (@ARGV and $ARGV[1] == "config") graph_title $fieldname by Process graph_category system graph_info Shows total of $fieldname (reported by ps) for each process name +graph_vlabel $fieldname (from ps) END # graph_args --base 1000