1
0
Fork 0
mirror of https://github.com/munin-monitoring/contrib.git synced 2025-07-24 18:07:20 +00:00

- have some dirs

This commit is contained in:
Steve Schnepp 2012-02-13 18:24:46 +01:00
parent 0b089ea777
commit 08346aac58
687 changed files with 0 additions and 0 deletions

67
plugins/disk/df_abs_bsd Executable file
View file

@ -0,0 +1,67 @@
#!/usr/bin/env python
# Copyright (c) 2008, Net Easy, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Net Easy, Inc. nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY Net Easy, Inc. ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Net Easy, Inc. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import re, os
from sys import argv
class checker(object):
    """Collect capacity and usage of local filesystems from ``df`` for Munin.

    ``parsed_data`` maps a Munin field name (the device name with every ``/``
    replaced by ``_``) to a tuple ``(capacity_kb, used_kb, mount_point)``.
    """

    def __init__(self):
        self.parsed_data = {}

    def __repr__(self):
        """Return the fetch output: one ``<field>.value <used bytes>`` line per filesystem."""
        return ''.join(
            '%s.value %s\n' % (field, int(used_kb * 1024))
            for field, (_capacity_kb, used_kb, _mount) in self.parsed_data.items()
        )

    def config(self):
        """Return the Munin ``config`` output.

        Every filesystem is labelled with its mount point; the warning and
        critical thresholds are 92% and 98% of its capacity, in bytes.
        """
        lines = [
            'graph_title Filesystem usage (in bytes)\n',
            'graph_args --base 1024 --lower-limit 0\n',
            'graph_vlabel bytes\n',
            'graph_category disk\n',
            'graph_info Filesystem usage\n',
        ]
        for field, (capacity_kb, _used_kb, mount) in self.parsed_data.items():
            lines.append('%s.label %s\n' % (field, mount))
            lines.append('%s.warning %s\n' % (field, int(capacity_kb * 1024 * 0.92)))
            lines.append('%s.critical %s\n' % (field, int(capacity_kb * 1024 * 0.98)))
        return ''.join(lines)

    def get_data(self):
        """Run ``df -P -l -k`` and fill ``parsed_data`` from its output."""
        pipe = os.popen('df -P -l -k')
        try:
            rawdata = pipe.readlines()
        finally:
            # The pipe was previously left open until garbage collection.
            pipe.close()
        self._parse_df_lines(rawdata)

    def _parse_df_lines(self, rawdata):
        """Parse ``df -P`` output lines (header first) into ``parsed_data``."""
        for line in rawdata[1:]:
            # Limit the split to the first five columns so that a mount point
            # containing spaces stays in one piece.
            fields = line.split(None, 5)
            if len(fields) < 6:
                # Not a regular "device size used avail capacity mount" row.
                continue
            field = fields[0].replace('/', '_')
            self.parsed_data[field] = (
                int(fields[1]),
                int(fields[2]),
                fields[5].rstrip('\r\n'),
            )
if __name__ == "__main__":
    # Munin calls the plugin with "config" to describe the graph and with no
    # argument to fetch the current values.
    processor = checker()
    processor.get_data()
    # print(x) with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    if len(argv) > 1 and argv[1] == 'config':
        print(processor.config())
    else:
        print(processor)

65
plugins/disk/df_bsd Executable file
View file

@ -0,0 +1,65 @@
#!/usr/bin/env python
# Copyright (c) 2008, Net Easy, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Net Easy, Inc. nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY Net Easy, Inc. ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Net Easy, Inc. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import re, os
from sys import argv
class checker(object):
    """Collect the usage percentage of local filesystems from ``df`` for Munin.

    ``parsed_data`` maps a Munin field name (the device name with every ``/``
    replaced by ``_``) to a tuple ``(used_percent, mount_point)``; the
    percentage is kept as the string printed by ``df`` without its ``%``.
    """

    def __init__(self):
        self.parsed_data = {}

    def __repr__(self):
        """Return the fetch output: one ``<field>.value <percent>`` line per filesystem."""
        return ''.join(
            '%s.value %s\n' % (field, percent)
            for field, (percent, _mount) in self.parsed_data.items()
        )

    def config(self):
        """Return the Munin ``config`` output with fixed 92/98 thresholds."""
        lines = [
            'graph_title Filesystem usage (in %)\n',
            'graph_args --lower-limit 0\n',
            'graph_vlabel %\n',
            'graph_category disk\n',
            'graph_info Filesystem usage\n',
        ]
        for field, (_percent, mount) in self.parsed_data.items():
            lines.append('%s.label %s\n' % (field, mount))
            lines.append('%s.warning 92\n' % field)
            lines.append('%s.critical 98\n' % field)
        return ''.join(lines)

    def get_data(self):
        """Run ``df -P -l`` and fill ``parsed_data`` from its output."""
        pipe = os.popen('df -P -l')
        try:
            rawdata = pipe.readlines()
        finally:
            # The pipe was previously left open until garbage collection.
            pipe.close()
        self._parse_df_lines(rawdata)

    def _parse_df_lines(self, rawdata):
        """Parse ``df -P`` output lines (header first) into ``parsed_data``."""
        for line in rawdata[1:]:
            # Limit the split to the first five columns so that a mount point
            # containing spaces stays in one piece.
            fields = line.split(None, 5)
            if len(fields) < 6:
                # Not a regular "device size used avail capacity mount" row.
                continue
            field = fields[0].replace('/', '_')
            self.parsed_data[field] = (
                fields[4].replace('%', ''),
                fields[5].rstrip('\r\n'),
            )
if __name__ == "__main__":
    # Munin calls the plugin with "config" to describe the graph and with no
    # argument to fetch the current values.
    processor = checker()
    processor.get_data()
    # print(x) with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    if len(argv) > 1 and argv[1] == 'config':
        print(processor.config())
    else:
        print(processor)

101
plugins/disk/df_with_nfs Executable file
View file

@ -0,0 +1,101 @@
#!/bin/sh
#
# Script to monitor disk usage.
#
# Parameters understood:
#
# config (required)
# autoconf (optional - used by munin-config)
#
# $Log$
# Revision 1.5.2.5 2011/04/20 14:25:07 ward
# Exclude tmpfs partitions from 'config'.
#
# Revision 1.5.2.4 2005/03/12 21:35:07 jimmyo
# Correct history loss in linux/{df,df_inode}.
#
# Revision 1.5.2.3 2005/03/10 10:04:48 jimmyo
# Fixed minor bug introduced with yesterdays change.
#
# Revision 1.5.2.2 2005/03/09 19:10:32 jimmyo
# Made linux/df work properly with tmpfs and devmapper (Deb#298442).
#
# Revision 1.5.2.1 2005/02/16 22:50:14 jimmyo
# linux/df* now ignores bind mounts.
#
# Revision 1.5 2004/12/09 20:27:45 jimmyo
# Sort fields in df*-plugins alphabetically.
#
# Revision 1.4 2004/09/25 22:29:16 jimmyo
# Added info fields to a bunch of plugins.
#
# Revision 1.3 2004/05/20 13:57:12 jimmyo
# Set categories to some of the plugins.
#
# Revision 1.2 2004/05/18 22:04:30 jimmyo
# Use "sed 1d" instead of "tail +2" in df plugins (patch by Olivier Delhomme).
#
# Revision 1.1 2004/01/02 18:50:01 jimmyo
# Renamed occurrances of lrrd -> munin
#
# Revision 1.1.1.1 2004/01/02 15:18:07 jimmyo
# Import of LRRD CVS tree after renaming to Munin
#
# Revision 1.2 2003/11/07 17:43:16 jimmyo
# Cleanups and log entries
#
#
#
# Magic markers (optional - used by munin-config and installation
# scripts):
#
#%# family=auto
#%# capabilities=autoconf
# Longest mount point shown unabbreviated in a field label.
MAXLABEL=20

# autoconf: this plugin always reports itself as usable.
case "$1" in
    autoconf)
        echo yes
        exit 0
        ;;
esac
# clean_name <fields of one "df -T -P" line>
# Prints the Munin field name for that line: the device name with every
# "/", "." and "-" turned into "_"; for tmpfs the sanitized mount point is
# appended so that several tmpfs mounts get distinct names.
clean_name() {
    echo $1 $7 $2 | sed 's/[\/.-]/_/g' | awk '{
        n = ($3 == "tmpfs") ? ($1 $2) : $1
        print n
    }'
}
# config: describe the graph and one field per listed filesystem.
if [ "$1" = "config" ]; then
    echo 'graph_title Filesystem usage (in %)'
    echo 'graph_args --upper-limit 100 -l 0'
    echo 'graph_vlabel %'
    echo 'graph_category disk'
    echo 'graph_info This graph shows disk usage on the machine.'
    # read -r keeps backslashes in the df output intact.
    df -T -P -l -x none -x unknown -x udf -x iso9660 -x romfs -x ramfs -x tmpfs | sed 1d | grep -v "//" | sort | while read -r i; do
        # $i is deliberately unquoted: clean_name expects the df columns as
        # separate positional parameters.
        name=`clean_name $i`
        # printf instead of "echo -n", whose behaviour is unspecified for sh.
        printf '%s.label ' "$name"
        # Values are handed to awk with -v instead of being spliced into the
        # program text. The label is the mount point, abbreviated with a
        # leading "..." when it is longer than MAXLABEL characters.
        echo $i | awk -v max="$MAXLABEL" -v name="$name" '{
            dir = $7
            if (length(dir) <= max)
                print dir
            else
                printf("...%s\n", substr(dir, length(dir) - max + 4, max - 3))
            print name ".info " $7 " (" $2 ") -> " $1
        }'
        echo "$name.warning 92"
        echo "$name.critical 98"
    done
    exit 0
fi
# fetch: print the usage percentage (column 6 of "df -T -P", without the
# trailing "%") for every listed filesystem.
df -T -P -l -x none -x unknown -x udf -x iso9660 -x romfs -x ramfs -x proc -x sysfs -x tmpfs -x udev | sed 1d | grep -v "//" | while read -r i; do
    # $i is deliberately unquoted: clean_name expects the df columns as
    # separate positional parameters.
    name=`clean_name $i`
    # printf instead of "echo -n", whose behaviour is unspecified for sh.
    printf '%s.value ' "$name"
    echo $i | awk '{ print $6 }' | cut -f1 -d%
done

109
plugins/disk/dirsizes Executable file
View file

@ -0,0 +1,109 @@
#!/usr/bin/perl
# -*- perl -*-
#
#
##############################################################################
#
#
# This munin plugin watches the sizes of the given directories.
# @author Kevin Fischer
# @version 2010/08/05
# @website http://kevin-fischer.de
#
# Copy this to your node's config file (default: plugin-conf.d/munin-node):
# [dirsizes]
# user root
# env.watchdirs /var/www,/tmp
#
# Change the env.watchdirs-variable according to your wishes.
# DONT FORGET TO RUN AS ROOT!
#
# You can test this plugin by calling it with params "test" and your watchdirs:
# ./dirsizes test /dir1,/tmp/dir2
#
#
##############################################################################
#
use strict;
# Directories to measure: taken from the second command line argument in
# "test" mode, from env.watchdirs otherwise (comma separated in both cases).
my @watchdirs;
if ( exists $ARGV[0] and $ARGV[0] eq "test" ) {
    # Split the watchdirs string
    @watchdirs = split( ",", $ARGV[1] );
}
else {
    # If no dirs are given, exit.
    if ( !defined( $ENV{"watchdirs"} ) ) {
        die "No directories given! See the manual at top of this plugin file.";
    }

    # Split the watchdirs string
    @watchdirs = split( ",", $ENV{"watchdirs"} );
}

# Config or read request?
if ( exists $ARGV[0] and $ARGV[0] eq "config" ) {

    # Munin basic info
    print "graph_title Directory sizes\n";
    print "graph_args --base 1024 --lower-limit 0\n";
    print "graph_vlabel directory size\n";
    print "graph_info Displays the sizes of all configured directories.\n";
    print "graph_category disk\n";
    print "graph_total Total\n";

    # One field per directory; the field name is built by the same helper
    # that the read request uses, so both always agree.
    foreach my $dir (@watchdirs) {
        my $label = niceLabelname($dir);
        print "dir", $label, ".label ", $dir, "\n";
    }
}

# Read request, output the directory sizes
else {
    foreach my $dir (@watchdirs) {

        # $label used to be declared twice in this scope; the first value was
        # never used.
        my $label   = niceLabelname($dir);
        my $dirsize = getSize($dir);
        print "dir", $label, ".value ", $dirsize, ".0\n";
    }
}
# Function: getSize($dir)
# Function: getSize($dir)
# Returns the first column of the last "total" line printed by `du -cb $dir`,
# or undef when du produced no such line.
sub getSize {
    my ($dir) = @_;

    # List-form pipe open: $dir reaches du as exactly one argument and no
    # shell is involved, so spaces and shell metacharacters in the configured
    # path are taken literally (shell wildcards are therefore not expanded).
    open( my $du, '-|', 'du', '-cb', '--', $dir ) or return;
    my $total_line;
    while ( my $line = <$du> ) {
        $total_line = $line if $line =~ /total/;
    }
    close($du);
    return unless defined $total_line;

    my @dirsize = split( ' ', $total_line );

    # Scalar element access; the original used the one-element slice
    # @dirsize[0].
    return $dirsize[0];
}
# Remove illegal characters
# Turns a directory path into a Munin field name fragment by replacing every
# "/" and "-" with "_".
sub niceLabelname {
    my $name = shift;
    $name =~ tr{/-}{__};
    return $name;
}
exit 0;

89
plugins/disk/du Executable file
View file

@ -0,0 +1,89 @@
#!/bin/bash
#################################################################
#
# Plugin to monitor the size of the specified directory
#
#################################################################
#
# Parameters understood:
#
# config (required)
# autoconf (optional - checks if the path exists etc, not so advanced feature)
#
#################################################################
#
# Requirements
# - bash (or change first line to sh instead of bash or any other shell)
# - existing and readable directory to scan
# - du command, it exists on most of the *nix operating systems
#
#################################################################
#
# Configuration
#
# directory to check
# Directory to check. The built-in default can be overridden without editing
# this script by setting "env.dir /some/path" for the plugin in plugin-conf.d.
DIR="${dir:-/var/cache/apache2/}"
# Unique id, in case you run several copies of this script; change as needed
# (probably obsolete, not tested). Can be overridden with "env.id".
ID="${id:-1}"
# - make sure that the user/group that executes this script has access to the directory you have configured;
# otherwise run it as another user: edit plugins-conf.d/munin-node and add the example below (not recommended),
# remembering to remove the hashes from the beginning of the lines
#
# [du]
# user root
#
# - by default the value is in MegaBytes, to change it you should edit below line in the script to something else, recognizeable by du (see man du)
# du -sm $DIR in MB
# du -sk $DIR in KB
#
#################################################################
#
# Changelog
#
# Revision 0.1 Tue 03 Feb 2009 02:16:02 PM CET _KaszpiR_
# - initial release,
#
#################################################################
# Magic markers (optional - used by munin-config and some installation
# scripts):
#%# family=auto
#%# capabilities=autoconf
#################################################################
#################################################################
# autoconf: usable only when the configured directory exists.
if [ "$1" = "autoconf" ]; then
    # "$DIR" is quoted so that a path containing spaces is tested as one word.
    if [ -d "$DIR" ]; then
        echo "yes"
        exit 0
    else
        echo "no (check your path)"
        exit 1
    fi
fi
# config: describe the graph and its single field dir<ID>.
if [ "$1" = "config" ]; then
    cat <<EOF
graph_title Directory size: $DIR
graph_vlabel size MB
graph_category disk
graph_info Size of $DIR
dir$ID.label size
dir$ID.min 0
dir$ID.info Shows du -sm for specified directory
EOF
    exit 0
fi
# fetch: print the size reported by "du -sm", or "U" (unknown) when the
# directory does not exist.
echo -n "dir$ID.value "
# "$DIR" is quoted so that a path containing spaces reaches test and du as
# one argument.
if [ -d "$DIR" ]; then
    SIZE=`du -sm "$DIR" | cut -f1`
    echo $SIZE
    exit 0
else
    echo "U"
    exit 1
fi

168
plugins/disk/du-2 Executable file
View file

@ -0,0 +1,168 @@
#!/usr/bin/perl
# vim: set filetype=perl sw=4 tabstop=4 expandtab smartindent: #
=head1 NAME
du - Plugin to monitor multiple directories size
=head1 AUTHOR
Luc Didry <luc AT didry.org>
April 2011
=head1 HOWTO CONFIGURE AND USE :
=over
=item - /etc/munin/plugin-conf.d/du_
[du]
user root
env.interval 20 # INTERVAL OF DU POLLING IN MINUTES
env.dirs /home/foo /home/bar # DIRECTORIES TO POLL
env.suppr /home/ # PLEASE USE \# INSTEAD OF #
timeout 900 # 15 MINUTES IN SECONDS
=item - /etc/munin/plugins-enabled
ln -svf ../plugins-available/site/du
=item - restart Munin node
sudo killall -TERM munin-node
=back
=head1 CREDITS
Based on the 'du_multidirs-v2' initially written in Bash by Christian Kujau <lists@nerdbynature.de> and modified by dano229.
This script was based on the 'homedirs' plugin, initially written in Perl by Philipp Gruber <pg@flupps.net>
=head1 MAGIC MARKERS
#%# family=auto
#%# capabilities=autoconf
=cut
use warnings;
use strict;
use Munin::Plugin;
use POSIX qw(setsid);
# Plugin name (not referenced by the rest of the script as shown here).
my $PLUGIN_NAME = "du";
# All state lives in Munin's plugin state directory:
# last complete du output, read by the config and fetch requests
my $CACHEFILE="$Munin::Plugin::pluginstatedir/du.cache";
# du writes here first; the result is then copied over the cache file
my $TEMPFILE="$Munin::Plugin::pluginstatedir/du.tmp";
# exists while a background du run is in progress
my $LOCKFILE="$Munin::Plugin::pluginstatedir/du.lock";
# epoch seconds (date +%s) of the last finished du run
my $TIMEFILE="$Munin::Plugin::pluginstatedir/du.time";
##### autoconf
# autoconf request: always answer "yes" and stop.
my $is_autoconf = defined $ARGV[0] && $ARGV[0] eq "autoconf";
if ($is_autoconf) {
    print "yes\n";
    munin_exit_done();
}
## In the parent, it's just a regular munin plugin which reads a file with the infos
##### config
# config request: describe the graph and one stacked field per cached
# directory, then stop.
if ( defined $ARGV[0] and $ARGV[0] eq "config" ) {
    print "graph_title Directory usage\n";
    print "graph_args --base 1024 -l 1\n";
    print "graph_vlabel Bytes\n";
    print "graph_category disk\n";
    print "graph_total total\n";
    print "graph_info This graph shows the size of several directories\n";

    open( my $cache, "<", $CACHEFILE ) or munin_exit_fail();
    my $fields_seen = 0;
    while ( defined( my $line = <$cache> ) ) {

        # Cache lines are du output: "<size> <path>".
        next unless $line =~ m/(\d+)\s+(.+)/;
        my $dir = $2;
        clean_path( \$dir );
        print "$dir.label $dir\n";

        # The first field is drawn as the base area, the others stack on it.
        my $draw = $fields_seen++ ? "STACK" : "AREA";
        print "$dir.draw $draw\n";
    }
    close($cache);
    ## Done !
    munin_exit_done();
}
##### fetch
# Print whatever the cache currently holds. A missing or unreadable cache
# (e.g. on the very first run) must not end the plugin here: the cache is
# only ever written by the child forked in daemonize() below, so bailing out
# before that call would prevent the cache from ever being created.
if ( open( my $cache, "<", $CACHEFILE ) ) {
    while ( defined( my $line = <$cache> ) ) {

        # Cache lines are du output: "<size> <path>".
        if ( $line =~ m/(\d+)\s+(.+)/ ) {
            my ( $field, $value ) = ( $2, $1 );
            clean_path( \$field );
            print $field, ".value ", $value, "\n";
        }
    }
    close($cache);
}
daemonize();
#
##
### PUBLiC FONCTiONS
###############################################################################
## Used to create the fork.
## The parent ends the plugin run successfully right after the fork; the
## child detaches from the caller and, when the cache is stale and no lock
## file exists, refreshes the cache with du. Does not return in either process.
sub daemonize {
chdir '/' or die "Can't chdir to /: $!";
defined(my $pid = fork) or die "Can't fork: $!";
# Parent: fork returned the child's pid, so finish here with exit code 0.
munin_exit_done() if $pid;
# Child: point the standard streams at /dev/null and start a new session.
open STDIN, '/dev/null' or die "Can't read /dev/null: $!";
open STDOUT, '>/dev/null' or die "Can't write to /dev/null: $!";
open STDERR, '>&STDOUT' or die "Can't dup stdout: $!";
setsid or die "Can't start a new session: $!";
## In the child, let's get the du infos if necessary
if (cache_is_too_old() && du_not_running()) {
# env.dirs is passed to the shell as is, so it may list several directories.
my $dirs = $ENV{dirs};
# One shell command line: take the lock, run du into the temp file, copy
# the result over the cache, release the lock and record the finish time.
system("touch $LOCKFILE; du -sb $dirs > $TEMPFILE; cat $TEMPFILE > $CACHEFILE; rm $LOCKFILE; date +%s > $TIMEFILE;");
}
exit;
} ## daemonize
## Strips the env.suppr pattern from the beginning of a path, when env.suppr
## is set. Takes a reference to the path and modifies it in place.
sub clean_path {
    my ($path_ref) = @_;
    return unless defined $ENV{suppr};

    my $pattern = $ENV{suppr};
    $$path_ref =~ s#^($pattern)##;
    return;
} ## clean_path
## Returns 1 when the cache needs a refresh: the time file is missing,
## unreadable or holds no timestamp, or the recorded timestamp is more than
## env.interval minutes old. Returns 0 otherwise.
sub cache_is_too_old {
    return 1 if ( !-e $TIMEFILE );

    # Read the timestamp directly instead of forking `cat`.
    open( my $fh, '<', $TIMEFILE ) or return 1;
    my $stamp = <$fh>;
    close($fh);

    # An empty or garbled time file counts as "too old" instead of feeding
    # undef into the arithmetic below.
    return 1 unless defined $stamp && $stamp =~ /^\s*(\d+)/;
    my $last_run = $1;

    # An unset env.interval behaves like 0 minutes, i.e. always refresh.
    my $interval = defined $ENV{interval} ? $ENV{interval} : 0;
    return 1 if ( ( time() - $last_run ) > ( $interval * 60 ) );
    return 0;
} ## cache_is_too_old
## Returns 1 when no lock file exists, 0 when one does.
sub du_not_running {
    return ( -e $LOCKFILE ) ? 0 : 1;
}
## Ends the plugin run with exit code 0.
sub munin_exit_done {
    my $success = 0;
    __munin_exit($success);
} ## sub munin_exit_done
## Ends the plugin run with exit code 1.
sub munin_exit_fail {
    my $failure = 1;
    __munin_exit($failure);
} ## sub munin_exit_fail
#
##
### iNTERNALS FONCTiONS
###############################################################################
## Exits with the given code, or with 1 when no code is given.
sub __munin_exit {
    my ($code) = @_;
    $code = 1 unless defined $code;
    exit($code);
} ## sub __munin_exit

33
plugins/disk/du_multidirs Executable file
View file

@ -0,0 +1,33 @@
#!/bin/sh
#
# (c)2009, Christian Kujau <lists@nerdbynature.de> modified by dano229
# Based on the 'homedirs' plugin, initially written in Perl by Philipp Gruber <pg@flupps.net>
#
# We still need a cronjob to update CACHEFILE once in a while, e.g.:
# 0 * * * * root [ -O /tmp/munin-du_multidirs.cache ] && du -sk /dir /dir2 dir3/* > /tmp/munin-du_multidirs.cache
#
# du output kept up to date by the cron job described above.
CACHEFILE=/tmp/munin-du_multidirs.cache

# autoconf: this plugin always reports itself as usable.
case "$1" in
    autoconf)
        echo yes
        exit 0
        ;;
esac
# config: describe the graph and one field per cached directory
# (lost+found entries are skipped).
if [ "$1" = "config" ]; then
    cat <<'EOF'
graph_title Directory usage
graph_args --base 1024 -l 1
graph_vlabel Bytes
graph_category disk
graph_info This graph shows the size of several directories
EOF
    awk '!/lost\+found/ {print $2 }' $CACHEFILE | sort | while read label; do
        # Field name: a leading character other than a letter or "_" becomes
        # "_", then every remaining character outside [A-Za-z0-9_] does too.
        field=`echo "$label" | sed -e 's/^[^A-Za-z_]/_/' -e 's/[^A-Za-z0-9_]/_/g'`
        echo "$field.label $label"
        echo "$field.draw LINE1"
        # echo "$field".warning 0
        # echo "$field".critical 0
    done
    exit 0
fi
# fetch: print "<field>.value <bytes>" for every cached directory. The field
# name must be derived exactly as in the config output, so the first
# substitution is anchored with "^" like its sed counterpart there; without
# the anchor a path such as "ab1/x" became "ab__x" here but "ab1_x" in config.
awk '!/lost\+found/ {
    sub(/^[^a-zA-Z_]/, "_", $2)
    gsub(/[^a-zA-Z0-9_]/, "_", $2)
    print $2".value "$1 * 1024
}' $CACHEFILE | sort -r -n -k2

35
plugins/disk/freedisk Executable file
View file

@ -0,0 +1,35 @@
#!/bin/bash
#
# Wildcard-plugin to monitor free disk.
#
# ln -s /usr/share/munin/plugins/freedisk_ /etc/munin/plugins/freedisk_hda1
#
# ...will monitor /dev/hda1.
#
# $Log$
# Revision 0.1 2010/11/01 guenter@grodotzki.ph
# init
# parse wildcard config
# "$0" is quoted so that an installation path containing spaces is not split
# into several basename arguments.
DISK=`basename "$0" | sed 's/^freedisk_//g'`
# output config
# config: describe the graph; both fields are scaled by 1024 through their
# cdef lines.
if [ "$1" = "config" ]; then
    cat <<EOF
graph_title Free Disk on /dev/$DISK
graph_args --base 1024
graph_vlabel Free Disk in Bytes
graph_category disk
freedisk.label free Bytes
freedisk.draw LINE3
freedisk.cdef freedisk,1024,*
totaldisk.label total Bytes
totaldisk.draw AREA
totaldisk.cdef totaldisk,1024,*
EOF
    exit 0
fi
# get data
# Pick exactly one df line: the one whose device column is /dev/$DISK if
# there is one, otherwise the first line containing $DISK (the line the old
# "grep | cut" pipeline effectively used). The exact match keeps e.g. hda1
# from picking up the values of hda10.
OUTPUT=`df -P | awk -v disk="$DISK" '
    $1 == "/dev/" disk { print $2, $4; found = 1; exit }
    fallback == "" && index($0, disk) { fallback = $2 " " $4 }
    END { if (!found && fallback != "") print fallback }'`
# $1 = total blocks, $2 = available blocks; both stay unset when nothing
# matched, in which case "U" (unknown) is reported instead of an empty value.
set -- $OUTPUT
echo "freedisk.value ${2:-U}"
echo "totaldisk.value ${1:-U}"

55
plugins/disk/iostat Executable file
View file

@ -0,0 +1,55 @@
#!/bin/sh
#
# Monitor disk iostat on FreeBSD host.
#
# Parameters understood:
#
# config (required)
# autoconf (optional - used by munin-config)
#
# Magic markers (optional - used by munin-config and installation
# scripts):
#
#%# family=auto
#%# capabilities=autoconf
PATH=/bin:/usr/bin

# autoconf: this plugin always reports itself as usable.
case "$1" in
    autoconf)
        echo yes
        exit 0
        ;;
esac

# First line of the iostat output, split into one word per device below.
DISKS=$(/usr/sbin/iostat -dIn9 | head -n 1)
# config: describe the graph and one DERIVE field per device; names starting
# with "pass" are left out.
if [ "$1" = "config" ]; then
    # Quoted delimiter: ${graph_period} must reach Munin unexpanded.
    cat <<'EOF'
graph_title IOstat
graph_args --base 1024 -l 0
graph_vlabel Bytes per ${graph_period}
graph_category disk
graph_info This graph shows disk load on the machine.
EOF
    for D in $DISKS; do
        case "$D" in
            pass*) continue ;;
        esac
        printf '%s.label %s\n' "$D" "$D"
        printf '%s.type DERIVE\n' "$D"
        printf '%s.min 0\n' "$D"
    done
    exit 0
fi
# fetch: the last iostat line carries three values per device; the third one
# of each group is multiplied by 1048576 and printed without a fraction.
VALUES=$(/usr/sbin/iostat -dIn9 | tail -n 1)
COL=3 # 3rd value for each disk is grabbed
for D in $DISKS; do
    case "$D" in
        pass*)
            # not reported, but its columns still have to be stepped over
            ;;
        *)
            # $VALUES is unquoted on purpose: echo collapses the column
            # padding to single spaces so that cut can count fields.
            VAL=$(echo $VALUES | cut -d ' ' -f $COL)
            printf '%s.value ' "$D"
            echo "$VAL 1048576 * p" | dc | cut -d '.' -f 1
            ;;
    esac
    COL=$((COL + 3))
done

54
plugins/disk/iostat-xfrs Executable file
View file

@ -0,0 +1,54 @@
#!/bin/sh
#
# Monitor disk iostat on FreeBSD host.
#
# Parameters understood:
#
# config (required)
# autoconf (optional - used by munin-config)
#
# Magic markers (optional - used by munin-config and installation
# scripts):
#
#%# family=auto
#%# capabilities=autoconf
PATH=/bin:/usr/bin

# autoconf: this plugin always reports itself as usable.
case "$1" in
    autoconf)
        echo yes
        exit 0
        ;;
esac

# First line of the iostat output, split into one word per device below.
DISKS=$(/usr/sbin/iostat -dIn9 | head -n 1)
# config: describe the graph and one DERIVE field per device; names starting
# with "pass" are left out.
if [ "$1" = "config" ]; then
    # Quoted delimiter: ${graph_period} must reach Munin unexpanded.
    cat <<'EOF'
graph_title IOstat xfrs
graph_args -l 0
graph_vlabel Transfers per ${graph_period}
graph_category disk
graph_info This graph shows disk load on the machine.
EOF
    for D in $DISKS; do
        case "$D" in
            pass*) continue ;;
        esac
        printf '%s.label %s\n' "$D" "$D"
        printf '%s.type DERIVE\n' "$D"
        printf '%s.min 0\n' "$D"
    done
    exit 0
fi
# fetch: the last iostat line carries three values per device; the second one
# of each group is printed as is.
VALUES=$(/usr/sbin/iostat -dIn9 | tail -n 1)
COL=2 # 2nd value for each disk is grabbed
for D in $DISKS; do
    case "$D" in
        pass*)
            # not reported, but its columns still have to be stepped over
            ;;
        *)
            printf '%s.value ' "$D"
            # $VALUES is unquoted on purpose: echo collapses the column
            # padding to single spaces so that cut can count fields.
            echo $VALUES | cut -d ' ' -f $COL
            ;;
    esac
    COL=$((COL + 3))
done

702
plugins/disk/linux_diskstat_ Executable file
View file

@ -0,0 +1,702 @@
#!/usr/bin/perl -w
# vim: sts=4 sw=4 ts=8
# Munin markers:
#%# family=auto
#%# capabilities=autoconf suggest
# Author: Michael Renner <michael.renner@amd.co.at>
# Version: 0.0.5, 2009-05-22
=head1 NAME
linux_diskstat_ - Munin plugin to monitor various values provided
via C</proc/diskstats>
=head1 APPLICABLE SYSTEMS
Linux 2.6 systems with extended block device statistics enabled.
=head1 INTERPRETATION
Among the more self-describing or well-known values like C<throughput>
(Bytes per second) there are a few which might need further introduction.
=head2 Device Utilization
Linux provides a counter which increments in a millisecond-interval for as long
as there are outstanding I/O requests. If this counter is close to 1000msec
in a given 1 second timeframe the device is nearly 100% saturated. This plugin
provides values averaged over a 5 minute time frame per default, so it can't
catch short-lived saturations, but it'll give a nice trend for semi-uniform
load patterns as they're expected in most server or multi-user environments.
=head2 Device IO Time
The C<Device IO Time> takes the counter described under C<Device Utilization>
and divides it by the number of I/Os that happened in the given time frame,
resulting in an average time per I/O on the block-device level.
This value can give you a good comparison base amongst different controllers,
storage subsystems and disks for similar workloads.
=head2 Syscall Wait Time
These values describe the average time it takes between an application issuing
a syscall resulting in a hit to a blockdevice to the syscall returning to the
application.
The values are bound to be higher (at least for read requests) than the time
it takes the device itself to fulfill the requests, since calling overhead,
queuing times and probably a dozen other things are included in those times.
These are the values to watch out for when an user complains that C<the disks
are too slow!>.
=head3 What causes a block device hit?
A non-exhaustive list:
=over
=item * Reads from files when the given range is not in the page cache or the O_DIRECT
flag is set.
=item * Writes to files if O_DIRECT or O_SYNC is set or sys.vm.dirty_(background_)ratio
is exceeded.
=item * Filesystem metadata operations (stat(2), getdents(2), file creation,
modification of any of the values returned by stat(2), etc.)
=item * The pdflush daemon writing out dirtied pages
=item * (f)sync
=item * Swapping
=item * raw device I/O (mkfs, dd, etc.)
=back
=head1 ACKNOWLEDGEMENTS
The core logic of this script is based on the B<iostat> tool of the B<sysstat>
package written and maintained by Sebastien Godard.
=head1 SEE ALSO
See C<Documentation/iostats.txt> in your Linux source tree for further information
about the C<numbers> involved in this module.
L<http://www.westnet.com/~gsmith/content/linux-pdflush.htm> has a nice writeup
about the pdflush daemon.
=head1 AUTHOR
Michael Renner <michael.renner@amd.co.at>
=head1 LICENSE
GPLv2
=cut
use strict;
use File::Basename;
use Carp;
use POSIX;
# Use Munin::Plugin for save_state/restore_state when it can be loaded;
# otherwise fall back to our own versions. Don't try this at home.
eval {
    require Munin::Plugin;
    Munin::Plugin->import;
};
fake_munin_plugin() if $@;

# Sanity check to ensure that the script is called the correct name.
unless ( basename($0) =~ /^linux_diskstat_/ ) {
    die qq(Please ensure that the name of the script and it's symlinks starts with "linux_diskstat_"\n);
}
############
# autoconf #
############
# autoconf request: "yes" when device statistics can be parsed and at least
# one device is present, "no" otherwise.
if ( defined $ARGV[0] && $ARGV[0] eq 'autoconf' ) {

    # Capture any croaks on the way; a failed eval leaves %stats empty.
    my %stats = eval { parse_diskstats() };
    my $usable = ( !$@ && keys %stats ) ? 1 : 0;

    print $usable ? "yes\n" : "no\n";
    exit( $usable ? 0 : 1 );
}
###########
# suggest #
###########
# suggest request: print one "<mode>_<device>" line for every graph worth
# creating, then stop.
if ( defined $ARGV[0] && $ARGV[0] eq 'suggest' ) {
    my %diskstats = parse_diskstats();
    my %suggested_devices;

  DEVICE:
    for my $devname ( sort keys %diskstats ) {

        # Skip devices without traffic
        next
          if ( $diskstats{$devname}->{'rd_ios'} == 0
            && $diskstats{$devname}->{'wr_ios'} == 0 );

        for my $existing_device ( @{ $suggested_devices{'iops'} } ) {

            # Filter out partitions of devices that are already suggested,
            # e.g. sda1 -> sda, c0d0p1 -> c0d0. A partition is named
            # "<disk><N>", or "<disk>p<N>" when the disk name itself ends in
            # a digit. The match is anchored and the name quoted so that a
            # separate device such as md10 is no longer dropped merely
            # because its name contains "md1".
            my $separator = $existing_device =~ /\d\z/ ? 'p' : '';
            next DEVICE
              if ( $devname =~ m/\A\Q$existing_device\E$separator\d+\z/ );
        }

        push @{ $suggested_devices{'iops'} },       $devname;
        push @{ $suggested_devices{'throughput'} }, $devname;

        # Only suggest latency graphs if the device supports it
        if (   $diskstats{$devname}->{'rd_ticks'} > 0
            || $diskstats{$devname}->{'wr_ticks'} > 0 )
        {
            push @{ $suggested_devices{'latency'} }, $devname;
        }
    }

    for my $mode ( keys %suggested_devices ) {
        for my $device ( sort @{ $suggested_devices{$mode} } ) {

            # '/' cannot appear in the symlink name, so translate it.
            my $printdev = translate_device_name( $device, 'TO_FS' );
            print "${mode}_$printdev\n";
        }
    }
    exit 0;
}
# The invocation name encodes what to graph:
# linux_diskstat_<mode>_<device>. Both values are needed from here on.
my $basename = basename($0);
my ( $mode, $device ) =
  ( $basename =~ m/linux_diskstat_(\w+)_([-+\w]+)$/ );
croak "Didn't get a device name. Aborting\n" unless defined $device;

# Turn the placeholder characters in the name back into '/'.
$device = translate_device_name( $device, 'FROM_FS' );
##########
# config #
##########
# config request: print the graph definition for the mode taken from the
# invocation name (latency, throughput or iops), then stop. Any other mode
# croaks.
if ( defined $ARGV[0] && $ARGV[0] eq 'config' ) {
# Device name as shown in the graph title; device-mapper nodes (dm-<N>) are
# passed through translate_devicemapper_name() first.
my $pretty_device = $device;
if ($device =~ /^dm-\d+$/) {
$pretty_device = translate_devicemapper_name($device);
}
# latency: utilization plus average service and wait times
if ( $mode eq 'latency' ) {
print <<EOF;
graph_title Disk latency for /dev/$pretty_device
graph_args --base 1000
graph_category disk
util.label Device utilization (percent)
util.type GAUGE
util.info Utilization of the device. If the time spent for I/O is close to 1000msec for a given second, the device is nearly 100% saturated.
util.min 0
svctm.label Average device IO time (ms)
svctm.type GAUGE
svctm.info Average time an I/O takes on the block device
svctm.min 0
avgwait.label Average IO Wait time (ms)
avgwait.type GAUGE
avgwait.info Average wait time for an I/O from request start to finish (includes queue times et al)
avgwait.min 0
avgrdwait.label Average Read IO Wait time (ms)
avgrdwait.type GAUGE
avgrdwait.info Average wait time for a read I/O from request start to finish (includes queue times et al)
avgrdwait.min 0
avgwrwait.label Average Write IO Wait time (ms)
avgwrwait.type GAUGE
avgwrwait.info Average wait time for a write I/O from request start to finish (includes queue times et al)
avgwrwait.min 0
EOF
}
# throughput: bytes read and written per second
elsif ( $mode eq 'throughput' ) {
print <<EOF;
graph_title Disk throughput for /dev/$pretty_device
graph_args --base 1024
graph_vlabel Bytes/second
graph_category disk
rdbytes.label Read Bytes
rdbytes.type GAUGE
rdbytes.min 0
wrbytes.label Write Bytes
wrbytes.type GAUGE
wrbytes.min 0
EOF
}
# iops: I/O operations per second plus average request sizes
elsif ( $mode eq 'iops' ) {
print <<EOF;
graph_title Disk IOs for /dev/$pretty_device
graph_args --base 1000
graph_vlabel Units/second
graph_category disk
rdio.label Read IO/sec
rdio.type GAUGE
rdio.min 0
wrio.label Write IO/sec
wrio.type GAUGE
wrio.min 0
avgrqsz.label Average Request Size (KiB)
avgrqsz.type GAUGE
avgrqsz.min 0
avgrdrqsz.label Average Read Request Size (KiB)
avgrdrqsz.type GAUGE
avgrdrqsz.min 0
avgwrrqsz.label Average Write Request Size (KiB)
avgwrrqsz.type GAUGE
avgwrrqsz.min 0
EOF
}
else {
croak "Unknown mode $mode\n";
}
exit 0;
}
########
# MAIN #
########
my %cur_diskstat = fetch_device_counters($device);

# Without counters for the requested device there is nothing to compute, and
# saving them would only store a useless state for the next run.
croak "No statistics found for device '$device'. Aborting\n"
  unless defined $cur_diskstat{'rd_ios'};

my ( $prev_time, %prev_diskstat ) = restore_state();
save_state( time(), %cur_diskstat );

# Probably the first run for the given device, we need state to do our job,
# so let's wait for the next run.
exit if ( not defined $prev_time or not %prev_diskstat );

calculate_and_print_values( $prev_time, \%prev_diskstat, \%cur_diskstat );
########
# SUBS #
########
# calculate_and_print_values($prev_time, \%prev_stats, \%cur_stats)
#
# Derives per-second rates and averages from the difference between two
# counter snapshots and prints the values for the current $mode.
#   $prev_time  - epoch seconds at which %prev_stats was taken
#   $prev_stats - counters of the previous run
#   $cur_stats  - counters of this run
# Prints nothing when no time has passed since the previous snapshot.
# Croaks on an unknown $mode.
sub calculate_and_print_values {
    my ( $prev_time, $prev_stats, $cur_stats ) = @_;

    my $bytes_per_sector = 512;

    my $interval = time() - $prev_time;

    # Two runs within the same second (or a clock that stepped backwards)
    # give no usable interval; every rate below would divide by zero.
    return if $interval <= 0;

    my $read_ios  = $cur_stats->{'rd_ios'} - $prev_stats->{'rd_ios'};
    my $write_ios = $cur_stats->{'wr_ios'} - $prev_stats->{'wr_ios'};

    my $rd_ticks = $cur_stats->{'rd_ticks'} - $prev_stats->{'rd_ticks'};
    my $wr_ticks = $cur_stats->{'wr_ticks'} - $prev_stats->{'wr_ticks'};

    my $rd_sectors = $cur_stats->{'rd_sectors'} - $prev_stats->{'rd_sectors'};
    my $wr_sectors = $cur_stats->{'wr_sectors'} - $prev_stats->{'wr_sectors'};

    my $tot_ticks = $cur_stats->{'tot_ticks'} - $prev_stats->{'tot_ticks'};

    my $read_io_per_sec  = $read_ios / $interval;
    my $write_io_per_sec = $write_ios / $interval;

    my $read_bytes_per_sec  = $rd_sectors / $interval * $bytes_per_sector;
    my $write_bytes_per_sec = $wr_sectors / $interval * $bytes_per_sector;

    my $total_ios         = $read_ios + $write_ios;
    my $total_ios_per_sec = $total_ios / $interval;

    # Utilization - or "how busy is the device"?
    # If the time spent for I/O was close to 1000msec for
    # a given second, the device is nearly 100% saturated.
    my $utilization = $tot_ticks / $interval;

    # Average time an I/O takes on the block device
    my $servicetime =
      $total_ios_per_sec ? $utilization / $total_ios_per_sec : 0;

    # Average wait time for an I/O from start to finish
    # (includes queue times et al)
    my $average_wait = $total_ios ? ( $rd_ticks + $wr_ticks ) / $total_ios : 0;
    my $average_rd_wait = $read_ios  ? $rd_ticks / $read_ios  : 0;
    my $average_wr_wait = $write_ios ? $wr_ticks / $write_ios : 0;

    my $average_rq_size_in_kb =
      $total_ios
      ? ( $rd_sectors + $wr_sectors ) * $bytes_per_sector / 1024 / $total_ios
      : 0;
    my $average_rd_rq_size_in_kb =
      $read_ios ? $rd_sectors * $bytes_per_sector / 1024 / $read_ios : 0;
    my $average_wr_rq_size_in_kb =
      $write_ios ? $wr_sectors * $bytes_per_sector / 1024 / $write_ios : 0;

    # msec busy per second -> percent
    my $util_print = $utilization / 10;

    if ( $mode eq 'latency' ) {
        print <<EOF;
util.value $util_print
svctm.value $servicetime
avgwait.value $average_wait
avgrdwait.value $average_rd_wait
avgwrwait.value $average_wr_wait
EOF
    }
    elsif ( $mode eq 'throughput' ) {
        print <<EOF;
rdbytes.value $read_bytes_per_sec
wrbytes.value $write_bytes_per_sec
EOF
    }
    elsif ( $mode eq 'iops' ) {
        print <<EOF;
rdio.value $read_io_per_sec
wrio.value $write_io_per_sec
avgrqsz.value $average_rq_size_in_kb
avgrdrqsz.value $average_rd_rq_size_in_kb
avgwrrqsz.value $average_wr_rq_size_in_kb
EOF
    }
    else {
        croak "Unknown mode $mode\n";
    }
}
# Reads /proc/diskstats and returns one array reference per device line,
# each holding the first 14 whitespace separated fields of that line.
# Croaks when the file cannot be opened or closed.
sub read_diskstats {
    open my $stat, '<', '/proc/diskstats'
      or croak "Failed to open '/proc/diskstats': $!\n";

    my @lines;

    while ( my $line = <$stat> ) {

        # Strip trailing newline and leading whitespace
        chomp $line;
        $line =~ s/^\s+//;

        my @elems = split /\s+/, $line;

        # We explicitly don't support old-style diskstats
        # There are situations where only _some_ lines (e.g.
        # partitions on older 2.6 kernels) have fewer stats
        # numbers, therefore we'll skip them silently
        next if @elems < 14;

        # Newer kernels append further counters after the 14 fields used
        # here; keep the known ones instead of dropping the whole line.
        splice @elems, 14;

        push @lines, \@elems;
    }

    close $stat or croak "Failed to close '/proc/diskstats': $!";
    return @lines;
}
# read_sysfs($want_device)
#
# Reads /sys/block/<device>/stat for the given device, or for every block
# device when none is given. Returns one array reference per device, laid out
# like a /proc/diskstats line: two empty placeholders, the device name, then
# the first 11 values of the stat file.
# Croaks when a stat file cannot be read or holds fewer than 11 values.
sub read_sysfs {
    my ($want_device) = @_;

    my @devices;
    my @lines;

    if ( defined $want_device ) {

        # sysfs uses '!' as replacement for '/', e.g. cciss!c0d0
        $want_device =~ tr#/#!#;
        @devices = $want_device;
    }
    else {
        @devices = glob "/sys/block/*/stat";
        @devices = map { m!/sys/block/([^/]+)/stat! } @devices;
    }

    for my $cur_device (@devices) {
        my $stats_file = "/sys/block/$cur_device/stat";

        open my $stat, '<', $stats_file
          or croak "Failed to open '$stats_file': $!\n";
        my $line = <$stat>;
        close $stat or croak "Failed to close '$stats_file': $!\n";

        croak "'$stats_file' is empty. Aborting" if ( !defined $line );

        # Trimming whitespace
        $line =~ s/^\s+//;
        chomp $line;

        my @elems = split /\s+/, $line;

        croak "'$stats_file' contains fewer than 11 values. Aborting"
          if ( @elems < 11 );

        # Newer kernels append further counters after the 11 values used
        # here; keep the known ones instead of refusing the file.
        splice @elems, 11;

        # Translate the devicename back before storing the information
        $cur_device =~ tr#!#/#;

        # Faking missing diskstats values
        unshift @elems, ( '', '', $cur_device );

        push @lines, \@elems;
    }

    return @lines;
}
# Collect block device statistics, preferring sysfs over /proc/diskstats.
#
# Parameters:
#   $want_device - optional device name, passed on to read_sysfs(); the
#                  /proc/diskstats fallback always reads all devices.
# Returns a hash (as a list) mapping device name to a hash reference with
# the keys major, minor, devname, rd_ios, rd_merges, rd_sectors, rd_ticks,
# wr_ios, wr_merges, wr_sectors, wr_ticks, ios_in_prog, tot_ticks and
# rq_ticks.
sub parse_diskstats {
    my ($want_device) = @_;
    # glob() in scalar/boolean context is a stateful iterator that yields a
    # different file on every call and finally undef; evaluate it in list
    # context so repeated calls always take the same branch.
    my @sysfs_stat_files = glob "/sys/block/*/stat";
    my @stats =
        @sysfs_stat_files
      ? read_sysfs($want_device)
      : read_diskstats();
    my %diskstats;
    for my $entry (@stats) {
        my %devstat;
        # Hash-Slicing for fun and profit
        @devstat{
            qw(major minor devname
              rd_ios rd_merges rd_sectors rd_ticks
              wr_ios wr_merges wr_sectors wr_ticks
              ios_in_prog tot_ticks rq_ticks)
          }
          = @{$entry};
        $diskstats{ $devstat{'devname'} } = \%devstat;
    }
    return %diskstats;
}
# Return the counters of a single device.
#
# Parameters:
#   $want_device - device name as used in the statistics (e.g. sda).
# Returns the device's counter hash as a list, or an empty list (undef in
# scalar context) when the device is unknown.
sub fetch_device_counters {
    my ($want_device) = @_;
    my %diskstats = parse_diskstats($want_device);
    if ( defined $want_device && exists $diskstats{$want_device} ) {
        return %{ $diskstats{$want_device} };
    }
    # A bare return yields an empty list in list context. "return undef"
    # would hand the caller a one-element list and thus an odd-sized hash.
    return;
}
# Device names may contain '/', which cannot appear in the plugin's file
# name. '+' stands in for it there; '-' was used for that in the past.
#
# Parameters:
#   $device - device name to convert
#   $mode   - 'TO_FS' (device name -> file name part) or
#             'FROM_FS' (file name part -> device name)
# Returns the converted name; croaks on any other mode.
sub translate_device_name {
    my ($device, $mode) = @_;
    if ($mode eq 'TO_FS') {
        $device =~ tr#/#+#;
        return $device;
    }
    croak "translate_device_name: Unknown mode\n" if $mode ne 'FROM_FS';
    # Names containing dm-<number> keep their dash: it is part of the name,
    # not the former separator.
    $device =~ tr#-+#//# unless $device =~ m/dm-\d+/;
    return $device;
}
# Define stand-ins for save_state() and restore_state() when Munin::Plugin
# is not available. State is persisted with Storable in
# /tmp/munin-state-<script name>.
#
# Croaks when the replacement code cannot be compiled (for example when
# Storable is missing) instead of failing later with an undefined
# subroutine.
#
# NOTE(review): the state file lives under a predictable name in /tmp and
# is read back with Storable::retrieve; consider a private state directory.
sub fake_munin_plugin {
    my $eval_code = <<'EOF';
    use Storable;
    my $storable_filename = basename($0);
    $storable_filename = "/tmp/munin-state-$storable_filename";
    sub save_state {
        my @state = @_;
        if ( not -e $storable_filename or -f $storable_filename ) {
            store \@state, $storable_filename or croak "Failed to persist state to '$storable_filename': $!\n";
        }
        else {
            croak "$storable_filename is probably not a regular file. Please delete it.\n";
        }
    }
    sub restore_state {
        if (-f $storable_filename) {
            my $state = retrieve($storable_filename);
            return @{$state};
        }
        else {
            return undef;
        }
    }
EOF
    eval($eval_code);
    # A string eval reports compile and runtime errors only through $@.
    croak "Failed to set up fallback state handling: $@" if $@;
    return;
}
# Map a kernel device-mapper name (dm-<minor>) to a friendlier name.
#
# Parameters:
#   $device - name of the form dm-<number>
# Returns a name relative to /dev: "<vg>/<lv>" when the device looks like
# an LVM volume, "mapper/<name>" for other mapped devices, or the original
# string when no matching node exists in /dev/mapper.
# Croaks when $device is not a dm-<number> name or when the device-mapper
# major number cannot be determined.
sub translate_devicemapper_name {
    my ($device) = @_;
    my ($want_minor) = $device =~ m/^dm-(\d+)$/;
    croak "Failed to extract devicemapper id" unless defined ($want_minor);
    my $dm_major = find_devicemapper_major();
    croak "Failed to get device-mapper major number\n" unless defined $dm_major;
    for my $entry (glob "/dev/mapper/*") {
        my $rdev = (stat($entry))[6];
        # Dangling symlinks and vanished nodes cannot be inspected
        next unless defined $rdev;
        # Linux packs the minor into bits 0-7 and 20-31 and the major into
        # bits 8-19, so a plain divide/modulo by 256 breaks for minors
        # above 255.
        my $major = ( $rdev >> 8 ) & 0xfff;
        my $minor = ( $rdev & 0xff ) | ( ( $rdev >> 12 ) & 0xfff00 );
        if ($major == $dm_major && $minor == $want_minor) {
            my $pretty_name = translate_lvm_name($entry);
            return $pretty_name if defined $pretty_name;
            # Callers prepend "/dev/" themselves, so hand back a name
            # relative to /dev like the other return paths do.
            ( my $relative_name = $entry ) =~ s{^/dev/}{};
            return $relative_name;
        }
    }
    # Return original string if the device can't be found.
    return $device;
}
# Try to turn a /dev/mapper entry into its LVM "<vg>/<lv>" form.
#
# Device-mapper joins volume group and logical volume with a single dash
# and doubles every dash that belongs to either name.
#
# Parameters:
#   $entry - path of a /dev/mapper node
# Returns "<vg>/<lv>" when the name contains a lone dash and
# /dev/<vg>/<lv> exists, undef otherwise.
sub translate_lvm_name {
    my ($entry) = @_;
    my $mapper_name = basename($entry);
    # A dash with no dash on either side separates vg from lv
    my $lone_dash = qr/(?<!-)-(?!-)/;
    return undef unless $mapper_name =~ $lone_dash;
    my ($vg, $lv) = split $lone_dash, $mapper_name, 2;
    return undef unless ( defined($vg) && defined($lv) );
    # Undo the dash doubling in both parts
    s/--/-/g for ( $vg, $lv );
    my $lvm_path = "$vg/$lv";
    # Sanity check - does the constructed device name exist?
    return $lvm_path if stat("/dev/$lvm_path");
    return undef;
}
# Look up the major number registered for 'device-mapper' in /proc/devices.
#
# Returns the major number, or undef when no such entry exists.
# Croaks when /proc/devices cannot be opened.
sub find_devicemapper_major {
    open my $devices_fh, '<', '/proc/devices'
        or croak "Failed to open '/proc/devices': $!";
    my $dm_major;
    while ( my $line = <$devices_fh> ) {
        chomp $line;
        # Major numbers are right-aligned, so short ones carry leading
        # blanks; without trimming, split would return an empty first field.
        $line =~ s/^\s+//;
        my ($major, $name) = split /\s+/, $line, 2;
        next unless defined $name;
        if ($name eq 'device-mapper') {
            $dm_major = $major;
            last;
        }
    }
    close $devices_fh;
    return $dm_major;
}

702
plugins/disk/linux_diskstats_ Executable file
View file

@ -0,0 +1,702 @@
#!/usr/bin/perl -w
# vim: sts=4 sw=4 ts=8
# Munin markers:
#%# family=auto
#%# capabilities=autoconf suggest
# Author: Michael Renner <michael.renner@amd.co.at>
# Version: 0.0.5, 2009-05-22
=head1 NAME
linux_diskstat_ - Munin plugin to monitor various values provided
via C</proc/diskstats>
=head1 APPLICABLE SYSTEMS
Linux 2.6 systems with extended block device statistics enabled.
=head1 INTERPRETATION
Among the more self-describing or well-known values like C<throughput>
(Bytes per second) there are a few which might need further introduction.
=head2 Device Utilization
Linux provides a counter which increments in a millisecond-interval for as long
as there are outstanding I/O requests. If this counter is close to 1000msec
in a given 1 second timeframe the device is nearly 100% saturated. This plugin
provides values averaged over a 5 minute time frame per default, so it can't
catch short-lived saturations, but it'll give a nice trend for semi-uniform
load patterns as they're expected in most server or multi-user environments.
=head2 Device IO Time
The C<Device IO Time> takes the counter described under C<Device Utilization>
and divides it by the number of I/Os that happened in the given time frame,
resulting in an average time per I/O on the block-device level.
This value can give you a good comparison base amongst different controllers,
storage subsystems and disks for similar workloads.
=head2 Syscall Wait Time
These values describe the average time it takes between an application issuing
a syscall resulting in a hit to a blockdevice to the syscall returning to the
application.
The values are bound to be higher (at least for read requests) than the time
it takes the device itself to fulfill the requests, since calling overhead,
queuing times and probably a dozen other things are included in those times.
These are the values to watch out for when a user complains that C<the disks
are too slow!>.
=head3 What causes a block device hit?
A non-exhaustive list:
=over
=item * Reads from files when the given range is not in the page cache or the O_DIRECT
flag is set.
=item * Writes to files if O_DIRECT or O_SYNC is set or sys.vm.dirty_(background_)ratio
is exceeded.
=item * Filesystem metadata operations (stat(2), getdents(2), file creation,
modification of any of the values returned by stat(2), etc.)
=item * The pdflush daemon writing out dirtied pages
=item * (f)sync
=item * Swapping
=item * raw device I/O (mkfs, dd, etc.)
=back
=head1 ACKNOWLEDGEMENTS
The core logic of this script is based on the B<iostat> tool of the B<sysstat>
package written and maintained by Sebastien Godard.
=head1 SEE ALSO
See C<Documentation/iostats.txt> in your Linux source tree for further information
about the C<numbers> involved in this module.
L<http://www.westnet.com/~gsmith/content/linux-pdflush.htm> has a nice writeup
about the pdflush daemon.
=head1 AUTHOR
Michael Renner <michael.renner@amd.co.at>
=head1 LICENSE
GPLv2
=cut
use strict;
use File::Basename;
use Carp;
use POSIX;
# Munin::Plugin normally provides save_state/restore_state. If it cannot be
# loaded, install our own replacements instead.
# Don't try this at home
unless ( eval { require Munin::Plugin; Munin::Plugin->import; 1 } ) {
    fake_munin_plugin();
}
# Mode and device are taken from the invocation name later on, so refuse to
# run under a name that does not carry the expected prefix.
die qq(Please ensure that the name of the script and it's symlinks starts with "linux_diskstat_"\n)
    unless basename($0) =~ /^linux_diskstat_/;
############
# autoconf #
############
if ( defined $ARGV[0] && $ARGV[0] eq 'autoconf' ) {
    # A croak while gathering statistics counts as "not usable"
    my %stats = eval { parse_diskstats() };
    my $usable = ( !$@ && keys %stats ) ? 1 : 0;
    print $usable ? "yes\n" : "no\n";
    exit( $usable ? 0 : 1 );
}
###########
# suggest #
###########
# Print one "<mode>_<device>" line per graph worth creating and exit.
if ( defined $ARGV[0] && $ARGV[0] eq 'suggest' ) {
    my %diskstats = parse_diskstats();
    my %suggested_devices;
  DEVICE:
    for my $devname ( sort keys %diskstats ) {
        # Skip devices without traffic
        next
          if ( $diskstats{$devname}->{'rd_ios'} == 0
            && $diskstats{$devname}->{'wr_ios'} == 0 );
        for my $existing_device ( @{ $suggested_devices{'iops'} } ) {
            # Filter out partitions of devices that were already accepted,
            # e.g. sda1 -> sda, c0d0p1 -> c0d0. The kernel appends the
            # partition number directly, or after a 'p' when the disk name
            # itself ends in a digit. Anchoring and quoting keeps unrelated
            # devices such as dm-10 (vs. dm-1) or sdaa (vs. sda).
            my $partition_re =
                $existing_device =~ m/\d$/
              ? qr/^\Q$existing_device\Ep\d+$/
              : qr/^\Q$existing_device\E\d+$/;
            next DEVICE if ( $devname =~ $partition_re );
        }
        push @{ $suggested_devices{'iops'} },       $devname;
        push @{ $suggested_devices{'throughput'} }, $devname;
        # Only suggest latency graphs if the device supports it
        if (   $diskstats{$devname}->{'rd_ticks'} > 0
            || $diskstats{$devname}->{'wr_ticks'} > 0 )
        {
            push @{ $suggested_devices{'latency'} }, $devname;
        }
    }
    for my $mode ( keys %suggested_devices ) {
        for my $device ( sort @{ $suggested_devices{$mode} } ) {
            my $printdev = translate_device_name($device, 'TO_FS');
            print "${mode}_$printdev\n";
        }
    }
    exit 0;
}
# The invocation name carries both the graph mode and the device:
# linux_diskstat_<mode>_<device>. Everything below depends on them.
my $basename = basename($0);
my ( $mode, $device ) =
    ( $basename =~ m/linux_diskstat_(\w+)_([-+\w]+)$/ );
croak "Didn't get a device name. Aborting\n" unless defined $device;
# Turn the file-name-safe spelling back into the real device name
$device = translate_device_name($device, 'FROM_FS');
##########
# config #
##########
# Print the graph definition matching $mode for $device and exit.
# An unrecognised mode croaks.
if ( defined $ARGV[0] && $ARGV[0] eq 'config' ) {
# Name shown in the graph title; device-mapper devices (dm-<number>) are
# translated to a friendlier name where possible.
my $pretty_device = $device;
if ($device =~ /^dm-\d+$/) {
$pretty_device = translate_devicemapper_name($device);
}
# Latency graph: utilization, device I/O time and wait times
if ( $mode eq 'latency' ) {
print <<EOF;
graph_title Disk latency for /dev/$pretty_device
graph_args --base 1000
graph_category disk
util.label Device utilization (percent)
util.type GAUGE
util.info Utilization of the device. If the time spent for I/O is close to 1000msec for a given second, the device is nearly 100% saturated.
util.min 0
svctm.label Average device IO time (ms)
svctm.type GAUGE
svctm.info Average time an I/O takes on the block device
svctm.min 0
avgwait.label Average IO Wait time (ms)
avgwait.type GAUGE
avgwait.info Average wait time for an I/O from request start to finish (includes queue times et al)
avgwait.min 0
avgrdwait.label Average Read IO Wait time (ms)
avgrdwait.type GAUGE
avgrdwait.info Average wait time for a read I/O from request start to finish (includes queue times et al)
avgrdwait.min 0
avgwrwait.label Average Write IO Wait time (ms)
avgwrwait.type GAUGE
avgwrwait.info Average wait time for a write I/O from request start to finish (includes queue times et al)
avgwrwait.min 0
EOF
}
# Throughput graph: bytes read and written per second
elsif ( $mode eq 'throughput' ) {
print <<EOF;
graph_title Disk throughput for /dev/$pretty_device
graph_args --base 1024
graph_vlabel Bytes/second
graph_category disk
rdbytes.label Read Bytes
rdbytes.type GAUGE
rdbytes.min 0
wrbytes.label Write Bytes
wrbytes.type GAUGE
wrbytes.min 0
EOF
}
# IOPS graph: requests per second and average request sizes
elsif ( $mode eq 'iops' ) {
print <<EOF;
graph_title Disk IOs for /dev/$pretty_device
graph_args --base 1000
graph_vlabel Units/second
graph_category disk
rdio.label Read IO/sec
rdio.type GAUGE
rdio.min 0
wrio.label Write IO/sec
wrio.type GAUGE
wrio.min 0
avgrqsz.label Average Request Size (KiB)
avgrqsz.type GAUGE
avgrqsz.min 0
avgrdrqsz.label Average Read Request Size (KiB)
avgrdrqsz.type GAUGE
avgrdrqsz.min 0
avgwrrqsz.label Average Write Request Size (KiB)
avgwrrqsz.type GAUGE
avgwrrqsz.min 0
EOF
}
else {
croak "Unknown mode $mode\n";
}
exit 0;
}
########
# MAIN #
########
my %cur_diskstat = fetch_device_counters($device);
# Without counters for the requested device there is nothing sensible to
# store or print; continuing would persist an empty snapshot and emit
# values computed from undefined counters.
croak "No statistics found for device '$device'. Aborting\n"
    unless defined $cur_diskstat{'devname'};
my ( $prev_time, %prev_diskstat ) = restore_state();
save_state( time(), %cur_diskstat );
# Probably the first run for the given device, we need state to do our job,
# so let's wait for the next run.
exit if ( not defined $prev_time or not %prev_diskstat );
calculate_and_print_values( $prev_time, \%prev_diskstat, \%cur_diskstat );
########
# SUBS #
########
# Turn two counter snapshots into the values for the current mode and
# print them in Munin's "<field>.value <number>" format.
#
# Parameters:
#   $prev_time  - epoch seconds at which the previous snapshot was stored
#   $prev_stats - hash reference holding the previous counters
#   $cur_stats  - hash reference holding the current counters
# The value set depends on the file-level $mode ('latency', 'throughput'
# or 'iops'); any other mode croaks. Nothing is printed when no time has
# passed since the previous snapshot.
sub calculate_and_print_values {
    my ( $prev_time, $prev_stats, $cur_stats ) = @_;
    my $bytes_per_sector = 512;
    my $interval = time() - $prev_time;
    # Two runs within the same second (or a clock that stepped backwards)
    # would otherwise end in a division by zero or nonsensical rates.
    return if $interval <= 0;
    my $read_ios  = $cur_stats->{'rd_ios'} - $prev_stats->{'rd_ios'};
    my $write_ios = $cur_stats->{'wr_ios'} - $prev_stats->{'wr_ios'};
    my $rd_ticks = $cur_stats->{'rd_ticks'} - $prev_stats->{'rd_ticks'};
    my $wr_ticks = $cur_stats->{'wr_ticks'} - $prev_stats->{'wr_ticks'};
    my $rd_sectors = $cur_stats->{'rd_sectors'} - $prev_stats->{'rd_sectors'};
    my $wr_sectors = $cur_stats->{'wr_sectors'} - $prev_stats->{'wr_sectors'};
    my $tot_ticks = $cur_stats->{'tot_ticks'} - $prev_stats->{'tot_ticks'};
    my $read_io_per_sec  = $read_ios / $interval;
    my $write_io_per_sec = $write_ios / $interval;
    my $read_bytes_per_sec  = $rd_sectors / $interval * $bytes_per_sector;
    my $write_bytes_per_sec = $wr_sectors / $interval * $bytes_per_sector;
    my $total_ios         = $read_ios + $write_ios;
    my $total_ios_per_sec = $total_ios / $interval;
    # Utilization - or "how busy is the device"?
    # If the time spent for I/O was close to 1000msec for
    # a given second, the device is nearly 100% saturated.
    my $utilization = $tot_ticks / $interval;
    # Average time an I/O takes on the block device
    my $servicetime =
      $total_ios_per_sec ? $utilization / $total_ios_per_sec : 0;
    # Average wait time for an I/O from start to finish
    # (includes queue times et al)
    my $average_wait = $total_ios ? ( $rd_ticks + $wr_ticks ) / $total_ios : 0;
    my $average_rd_wait = $read_ios  ? $rd_ticks / $read_ios  : 0;
    my $average_wr_wait = $write_ios ? $wr_ticks / $write_ios : 0;
    my $average_rq_size_in_kb =
      $total_ios
      ? ( $rd_sectors + $wr_sectors ) * $bytes_per_sector / 1024 / $total_ios
      : 0;
    my $average_rd_rq_size_in_kb =
      $read_ios ? $rd_sectors * $bytes_per_sector / 1024 / $read_ios : 0;
    my $average_wr_rq_size_in_kb =
      $write_ios ? $wr_sectors * $bytes_per_sector / 1024 / $write_ios : 0;
    # Busy milliseconds per second divided by 10 gives a percentage
    my $util_print = $utilization / 10;
    if ( $mode eq 'latency' ) {
        print <<EOF;
util.value $util_print
svctm.value $servicetime
avgwait.value $average_wait
avgrdwait.value $average_rd_wait
avgwrwait.value $average_wr_wait
EOF
    }
    elsif ( $mode eq 'throughput' ) {
        print <<EOF;
rdbytes.value $read_bytes_per_sec
wrbytes.value $write_bytes_per_sec
EOF
    }
    elsif ( $mode eq 'iops' ) {
        print <<EOF;
rdio.value $read_io_per_sec
wrio.value $write_io_per_sec
avgrqsz.value $average_rq_size_in_kb
avgrdrqsz.value $average_rd_rq_size_in_kb
avgwrrqsz.value $average_wr_rq_size_in_kb
EOF
    }
    else {
        croak "Unknown mode $mode\n";
    }
}
# Read the per-device I/O counters from /proc/diskstats.
#
# Returns a list of array references, one per usable line, each holding
# exactly 14 elements: major, minor, device name and the 11 classic
# counters. Croaks when the file cannot be opened or closed.
sub read_diskstats {
    open my $stat_fh, '<', '/proc/diskstats'
        or croak "Failed to open '/proc/diskstats': $!\n";
    my @lines;
    while ( my $line = <$stat_fh> ) {
        # Strip trailing newline and leading whitespace
        chomp $line;
        $line =~ s/^\s+//;
        my @elems = split /\s+/, $line;
        # We explicitly don't support old-style diskstats.
        # There are situations where only _some_ lines (e.g.
        # partitions on older 2.6 kernels) have fewer stats
        # numbers, therefore we'll skip them silently.
        next if @elems < 14;
        # Newer kernels append further counters (discard and flush
        # statistics) after the classic ones. Only the first 14 columns
        # are consumed, so drop the surplus instead of rejecting the line.
        splice @elems, 14;
        push @lines, \@elems;
    }
    close $stat_fh or croak "Failed to close '/proc/diskstats': $!";
    return @lines;
}
# Read I/O counters from /sys/block/<device>/stat.
#
# Parameters:
#   $want_device - optional device name ('/' allowed, e.g. cciss/c0d0);
#                  when undefined every /sys/block/*/stat file is read.
# Returns a list of array references shaped like /proc/diskstats rows:
# two empty placeholders for major/minor, the device name, then the 11
# classic counters. Croaks on unreadable, empty or too short files.
sub read_sysfs {
    my ($want_device) = @_;
    my @devices;
    my @lines;
    if ( defined $want_device ) {
        # sysfs uses '!' as replacement for '/', e.g. cciss!c0d0
        $want_device =~ tr#/#!#;
        @devices = $want_device;
    }
    else {
        @devices = glob "/sys/block/*/stat";
        @devices = map { m!/sys/block/([^/]+)/stat! } @devices;
    }
    for my $cur_device (@devices) {
        my $stats_file = "/sys/block/$cur_device/stat";
        # Three-argument open: the name is derived from the script's
        # invocation name and must never be parsed for a mode.
        open my $stat_fh, '<', $stats_file
            or croak "Failed to open '$stats_file': $!\n";
        my $line = <$stat_fh>;
        croak "'$stats_file' is empty. Aborting" unless defined $line;
        # Trimming whitespace
        $line =~ s/^\s+//;
        chomp $line;
        my @elems = split /\s+/, $line;
        croak "'$stats_file' doesn't contain at least 11 values. Aborting"
            if ( @elems < 11 );
        # Newer kernels append discard/flush counters; keep the classic 11
        # so the row lines up with the diskstats layout.
        splice @elems, 11;
        # Translate the devicename back before storing the information
        ( my $devname = $cur_device ) =~ tr#!#/#;
        # Faking missing diskstats values
        unshift @elems, ( '', '', $devname );
        push @lines, \@elems;
        close $stat_fh or croak "Failed to close '$stats_file': $!\n";
    }
    return @lines;
}
# Collect block device statistics, preferring sysfs over /proc/diskstats.
#
# Parameters:
#   $want_device - optional device name, passed on to read_sysfs(); the
#                  /proc/diskstats fallback always reads all devices.
# Returns a hash (as a list) mapping device name to a hash reference with
# the keys major, minor, devname, rd_ios, rd_merges, rd_sectors, rd_ticks,
# wr_ios, wr_merges, wr_sectors, wr_ticks, ios_in_prog, tot_ticks and
# rq_ticks.
sub parse_diskstats {
    my ($want_device) = @_;
    # glob() in scalar/boolean context is a stateful iterator that yields a
    # different file on every call and finally undef; evaluate it in list
    # context so repeated calls always take the same branch.
    my @sysfs_stat_files = glob "/sys/block/*/stat";
    my @stats =
        @sysfs_stat_files
      ? read_sysfs($want_device)
      : read_diskstats();
    my %diskstats;
    for my $entry (@stats) {
        my %devstat;
        # Hash-Slicing for fun and profit
        @devstat{
            qw(major minor devname
              rd_ios rd_merges rd_sectors rd_ticks
              wr_ios wr_merges wr_sectors wr_ticks
              ios_in_prog tot_ticks rq_ticks)
          }
          = @{$entry};
        $diskstats{ $devstat{'devname'} } = \%devstat;
    }
    return %diskstats;
}
# Return the counters of a single device.
#
# Parameters:
#   $want_device - device name as used in the statistics (e.g. sda).
# Returns the device's counter hash as a list, or an empty list (undef in
# scalar context) when the device is unknown.
sub fetch_device_counters {
    my ($want_device) = @_;
    my %diskstats = parse_diskstats($want_device);
    if ( defined $want_device && exists $diskstats{$want_device} ) {
        return %{ $diskstats{$want_device} };
    }
    # A bare return yields an empty list in list context. "return undef"
    # would hand the caller a one-element list and thus an odd-sized hash.
    return;
}
# Device names may contain '/', which cannot appear in the plugin's file
# name. '+' stands in for it there; '-' was used for that in the past.
#
# Parameters:
#   $device - device name to convert
#   $mode   - 'TO_FS' (device name -> file name part) or
#             'FROM_FS' (file name part -> device name)
# Returns the converted name; croaks on any other mode.
sub translate_device_name {
    my ($device, $mode) = @_;
    if ($mode eq 'TO_FS') {
        $device =~ tr#/#+#;
        return $device;
    }
    croak "translate_device_name: Unknown mode\n" if $mode ne 'FROM_FS';
    # Names containing dm-<number> keep their dash: it is part of the name,
    # not the former separator.
    $device =~ tr#-+#//# unless $device =~ m/dm-\d+/;
    return $device;
}
# Define stand-ins for save_state() and restore_state() when Munin::Plugin
# is not available. State is persisted with Storable in
# /tmp/munin-state-<script name>.
#
# Croaks when the replacement code cannot be compiled (for example when
# Storable is missing) instead of failing later with an undefined
# subroutine.
#
# NOTE(review): the state file lives under a predictable name in /tmp and
# is read back with Storable::retrieve; consider a private state directory.
sub fake_munin_plugin {
    my $eval_code = <<'EOF';
    use Storable;
    my $storable_filename = basename($0);
    $storable_filename = "/tmp/munin-state-$storable_filename";
    sub save_state {
        my @state = @_;
        if ( not -e $storable_filename or -f $storable_filename ) {
            store \@state, $storable_filename or croak "Failed to persist state to '$storable_filename': $!\n";
        }
        else {
            croak "$storable_filename is probably not a regular file. Please delete it.\n";
        }
    }
    sub restore_state {
        if (-f $storable_filename) {
            my $state = retrieve($storable_filename);
            return @{$state};
        }
        else {
            return undef;
        }
    }
EOF
    eval($eval_code);
    # A string eval reports compile and runtime errors only through $@.
    croak "Failed to set up fallback state handling: $@" if $@;
    return;
}
# Map a kernel device-mapper name (dm-<minor>) to a friendlier name.
#
# Parameters:
#   $device - name of the form dm-<number>
# Returns a name relative to /dev: "<vg>/<lv>" when the device looks like
# an LVM volume, "mapper/<name>" for other mapped devices, or the original
# string when no matching node exists in /dev/mapper.
# Croaks when $device is not a dm-<number> name or when the device-mapper
# major number cannot be determined.
sub translate_devicemapper_name {
    my ($device) = @_;
    my ($want_minor) = $device =~ m/^dm-(\d+)$/;
    croak "Failed to extract devicemapper id" unless defined ($want_minor);
    my $dm_major = find_devicemapper_major();
    croak "Failed to get device-mapper major number\n" unless defined $dm_major;
    for my $entry (glob "/dev/mapper/*") {
        my $rdev = (stat($entry))[6];
        # Dangling symlinks and vanished nodes cannot be inspected
        next unless defined $rdev;
        # Linux packs the minor into bits 0-7 and 20-31 and the major into
        # bits 8-19, so a plain divide/modulo by 256 breaks for minors
        # above 255.
        my $major = ( $rdev >> 8 ) & 0xfff;
        my $minor = ( $rdev & 0xff ) | ( ( $rdev >> 12 ) & 0xfff00 );
        if ($major == $dm_major && $minor == $want_minor) {
            my $pretty_name = translate_lvm_name($entry);
            return $pretty_name if defined $pretty_name;
            # Callers prepend "/dev/" themselves, so hand back a name
            # relative to /dev like the other return paths do.
            ( my $relative_name = $entry ) =~ s{^/dev/}{};
            return $relative_name;
        }
    }
    # Return original string if the device can't be found.
    return $device;
}
# Try to turn a /dev/mapper entry into its LVM "<vg>/<lv>" form.
#
# Device-mapper joins volume group and logical volume with a single dash
# and doubles every dash that belongs to either name.
#
# Parameters:
#   $entry - path of a /dev/mapper node
# Returns "<vg>/<lv>" when the name contains a lone dash and
# /dev/<vg>/<lv> exists, undef otherwise.
sub translate_lvm_name {
    my ($entry) = @_;
    my $mapper_name = basename($entry);
    # A dash with no dash on either side separates vg from lv
    my $lone_dash = qr/(?<!-)-(?!-)/;
    return undef unless $mapper_name =~ $lone_dash;
    my ($vg, $lv) = split $lone_dash, $mapper_name, 2;
    return undef unless ( defined($vg) && defined($lv) );
    # Undo the dash doubling in both parts
    s/--/-/g for ( $vg, $lv );
    my $lvm_path = "$vg/$lv";
    # Sanity check - does the constructed device name exist?
    return $lvm_path if stat("/dev/$lvm_path");
    return undef;
}
# Look up the major number registered for 'device-mapper' in /proc/devices.
#
# Returns the major number, or undef when no such entry exists.
# Croaks when /proc/devices cannot be opened.
sub find_devicemapper_major {
    open my $devices_fh, '<', '/proc/devices'
        or croak "Failed to open '/proc/devices': $!";
    my $dm_major;
    while ( my $line = <$devices_fh> ) {
        chomp $line;
        # Major numbers are right-aligned, so short ones carry leading
        # blanks; without trimming, split would return an empty first field.
        $line =~ s/^\s+//;
        my ($major, $name) = split /\s+/, $line, 2;
        next unless defined $name;
        if ($name eq 'device-mapper') {
            $dm_major = $major;
            last;
        }
    }
    close $devices_fh;
    return $dm_major;
}

30
plugins/disk/log_sizes Executable file
View file

@ -0,0 +1,30 @@
#!/bin/sh
#H=`echo $0 | awk -F_ '{print $2}'`
#LOGFILES=`ls /var/log/messages /var/log/syslog /var/log/daemon.log /myapplication/logs/*.log`
# Space-separated list of log files whose size is graphed
LOGFILES="/var/log/messages /var/log/syslog /var/log/daemon.log"

if [ "$1" = "config" ] ; then
    echo "graph_title log sizes"
    echo "graph_category disk"
    echo "graph_info this graph shows sizes of log files"
    echo "graph_vlabel size (bytes)"
    for F in $LOGFILES
    do
        # Field names may not contain '-', '/' or '.', so map them to '_'
        MF=`echo "$F" | sed 's/[-\/\.]/_/g'`
        echo "$MF.label $F"
    done
else
    for F in $LOGFILES
    do
        MF=`echo "$F" | sed 's/[-\/\.]/_/g'`
        # A missing or unreadable file must still produce one complete
        # "<field>.value" line; report it as unknown (U) instead of leaving
        # an unterminated line that swallows the next field.
        if SIZE=`stat --printf="%s" "$F" 2>/dev/null` ; then
            printf '%s.value %s\n' "$MF" "$SIZE"
        else
            printf '%s.value U\n' "$MF"
        fi
    done
fi

58
plugins/disk/lvm_ Executable file
View file

@ -0,0 +1,58 @@
#!/bin/sh
#
# Script to monitor disk usage.
#
# By PatrickDK
#
# Parameters understood:
#
# config (required)
# autoconf (optional - used by munin-config)
#
# $Log$
#
# Magic markers (optional - used by munin-config and installation
# scripts):
#
#%# family=auto
#%# capabilities=autoconf
if [ "$1" = "autoconf" ]; then
    echo yes
    exit 0
fi

# The volume group is whatever follows "lvm_" in the invocation name
vg=`echo $0 | awk '{ sub(".*lvm_","",\$1); print \$1; }'`

# Turn a volume name into a valid field name ('/', '.' and '-' become '_')
clean_name() {
    echo $1 | sed 's/[\/.-]/_/g'
}

if [ "$1" = "config" ]; then
    echo 'graph_title Logical Volume usage'
    echo 'graph_args --base 1000 -l 0'
#   echo 'graph_vlabel %'
    echo 'graph_category disk'
    echo 'graph_info This graph shows disk usage on the machine.'
    echo "free.label free"
    echo "free.draw AREA"
    # Compare the VG column exactly: a substring match would also pick up
    # other volume groups or volumes whose names merely contain "$vg".
    # An empty $vg keeps selecting every volume, as before.
    lvs --units b --nosuffix --noheadings |
        awk -v vg="$vg" 'vg == "" || $2 == vg { print $1 }' |
        while read -r lv; do
            name=`clean_name "$lv"`
            echo "$name.label $lv"
            echo "$name.draw STACK"
        done
    exit 0
fi

# Column 7 of the default vgs output is the free space (VFree)
free=`vgs --units b --nosuffix --noheadings |
    awk -v vg="$vg" 'vg == "" || $1 == vg { print $7; exit }'`
echo "free.value $free"

# Columns 1 and 4 of the default lvs output are the LV name and its size
lvs --units b --nosuffix --noheadings |
    awk -v vg="$vg" 'vg == "" || $2 == vg { print $1, $4 }' |
    while read -r lv size; do
        name=`clean_name "$lv"`
        echo "$name.value $size"
    done

44
plugins/disk/lvm_snap_used Executable file
View file

@ -0,0 +1,44 @@
#!/bin/bash
#
# Plugin to monitor the % of allocated area of a LVM snapshot
#
# Parameters:
#
# config
# autoconf
#
# Configuration variables
# no config variables
#
#%# family=auto
#%# capabilities=autoconf
#
# 2011/05/20 - pmoranga - initial version
#
# 2012/01/27 - Sébastien Gross
# - Fix lvdisplay path
lvdisplay=$(which lvdisplay)

if [ "$1" = "autoconf" ]; then
    if test -n "${lvdisplay}"; then
        echo yes
        exit 0
    fi
    echo "no lvdisplay found"
    exit 1
fi

# Without the binary the pipelines below would try to execute "-C"
if test -z "${lvdisplay}"; then
    echo "lvdisplay not found" >&2
    exit 1
fi

if [ "$1" = "config" ]; then
    echo 'graph_title Allocated space for snapshot'
    echo 'graph_vlabel %'
    echo 'graph_category disk'
    echo 'graph_args --base 100'
    # Rows whose attribute column starts with "s" are snapshots. Volume
    # names may contain characters that are not valid in field names
    # (e.g. '-' or '.'), so the field name is sanitised while the label
    # keeps the real name.
    "${lvdisplay}" -C | awk '$3 ~ /^s/ {
        field = $1
        gsub(/[^A-Za-z0-9_]/, "_", field)
        print field ".label " $1 " snapshot of " $5
    }'
    exit 0
fi

# Same field name derivation as in config, so the values match the labels
"${lvdisplay}" -C | awk '$3 ~ /^s/ {
    field = $1
    gsub(/[^A-Za-z0-9_]/, "_", field)
    print field ".value", int($6)
}'

120
plugins/disk/lvm_usage Executable file
View file

@ -0,0 +1,120 @@
#! /usr/bin/perl -w
=head1 NAME
lvm_usage - Plugin to monitor usage of LVM volume groups
=head1 CONFIGURATION
Must be run as root:
[lvm_usage]
user root
=head1 MAGIC MARKERS
#%# family=auto
#%# capabilities=autoconf
=head1 AUTHOR
Gábor Gombás <gombasg@sztaki.hu>
=head1 LICENSE
GPLv2 or later
=cut
use strict;
use Munin::Plugin;
use Carp;
need_multigraph();
# autoconf: the plugin is usable when the device-mapper control node exists
if ( ( $ARGV[0] || '' ) eq 'autoconf' ) {
    my $answer =
        ( -c "/dev/mapper/control" )
      ? "yes\n"
      : "no (/dev/mapper/control is missing)\n";
    print $answer;
    exit 0;
}
# %vgs maps volume group name to { size => bytes, free => bytes,
# lvs => { logical volume name => size in bytes } }.
my %vgs;
open( my $vgs_fh, '-|',
    "vgs --units b --nosuffix --noheadings -o vg_name,vg_size,vg_free" )
    or croak("Failed to run 'vgs': " . $!);
while (my $line = <$vgs_fh>) {
    chomp $line;
    $line =~ s/^\s+//;
    # Blank lines would otherwise create an entry with an undefined name
    next if $line eq '';
    my ($vg_name, $vg_size, $vg_free) = split(/\s+/, $line);
    $vgs{$vg_name}->{size} = $vg_size unless $vgs{$vg_name}->{size};
    $vgs{$vg_name}->{free} = $vg_free unless $vgs{$vg_name}->{free};
    $vgs{$vg_name}->{lvs} = {};
}
close $vgs_fh;
open( my $lvs_fh, '-|',
    "lvs --units b --nosuffix --noheadings -o vg_name,lv_name,lv_size" )
    or croak("Failed to run 'lvs': " . $!);
while (my $line = <$lvs_fh>) {
    chomp $line;
    $line =~ s/^\s+//;
    next if $line eq '';
    my ($vg_name, $lv_name, $lv_size) = split(/\s+/, $line);
    $vgs{$vg_name}->{lvs}->{$lv_name} = $lv_size;
}
close $lvs_fh;
# config: one overview graph (percent used per volume group) followed by
# one sub-graph per volume group (free space plus one stack per volume).
if ( ( $ARGV[0] || '' ) eq 'config' ) {
    my @vg_names = sort keys %vgs;
    my $graph_order = join(' ', map { clean_fieldname($_) } @vg_names);
    print "multigraph lvm_usage\n",
        "graph_title LVM volume group usage\n",
        "graph_args --base 1024 --lower-limit 0 --upper-limit 100\n",
        "graph_vlabel %\n",
        "graph_category disk\n",
        "graph_order $graph_order\n";
    for my $vg (@vg_names) {
        my $vg_id = clean_fieldname($vg);
        print "$vg_id.label $vg\n",
            "$vg_id.type GAUGE\n",
            "$vg_id.draw LINE2\n";
    }
    for my $vg (@vg_names) {
        my $vg_id = clean_fieldname($vg);
        print "multigraph lvm_usage.$vg_id\n",
            "graph_title Volume group usage ($vg)\n",
            "graph_args --base 1024 --lower-limit 0\n",
            "graph_vlabel bytes\n",
            "graph_category disk\n",
            "__free.label Free space\n",
            "__free.draw AREA\n";
        for my $lv (sort keys %{$vgs{$vg}->{lvs}}) {
            my $lv_id = clean_fieldname($lv);
            print "$lv_id.label $lv\n",
                "$lv_id.draw STACK\n";
        }
    }
    exit 0;
}
# Overview graph: used space of every volume group as an integer percentage
print "multigraph lvm_usage\n";
for my $vg (sort keys %vgs) {
    my ( $size, $free ) = @{ $vgs{$vg} }{qw(size free)};
    my $percent_used = int( ( $size - $free ) * 100 / $size );
    my $vg_id = clean_fieldname($vg);
    print "$vg_id.value $percent_used\n";
}
# Per volume group graphs: free space and the size of every logical volume
for my $vg (sort keys %vgs) {
    my $vg_id   = clean_fieldname($vg);
    my $volumes = $vgs{$vg}->{lvs};
    print "multigraph lvm_usage.$vg_id\n";
    print "__free.value $vgs{$vg}->{free}\n";
    for my $lv (sort keys %{$volumes}) {
        my $lv_id = clean_fieldname($lv);
        print "$lv_id.value $volumes->{$lv}\n";
    }
}

283
plugins/disk/md_iostat_ Executable file
View file

@ -0,0 +1,283 @@
#!/usr/bin/perl -w
#
# Plugin for watching io-bound traffic (in blocks) on disks.
#
# Usage: Link or copy into /etc/lrrd/client.d/
#
# Parameters:
#
# config (required)
# autoconf (optional - used by lrrd-config)
#
# $Log$
# Revision 1.14 2004/12/10 18:51:44 jimmyo
# linux/apt* has been forced to LANG=C, to get predictable output.
#
# Revision 1.13 2004/12/10 10:47:49 jimmyo
# Change name from ${scale} to ${graph_period}, to be more consistent.
#
# Revision 1.12 2004/12/09 22:12:56 jimmyo
# Added "graph_period" option, to make "graph_sums" usable.
#
# Revision 1.11 2004/11/21 00:17:12 jimmyo
# Changed a lot of plugins so they use DERIVE instead of COUNTER.
#
# Revision 1.10 2004/11/20 23:58:22 jimmyo
# The linux/iostat plugin now ignores devices without traffic (Deb#267195).
#
# Revision 1.9 2004/09/25 22:29:16 jimmyo
# Added info fields to a bunch of plugins.
#
# Revision 1.8 2004/08/24 13:37:29 ilmari
# Add total line
#
# Revision 1.7 2004/05/20 13:57:12 jimmyo
# Set categories to some of the plugins.
#
# Revision 1.6 2004/02/02 18:18:07 jimmyo
# Changed to an informative vlabel, since the field.label information has been made shorter.
#
# Revision 1.5 2004/02/02 17:52:32 jimmyo
# Linux/iostat now shows only disks also on machines without devfs.
#
# Revision 1.4 2004/02/02 16:54:38 jimmyo
# Make the iostat plugin work properly.
#
# Revision 1.3 2004/02/02 16:53:53 jimmyo
# Make the iostat plugin work properly.
#
# Revision 1.2 2004/01/31 19:24:52 jimmyo
# Rewrite of linux/iostat by Mike Fedyk (Deb##223373,224113).
#
# Revision 1.1 2004/01/02 18:50:01 jimmyo
# Renamed occurrances of lrrd -> munin
#
# Revision 1.1.1.1 2004/01/02 15:18:07 jimmyo
# Import of LRRD CVS tree after renaming to Munin
#
# Revision 1.5 2003/12/18 18:09:32 jimmyo
# Added total line
#
# Revision 1.4 2003/12/18 11:01:51 jimmyo
# Fix by_dev compare issue.
#
# Revision 1.3 2003/12/16 17:51:08 jimmyo
# Plugin linux/iostat modified. Now runs on 2.6, and now "mirrors" i/o like eth* et al. (Deb#224113, Deb#223373)
#
# Revision 1.2 2003/11/07 17:43:16 jimmyo
# Cleanups and log entries
#
#
#
# Magic markers (optional - used by lrrd-config and some installation
# scripts):
#
#%# family=auto
#%# capabilities=autoconf
use strict;
use Data::Dumper;
# Which statistics source is available: the detailed per-device counters
# (/proc/diskstats or an extended /proc/partitions) or the disk_io line
# of /proc/stat.
my $detailed_present = 0;
my $stat_present = 0;
# Is there any md information at all?
my $mdstat_present = 0;
if ( -f '/proc/diskstats' ) {
    $detailed_present = 1;
}
elsif ( system("grep -q 'rio rmerge rsect ruse wio wmerge wsect wuse running use aveq' /proc/partitions") == 0 ) {
    $detailed_present = 1;
}
elsif ( system("grep -q '^disk_io: [^ ]' /proc/stat") == 0 ) {
    $stat_present = 1;
}
$mdstat_present = -f '/proc/mdstat';
# autoconf: usable only with md information and some statistics source
if ( defined($ARGV[0]) and $ARGV[0] eq "autoconf") {
    my $usable = ( $mdstat_present and ($detailed_present or $stat_present) );
    print $usable ? "yes\n" : "no\n";
    exit( $usable ? 0 : 1 );
}
# %devs maps an internal field name to the counters of one device;
# %nametodev maps a kernel device name to that internal field name.
my %devs;
my %nametodev;
if ($detailed_present) {
    &fetch_detailed;
} elsif ($stat_present) {
    # Falling back to /proc/stat
    &fetch_stat;
}
# The md device to graph is whatever follows the last '_' in the
# invocation name.
my $md = $0;
$md =~ s/.*_//;
open(MD,"/proc/mdstat") or die "Could not open /proc/mdstat for reading: $!\n";
my ($dev,$mdstatus,$raid,@devs);
while (<MD>) {
    # Match the complete device name: a bare prefix match would let "md1"
    # pick up the line of "md10".
    next unless /^\Q$md\E\s*:/;
    ($dev, $mdstatus) = split(/\s+:\s+/,$_,2);
    ($mdstatus, $raid, @devs) = split(/\s+/,$mdstatus);
    last;
}
close(MD);
# print "DEVICES: ",join(', ',@devs),"\n";
# Remove unwanted things like raid device number, partition number
# and sort nicely.
@devs = sort by_dev map { s/\d*\[.*\]$//; $_; } @devs;
# Insert the raid device into the mix.
unshift(@devs,$md);
# And translate to the device name used by the datastructures.
@devs = map { $nametodev{$_}; } @devs;
# Position of the device currently being configured; it selects the draw style
my $i=0;
if ( $ARGV[0] and $ARGV[0] eq "config") {
    print "graph_title IOstat for $md\n",
        "graph_args --base 1024 -l 0\n",
        "graph_vlabel blocks / \${graph_period} read (-) / written (+)\n",
        "graph_category disk\n",
        "graph_info This graph shows the I/O to and from block devices comprising the $raid device $md.\n";
    # The raid device itself leads @devs but goes last in the graph order
    my @grapho = @devs;
    push(@grapho,shift(@grapho));
    print "graph_order";
    print " ", $_, "_read ", $_, "_write " for @grapho;
    print "\n";
    foreach my $key (@devs) {
        my $label = $devs{$key}->{name};
        # First device is drawn as a line, the second opens the area and
        # all remaining ones are stacked on top of it.
        my $draw = $i == 0 ? 'LINE2' : $i == 1 ? 'AREA' : 'STACK';
        print $key . "_read.label $label\n",
            $key . "_read.type DERIVE\n",
            $key . "_read.max 900000\n",
            $key . "_read.min 0\n",
            $key . "_read.graph no\n",
            $key . "_write.label $label\n",
            $key . "_write.info I/O on device $label\n",
            $key . "_write.type DERIVE\n",
            $key . "_write.max 900000\n",
            $key . "_write.min 0\n",
            $key . "_write.negative " . $key . "_read\n";
        print "${key}_read.draw $draw\n",
            "${key}_write.draw $draw\n";
        $i++;
    }
    exit 0;
}
# print Dumper \%nametodev;
# print Dumper \%devs;
# Emit the sectors read and written so far for every selected device
for my $key (@devs) {
    # print "Device name: $key, iostat name: ",$nametodev{$key},"\n";
    printf "%s_read.value %s\n",  $key, $devs{$key}->{rsect};
    printf "%s_write.value %s\n", $key, $devs{$key}->{wsect};
}
# Sort comparator: orders device names by plain string comparison
sub by_dev {
    my $order = $a cmp $b;
    return $order;
}
# Fill %devs and %nametodev from the disk_io line(s) of /proc/stat.
# Every "(major,minor):(total,rio,rsect,wio,wsect)" token becomes one
# entry named dev<major>_<minor>. Dies when /proc/stat cannot be opened.
sub fetch_stat() {
    open (my $stat_fh, '<', "/proc/stat") or die "Could not open /proc/stat for reading: $!\n";
    while (<$stat_fh>) {
        next unless (/^disk_io:\s*(.+)\s*/);
        for my $token (grep { /\S/ } split /\s+/) {
            my ($maj, $min, $rio, $rsect, $wio, $wsect) =
                $token =~ /\((\d+),(\d+)\):\(\d+,(\d+),(\d+),(\d+),(\d+)\)/
                or next;
            my $name = "dev".$maj."_".$min;
            $nametodev{$name}=$name;
            $devs{$name} = {
                name => $name,
                rio => $rio,
                rsect => $rsect,
                wio => $wio,
                wsect => $wsect
            };
        }
    }
    close ($stat_fh);
}
# Number of devices handed out so far, per major number
my %maj_count;
# Return the next running index (starting at 0) for the given major number.
sub get_disk_count()
{
    my $major = $_[0];
    $maj_count{$major} = 0 unless exists($maj_count{$major});
    return $maj_count{$major}++;
}
# Fill %devs and %nametodev from /proc/diskstats, or from /proc/partitions
# when the former cannot be opened. Does nothing if neither opens.
# Partitions and devices whose counters are all zero are left out.
sub fetch_detailed() {
    my $detail_fh;
    open($detail_fh, '<', "/proc/diskstats")
        or open($detail_fh, '<', "/proc/partitions")
        or return;
    while (<$detail_fh>) {
        my ($major, $tmpnam, $counters) =
            /^\s+(\d+)\s+\d+\s*\d*\s+([[:alpha:][:digit:]\/]+)\s+(.*)/
            or next;
        my @fields = split(/\s+/, $counters);
        # Raid devices are always wanted. Any other name ending in a digit
        # is skipped, except for devices like cXdX (e.g. the cciss driver).
        if ($tmpnam !~ /^md\d+/ and $tmpnam =~ /\d+$/) {
            next unless $tmpnam =~ /\/c\d+d\d+$/;
        }
        # Ignore devices without any traffic
        next unless grep { $_ } @fields;
        $tmpnam =~ s/\/[[:alpha:]]+(\d+)/\/$1/g;
        $tmpnam =~ s/^([^\/]+)\//$1/;
        $tmpnam =~ s/\/disc$//;
        my $devnam = "dev".$major."_".&get_disk_count($major);
        $nametodev{$tmpnam} = $devnam;
        my %entry = ( major => $major, name => $tmpnam );
        @entry{qw(rio rmerge rsect ruse wio wmerge wsect wuse running use aveq)}
            = @fields[0 .. 10];
        $devs{$devnam} = \%entry;
    }
    close ($detail_fh);
}
# vim:syntax=perl

View file

@ -0,0 +1,191 @@
#!/usr/bin/perl -w
#
# Munin plugin for MegaRAID
# This plugin can graph:- Currently Drive Temperature and Error Count
#
#---------------------
# Examples
# Create a symbolic link to MegaRaid_<AdapterNumber>_<temp|error|other|predictive>
# ln -s /usr/share/munin/plugins/MegaRaid_ /etc/munin/plugins/MegaRaid_0_temp
# graph temperature on adapter 0
#
# ln -s /usr/share/munin/plugins/MegaRaid_ /etc/munin/plugins/MegaRaid_0_error
# graph media errors on adapter 0
#
# ln -s /usr/share/munin/plugins/MegaRaid_ /etc/munin/plugins/MegaRaid_1_temp
# graph temperature on adapter 1
#
#---------------------
# Log
# Revision 0.1 2011/04/16 idobson
# -First version only basic support of the MegaRaid controller
#
# Revision 0.2 2011/04/17 fkatzenb
# -Added a statement to remove the log file created each time MegaCli64 is run
# -Added a few comments and visual changes
#
# Revision 1.0 2011/04/17 fkatzenb
# -Revamped Code to symbolic link for sensor type and future growth
#
# Revision 1.1 2011/04/17 fkatzenb
# -Revised scaling
#
# Revision 1.2 2011/04/28 fkatzenb
# -Added support for graph_info support
# -Added warning & critical alerts support
# -Added data info
#
# Revision 2.0 2011/04/29 fkatzenb
# -Added remaining support for SMART Errors
#
# Revision 2.1 2011/04/29 fkatzenb
# -Added version information in the graph description
#
#
#---------------------
#
# Add the following to your /etc/munin/plugin-conf.d/munin-node:
#
# [MegaRaid_*]
# user root
#
#---------------------
#
#
# Magic markers (optional - used by munin-config and installation scripts):
#
#%# family=auto
#%# capabilities=autoconf
#
# Plugin version shown in the graph description.
my $DisplayVer=2.1;
use strict;
use warnings;
use File::Basename;
my $DevID=0; #Device Number found
my $DevData=0; #Device Data found
# Parse out Adapter number and parameter desired from the file name.
# This is done in Perl rather than through a basename|sed|tr shell pipeline,
# so a plugin path containing spaces or shell metacharacters cannot break
# (or inject into) a shell command line.
my $Parameters=basename($0);
$Parameters=~ s/^MegaRaid_//;
# '_' and '-' are both accepted as the separator between adapter and type.
$Parameters=~ tr/_/-/;
my ($Adapter,$Type)=split(/-/,$Parameters);
# Locate MegaCli64 application and remove whitespace
my $Command=`which MegaCli64`;
chomp $Command;
# Use this to define future parameters to monitor
#
# One entry per sensor type; the key is the <type> part of the symlink name
# (MegaRaid_<adapter>_<type>). Fields of each entry:
#   lookfor     - text searched for (case-insensitively) in each MegaCli64
#                 output line; the text is then removed to leave the value
#   label       - suffix of each per-drive field label
#   title       - graph_title
#   vtitle      - graph_vtitle
#   graph_args  - graph_args
#   warning     - per-drive .warning threshold, '' to emit none
#   critical    - per-drive .critical threshold, '' to emit none
#   info_tag    - suffix of each per-drive .info text
#   description - leading text of graph_info
my %config = (
temp => {
lookfor => 'Drive Temperature :',
label => 'Temp',
title => "MegaRAID Adapter $Adapter: Drive Temperatures",
vtitle => 'Celsius',
graph_args => '--base 1000 -l 0',
warning => '55',
critical => '65',
info_tag => "Temperature (C)",
description => "Internal Temperatures for drives on Adapter $Adapter."
},
error => {
lookfor => 'Media Error Count: ',
label => 'Media Err',
title => "MegaRAID Adapter $Adapter: Media Errors (SMART)",
vtitle => 'Number of Errors',
graph_args => '--base 1000 -l 0',
warning => '',
critical => '',
info_tag => "Media Errors (SMART)",
description => "Number of SMART errors related to the drive's media on Adapter $Adapter."
},
other => {
lookfor => 'Other Error Count: ',
label => 'Other Err',
title => "MegaRAID Adapter $Adapter: Others Errors (SMART)",
vtitle => 'Number of Errors',
graph_args => '--base 1000 -l 0',
warning => '',
critical => '',
info_tag => "Other Errors (SMART)",
description => "Number of SMART errors not related to the drive's media on Adapter $Adapter."
},
predictive => {
lookfor => 'Predictive Failure Count: ',
label => 'Predictive Err',
title => "MegaRAID Adapter $Adapter: Predictive Errors (SMART)",
vtitle => 'Number of Errors',
graph_args => '--base 1000 -l 0',
warning => '',
critical => '',
info_tag => "Predictive Errors (SMART)",
description => "Number of SMART errors for each drive on Adapter $Adapter."
}
);
# "autoconf" request: answer yes/no depending on whether the MegaCli64 binary
# located earlier exists, then stop.
if (($ARGV[0] || '') eq "autoconf") {
    unless (-e $Command) {
        print "no\n";
        exit 1;
    }
    print "yes\n";
    exit 0;
}
# Everything below needs an adapter number and a sensor type that has an
# entry in %config. Without them the plugin would only emit warnings and
# malformed field names, so stop with a clear message instead.
unless (defined $Adapter and defined $Type and exists $config{$Type}) {
    die "$0: cannot determine adapter number and sensor type; "
      . "link this plugin as MegaRaid_<AdapterNumber>_<temp|error|other|predictive>\n";
}
#Read Output of MegaRaid command
$Command.=" -PDList -a".$Adapter;
my @Output=qx($Command);
#Munin Config Options
if ($ARGV[0] and $ARGV[0] eq "config"){
    my $Cfg=$config{$Type};
    print "graph_title $Cfg->{title}\n";
    print "graph_vtitle $Cfg->{vtitle}\n";
    print "graph_args $Cfg->{graph_args}\n";
    print "graph_scale yes\n";
    print "graph_category disk\n";
    print "graph_info $Cfg->{description} <br />Generated by MegaRaid_, Version $DisplayVer<br />\n";
    foreach my $Line (@Output) {
        $Line=~ s/\r//g;
        $Line=~ s/\n//g;
        #Find the device ID
        next unless $Line=~ m/Slot Number: /i;
        $DevID=$Line;
        $DevID=~ s/Slot Number: //;
        # One Munin field per drive slot: A<adapter>_D<slot>_<type>
        my $Field="A".$Adapter."_D".$DevID."_".$Type;
        print "$Field.label A${Adapter}:D${DevID} $Cfg->{label}\n";
        print "$Field.info Adapter: $Adapter / Drive: $DevID - $Cfg->{info_tag}\n";
        print "$Field.warning $Cfg->{warning}\n" if $Cfg->{warning} ne '';
        print "$Field.critical $Cfg->{critical}\n" if $Cfg->{critical} ne '';
    }
    # MegaCli64 was run above, so remove its log file on this exit path too.
    unlink "MegaSAS.log";
    exit 0;
}
# Value run: walk the MegaCli64 output, remember the most recent slot number
# and print one "<field>.value" line for every line carrying the wanted datum.
my $Wanted=$config{$Type}->{lookfor};
for my $Row (@Output) {
    # Drop every carriage return and newline character.
    $Row=~ tr/\r\n//d;
    # A "Slot Number:" line announces the drive the following lines belong to.
    if ($Row=~ m/Slot Number: /i) {
        ($DevID=$Row)=~ s/Slot Number: //;
        chomp $DevID;
    }
    next unless $Row=~ m/$Wanted/i;
    # Strip the caption, then everything from the first "C" onwards
    # (the unit suffix of temperature readings).
    ($DevData=$Row)=~ s/$Wanted//;
    $DevData=~ s/C.*//;
    chomp $DevData;
    print "A".$Adapter."_D".$DevID."_$Type.value $DevData\n";
}
#Remove log file created by running MegaCli
unlink "MegaSAS.log";
exit 0;

73
plugins/disk/raid Executable file
View file

@ -0,0 +1,73 @@
#!/usr/bin/perl -w
#
# (c) 2007 Nathan Rutman nathan@clusterfs.com
#
# Plugin to monitor RAID status
#
# Results are % of healthy drives in a raid device
# and % rebuilt of devices that are resyncing.
#
#%# family=contrib
#%# capabilities=autoconf
# "autoconf" request: the plugin is usable when /proc/mdstat is readable and
# mentions at least one md device.
if ($ARGV[0] and $ARGV[0] eq "autoconf") {
    if (-r "/proc/mdstat" and `grep md /proc/mdstat`) {
        print "yes\n";
        exit 0;
    } else {
        # Munin expects a negative answer in the form "no (reason)".
        print "no (no RAID devices)\n";
        exit 1;
    }
}
# "config" request: emit the graph-wide settings. The per-device settings are
# printed further down, while /proc/mdstat is being parsed.
if ( ($ARGV[0] || '') eq "config" ) {
    my @graph_settings = (
        "graph_title RAID status",
        "graph_category disk",
        "graph_info This graph monitors RAID disk health. Values are percentage of healthy drives in each raid group. Degraded devices are marked Critical.",
        "graph_args --base 1000 -l 0",
        "graph_vlabel % healthy/rebuilt",
        "graph_scale no",
    );
    print "$_\n" for @graph_settings;
}
# Parse /proc/mdstat and, for every active md device, print either its field
# configuration ("config" argument) or its current values:
#   <dev>.value          percentage of member drives that are active
#   <dev>_rebuild.value  rebuild progress in percent (100 when not degraded)
{
    # Undefine the input record separator inside this block, so /proc/mdstat
    # and the mdadm output below are each read in one piece.
    local $/;
    open (my $mdstat, '<', "/proc/mdstat") or exit 1;
    my $text = <$mdstat>;
    close $mdstat;
    my $want_config = ($ARGV[0] and $ARGV[0] eq "config");
    # Should look like "active raid1 sda1[0] sdc1[2] sdb1[1]"
    # Interestingly, swap is presented as "active (auto-read-only)"
    while ($text =~ /(md\d+)\s+:\s+active\s+(\(auto-read-only\)\s+|)(\w+)\s+(.*)\n.*\[(\d+)\/(\d+)]\s+\[(\w+)]/ ) {
        my($dev,$dummy,$type,$members,$nmem,$nact,$status) = ($1,$2,$3,$4,$5,$6,$7);
        # Remember the unparsed remainder right away, before any other regex
        # match gets a chance to replace the post-match text.
        my $remainder = $';
        if ( $want_config ) {
            print "$dev.label $dev\n";
            print "$dev.info $type $members\n";
            # 100: means less than 100
            # Because of an unfound bug, sometimes reported as 99.XX even when OS reports 100.
            print "$dev.critical 98:\n";
            print $dev, "_rebuild.label $dev rebuilt\n";
            print $dev, "_rebuild.info $type\n";
            # Because of an unfound bug, sometimes reported as 99.XX even when OS reports 100.
            print $dev, "_rebuild.critical 98:\n";
        } else {
            my $pct = 100 * $nact / $nmem;
            my $rpct = 100;
            if ( $pct < 100 ) {
                # Joining the output yields '' rather than undef when mdadm
                # prints no "Rebuild" line (degraded but not resyncing, or
                # mdadm failed), so the match below never sees undef.
                my $rebuild = join('', `/sbin/mdadm -D /dev/$dev | grep Rebuild`);
                if( $rebuild =~ /([0-9]+)% complete/ ) {
                    $rpct = $1;
                } else {
                    $rpct = 0;
                }
            }
            print "$dev.value $pct\n";
            print $dev, "_rebuild.value $rpct\n";
        }
        # Continue with whatever follows the device just handled.
        $text = $remainder;
    }
}
exit 0;

View file

@ -0,0 +1,59 @@
#!/bin/sh
# Detect and display Linux sw-raid mismatch count
# Copyright (C) 2011 Rory Jaffe <rsjaffe@gmail.com>
# derived from md_sync_speed by Kristian Lyngstøl
# Copyright (C) 2010 Kristian Lyngstøl <kristian@bohemians.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#####
#
# Magic Markers:
# #%# family=auto
# #%# capabilities=autoconf
# Discover md devices: every /sys/devices/virtual/block/<dev>/md/mismatch_cnt
# contributes <dev> to the space-separated list in $targets.
# A shell glob is used instead of "ls | cut": with the pipeline, $? was the
# exit status of cut rather than ls (so the failure check could never fire),
# and ls complained on stderr when no md device existed.
targets=""
for cnt_file in /sys/devices/virtual/block/*/md/mismatch_cnt; do
	# An unmatched glob is left as the literal pattern; skip it.
	[ -e "$cnt_file" ] || continue
	dev=${cnt_file#/sys/devices/virtual/block/}
	targets="$targets ${dev%%/*}"
done
if [ "x$1" = "xautoconf" ]; then
	if [ -z "$targets" ]; then
		echo "no (no md devices found under /sys/devices/virtual/block/*/md/mismatch_cnt)"
		exit 1
	fi
	echo "yes"
	exit 0
fi
# "config" request: graph-wide settings followed by one label per md device.
if [ "x$1" = "xconfig" ]; then
	echo "graph_title Software-raid mismatch count"
	echo "graph_args -l 0"
	echo "graph_info Display mismatch count of software raid devices"
	echo "graph_category disk"
	echo "graph_vlabel Count"
	for target in $targets; do
		printf '%s.label %s\n' "$target" "$target"
	done
	exit
fi
# Value run: report the mismatch count of every discovered md device.
# This reads mismatch_cnt, the file the devices were discovered through and
# the quantity the graph is titled after (not sync_completed).
for target in $targets; do
	echo "$target.value $(cat "/sys/devices/virtual/block/$target/md/mismatch_cnt")"
done

246
plugins/disk/scsi_queue Executable file
View file

@ -0,0 +1,246 @@
#!/usr/bin/env python
"""
Munin plugin which reports queue busy-values per online SCSI
device on Linux, as seen in /proc/scsi/sg/devices
If the busy-values often reach the queue depth of the device,
one might consider increasing the queue depth. Hence, this
plugin.
Wildcard use:
If your system has many SCSI-like devices, filtering may be needed
to make the resulting graphs readable.
If you symlink the plugin, so that it's executed as
scsi_queue_X_through_Y
then the plugin will only look at devices
/dev/sdX .. /dev/sdY
X and Y may only be one-character values.
X and Y are translated into a regular expression like:
sd[X-Y]
"""
# Author: Troels Arvin <tra@sst.dk>
# See http://troels.arvin.dk/code/munin/ for latest version.
# Only tested with Red Hat Enterprise Linux 5 / CentOS 5, currently.
# Released according to the "New BSD License" AKA the 3-clause
# BSD License:
# ====================================================================
# Copyright (c) 2010, Danish National Board of Health.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the the Danish National Board of Health nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY the Danish National Board of Health ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL the Danish National Board of Health BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ====================================================================
# $Id: scsi_queue 13630 2010-08-31 15:29:14Z tra $
# Note to self:
# The fields in /proc/scsi/sg/devices are:
# host chan id lun type opens qdepth busy online
# TODO:
# - Make it possible to group by multipath group. Might be
# hard, though, because determining path groups seems
# to require root privileges.
# - Support autoconf
# - How to support filtering on installations which have
# many SCSI devices, beyond /dev/sdz?
import os, sys, re
procfile = '/proc/scsi/sg/devices'
sysfs_base = '/sys/bus/scsi/devices'
my_canonical_name = 'scsi_queue' # If called as - e.g. - scsi_queue_foo, then
# foo will be interpreted as a device filter.
# For this, we need a base name.
def bailout(msg):
    """Write *msg* and a newline to stderr, then exit with status 1."""
    line = msg + "\n"
    sys.stderr.write(line)
    raise SystemExit(1)
def print_config(devices, filter_from, filter_through):
    """Print the Munin configuration for the given devices.

    devices        -- dict keyed by block device name; each value is a dict
                      with at least the keys 'qdepth', 'vendor' and 'model'
    filter_from    -- first device letter of the active filter, or None
    filter_through -- last device letter of the active filter, or None

    The graph title mentions the filter range only when both filter values
    are set. Devices are listed in sorted name order.
    """
    title_qualification = ''
    if filter_from and filter_through:
        title_qualification = ' for devices sd%s through sd%s' % (filter_from, filter_through)
    # print() with a single argument behaves the same on Python 2 and 3.
    print('graph_title SCSI queue busy values' + title_qualification)
    print('graph_vlabel busy count')
    print('graph_args --base 1000 -l 0')
    print('graph_category disk')
    print('graph_info This graph shows the queue busy values, as seen in /proc/scsi/sg/devices')

    for key in sorted(devices):
        device = devices[key]
        qdepth = device['qdepth']
        print('%s.min 0' % key)
        print('%s.type GAUGE' % key)
        print('%s.label %s (%s %s); qdepth=%s' % (
            key,
            key,
            device['vendor'],
            device['model'],
            qdepth
        ))
        # The busy count is capped at the queue depth.
        print('%s.max %s' % (key, qdepth))
# Return a list of lists representing interesting parts from procfile
def parse_procfile():
    """Read `procfile` and return one list of whitespace-split fields per line.

    On an I/O error the plugin terminates through bailout().
    """
    retval = []
    try:
        # The with-block closes the file even if reading fails half-way.
        with open(procfile) as fh:
            for line in fh:
                retval.append(line.split())
    except IOError as e:
        bailout('IO error: ' + str(e))
    return retval
# Try to read a file's content. If any I/O problem: return empty string
def readfile(path):
    """Return the content of *path* without trailing whitespace.

    Returns '' when the file cannot be opened or read.
    """
    try:
        # The with-block closes the handle even if read() raises.
        with open(path) as f:
            return f.read().rstrip()
    except IOError:
        return ''
# Return dict of dicts, indexed by device name
def map_procentries_to_devices(list_of_dicts, devfilter_regex):
    """Combine parsed procfile rows with sysfs information.

    list_of_dicts   -- rows as returned by parse_procfile(); despite the
                       name, each row is a list of field strings
    devfilter_regex -- regular expression a block device name must match
                       (anchored at its start) to be included, or None for
                       no filtering

    Returns a dict keyed by block device name; each value is a dict with the
    keys 'model', 'vendor', 'qdepth' and 'busy'.
    """
    device_dict = {}
    regex_compiled = None
    if devfilter_regex:
        regex_compiled = re.compile(devfilter_regex)

    for elem in list_of_dicts:
        # In /sys/bus/scsi/devices we see a number of directory
        # entries, such as:
        #   0:0:0:0
        #   2:0:0:0
        #   3:0:0:0
        #
        # The colon-separated values map to the first four parts
        # of /proc/scsi/sg/devices
        # And the directory entries are symlinks which point to directories
        # in /sys/devices. By following a symlink, we may end up in
        # a directory which contains directory entries like:
        #   - block:sdb
        #   ...
        #   - model
        #   ...
        #   - vendor
        sys_pathname = sysfs_base + '/' + ':'.join(elem[:4])  # isolate stuff like 2:0:0:0

        # Should actually not happen, but nonetheless:
        if not os.path.islink(sys_pathname):
            continue

        # Search for dirents called block:SOMETHING
        # NOTE(review): only the "block:<name>" entry style is recognised;
        # confirm whether the target kernels expose the block device name in
        # some other form (e.g. a "block" subdirectory).
        block_entries = [dirent for dirent in os.listdir(sys_pathname)
                         if dirent.startswith('block:')]
        if not block_entries:
            continue
        if len(block_entries) > 1:
            # The message used to reference an undefined name, which turned
            # this diagnostic into a NameError.
            bailout("Got more than one result when globbing for '%s'"
                    % (sys_pathname + '/block:*'))

        blockdev_name = block_entries[0].split(':')[1]

        # If device filtering is active, filter now
        if regex_compiled is not None and not regex_compiled.match(blockdev_name):
            continue

        # Merge info from the /proc and /sys sources
        device_dict[blockdev_name] = {
            'model': readfile(sys_pathname + '/model'),
            'vendor': readfile(sys_pathname + '/vendor'),
            'qdepth': elem[6],
            'busy': elem[7]
        }
    return device_dict
def print_values(devices):
    """Print one '<device>.value <busy>' line per device, sorted by name.

    devices -- dict keyed by block device name; each value is a dict with at
               least the key 'busy'
    """
    for devname in sorted(devices):
        # print() with a single argument behaves the same on Python 2 and 3.
        print("%s.value %s" % (
            devname,
            devices[devname]['busy']
        ))
# Initial sanity check
n_args = len(sys.argv)
if n_args > 2:
    # At most one arg expected
    print('%d arguments given - expecting only one' % n_args)
    sys.exit(1)

# See if we were called with a Munin wildcard-style 'arg0-argument'
# E.g., if called as scsi_queue_a_through_c, then consider only
# devices sda, sdb, sdc.
devfilter_regex = None
filter_from = None
filter_through = None
called_as = os.path.basename(sys.argv[0])
match = re.match(my_canonical_name + '_([^_])_through_([^_])', called_as)
if match:
    filter_from = match.group(1)
    filter_through = match.group(2)
    # Both bounds are single characters, used as a character-class range.
    devfilter_regex = 'sd[' + filter_from + '-' + filter_through + ']'

# Perform main piece of work
devices = map_procentries_to_devices(
    parse_procfile(),
    devfilter_regex
)

# See how we were called
if n_args == 2:
    # An argument was given, so let's not simply print
    # values.
    arg = sys.argv[1]
    if arg == 'config':
        print_config(devices, filter_from, filter_through)
        sys.exit(0)
    else:
        print("Unknown argument '%s'" % arg)
        sys.exit(1)

# No arguments given; print values
print_values(devices)

78
plugins/disk/smart Executable file
View file

@ -0,0 +1,78 @@
#!/usr/bin/perl
#
# Plugin to monitor all S.M.A.R.T. capable disks
# author: paulv@dds.nl / paulv@bikkel.org
# licence : public domain
#
# Usage: copy or link into /etc/munin/plugins/ as smart_[device] ( smart_sg0 for example)
# Run as root
#
# Parameters:
#
# config (required)
# autoconf (optional - used by munin-config)
#
# Magic markers (optional - used by munin-config and some installation
# scripts):
#
#%# family=manual
#%# capabilities=autoconf
#
use strict;
# The device is taken from the plugin's own file name: whatever follows the
# last underscore, provided it ends in a digit (e.g. smart_sg0 -> /dev/sg0).
# The declaration is kept separate from the conditional assignment because
# "my $x = ... if COND" has undefined behaviour in Perl; $device simply stays
# undefined when the name does not match.
my $device;
$device = "/dev/$1" if ( $0 =~ /[\w_-]+_(\w+\d+)$/ );
my $smartctl = 'smartctl';
my $smartctl_param = ' --attributes ';
# Raw attribute values, keyed "<id>_<attribute name>".
my %attr;
# "autoconf" request: always answers yes.
if ( $ARGV[0] and $ARGV[0] eq "autoconf" ) {
    print "yes\n";
    exit 0;
}
# Names to use for attribute IDs that smartctl reports as "Unknown_Attribute".
my %unknown_attribute_name = (
    170 => 'Reserved_Block_Count',
    171 => 'Program_Fail_Count',
    172 => 'Erase_Fail_Count',
    173 => 'Wear_Leveling_Count',
    174 => 'Unexpected_Pwr_Loss',
    189 => 'High_Fly_Writes',
    202 => 'TA_Increase_Count',
    206 => 'Flying_Height',
);
# Run smartctl and collect the raw value of every attribute row into %attr.
open(my $smart_fh, "$smartctl $smartctl_param $device |") || die $!;
while (my $row = <$smart_fh>) {
    chop $row;
    # Attribute rows: id, name, hex flag field, three numbers, three words,
    # then the raw value (capture 10).
    next unless $row =~ m/\s*(\d+)\s+([\w_-]+)\s+(\d+x.+)\s+(\d+)\s+(\d+)\s+(\d+)\s+([\w_-]+)\s+(\w+)\s+([\w_-]+)\s+(\d+)/;
    my ($id, $name, $rawvalue) = ($1, $2, $10);
    if ($name eq 'Unknown_Attribute' and exists $unknown_attribute_name{$id}) {
        $name = $unknown_attribute_name{$id};
    }
    $attr{$id . '_' . $name} = $rawvalue;
}
# "config" request: graph-wide settings plus label/draw/min for every
# attribute found, then stop.
if ( ($ARGV[0] || '') eq "config" ) {
    print "graph_title SMART values for $device\n",
          "graph_args --base 1000 -l 0\n",
          "graph_category disk\n",
          "graph_vlabel value\n",
          "graph_scale no\n",
          "graph_total Total\n";
    for my $field (keys %attr) {
        print "$field.label smartattribute $field\n",
              "$field.draw LINE2\n",
              "$field.min 0\n";
    }
    exit 0;
}
# Value run: one line per attribute carrying its raw value.
for my $field (keys %attr) {
    print "$field.value $attr{$field}\n";
}
# end

52
plugins/disk/smart-by-id_ Executable file
View file

@ -0,0 +1,52 @@
#!/bin/bash
# Disk id: everything after "smart-by-id_" in the name this plugin was run as.
DISK=${0/*smart-by-id_/}
# Extend PATH before looking for smartctl: it is commonly installed in an
# sbin directory that is not necessarily on the inherited PATH.
export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# smartctl command line: first match on PATH plus the options from $SMARTOPTS.
SMARTCTL="`which smartctl | head -1` $SMARTOPTS"
echo "# $DISK"
# Dispatch on the Munin argument:
#   config  - graph settings plus one field per attribute row ("0x" lines)
#   suggest - list candidate disk ids from /dev/disk/by-id/scsi-*
#   (none)  - smartctl exit status, a derived status class, attribute values
case $1 in
	config)
		echo 'graph_title S.M.A.R.T values for drive '`readlink -f "/dev/disk/by-id/$DISK"`
		echo 'graph_vlabel Attribute S.M.A.R.T value'
		echo 'graph_args --base 1000 --lower-limit 0'
		echo 'graph_category disk'
		echo 'graph_info This graph shows the value of all S.M.A.R.T attributes of drive '`$SMARTCTL -i "/dev/disk/by-id/$DISK" | grep -Ei 'model|serial|firmware' | sed -re 's/.+?: +//gm' | tr '\n' ' '`
		echo 'smartctl_exit_status.label smartctl exit value'
		echo 'smartctl_exit_status.draw LINE2'
		echo 'smartctl_exit_class.label smartctl exit status'
		echo 'smartctl_exit_class.draw AREA'
		echo 'smartctl_exit_class.warning :0'
		echo 'smartctl_exit_class.critical :1'
		$SMARTCTL -A "/dev/disk/by-id/$DISK" | grep 0x | while read
		do
			# Split the row into words: OP[1] is the attribute name,
			# OP[5] the sixth column (used as the critical threshold).
			OP=($REPLY)
			# Field name: every character that is not an ASCII letter or digit
			# becomes "_", the same rule the awk program in the value run
			# applies, so field names agree between "config" and values.
			ON=`echo -n ${OP[1]} | tr -c 'A-Za-z0-9' '_'`
			OL=`echo -n ${OP[1]} | tr '_' ' '`
			echo "${ON}.label ${OL}"
			echo "${ON}.draw LINE2"
			echo "${ON}.critical ${OP[5]}:"
		done
		;;
	suggest)
		# The pattern is quoted so the shell cannot expand it as a glob.
		ls -1 /dev/disk/by-id/scsi-* | grep -v part | grep -o 'scsi.*'
		;;
	"")
		$SMARTCTL -a "/dev/disk/by-id/$DISK" &> /dev/null
		SES=$?
		echo "smartctl_exit_status.value $SES"
		if [ $SES -gt 0 ]
		then
			# Bits 0-2 set: give up without reporting any values.
			if [ $((SES & 7)) -gt 0 ] ; then exit 1 ; fi
			# Bits 3-4 set: class 2. SES is overwritten here, so the test on
			# the next line can then no longer match.
			if [ $((SES & 24)) -gt 0 ] ; then SES=2 ; fi
			# Only bits 5-7 set: class 1.
			if [ $((SES & 224)) -gt 0 ] ; then SES=1 ; fi
		fi
		echo "smartctl_exit_class.value $SES"
		$SMARTCTL -A "/dev/disk/by-id/$DISK" | awk '/0x/ { gsub(/[^a-zA-Z0-9]/,"_",$2); print $2.".value",$4; }'
		;;
esac
#exit 0

585
plugins/disk/smart_ Executable file
View file

@ -0,0 +1,585 @@
#!/usr/bin/env python
# -*- encoding: iso-8859-1 -*-
#
# Wildcard-plugin to monitor S.M.A.R.T attribute values through smartctl,
# which is part of smartmontools package:
# http://smartmontools.sourceforge.net/
#
# To monitor a S.M.A.R.T device, link smart_<device> to this file.
# E.g.
# ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
# ...will monitor /dev/hda.
#
# Needs following minimal configuration in plugin-conf.d/munin-node:
# [smart_*]
# user root
# group disk
#
# Parameters
# smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl)
# smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs
# ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False
#
# Parameters can be specified on a per-drive basis, eg:
# [smart_hda]
# user root
# group disk
# env.smartargs -H -c -l error -l selftest -l selective -d ata
# env.smartpath /usr/local/sbin/smartctl
#
# [smart_twa0-1]
# user root
# group disk
# env.smartargs -H -l error -d 3ware,1
# env.ignorestandby True
#
# [smart_twa0-2]
# user root
# group disk
# env.smartargs -H -l error -d 3ware,2
#
# Author: Nicolas Stransky <Nico@neo-lan.net>
#
# v1.0 22/08/2004 - First draft
# v1.2 28/08/2004 - Clean up the code, add a verbose option
# v1.3 14/11/2004 - Compatibility with python<2.2. See comments in the code
# v1.4 17/11/2004 - Deal with non zero exit codes of smartctl
# - config now prints the critical thresholds, as reported by smartctl
# v1.5 18/11/2004 - Plot smartctl_exit_code bitmask
# v1.6 21/11/2004 - Add autoconf and suggest capabilities
# - smartctl path can be passed through "smartpath" environment variable
# - Additional smartctl args can be passed through "smartargs" environment variable
# v1.7 29/11/2004 - Add suggest capabilities for NetBSD, OpenBSD, FreeBSD and SunOS.
# - Allow to override completely the smartctl arguments with "smartargs"
# v1.8 16/02/2005 - Exit status field now only triggers warnings, not criticals.
# v1.9 07/07/2005 - Allow to query several drives on the same 3ware card.
# - Correct a bug when '-i' was not listed in smartargs
# - Don't fail if no value was obtained for hard drive model
# v1.10 19/08/2005 - smartctl_exit_code is now a numerical value
# v2.0 08/05/2009 - Correct bug in the interpretation of smartctl_exit_code
# - New option to suppress SMART warnings in munin
# - Temporary lack of output for previously existing drive now reports U
# - The plugin now contains its own documentation for use with munindoc
# - Removed python<2.2 compatibility comments
# - Better autodetection of drives
# - Don't spin up devices in a low-power mode.
#
# Copyright (c) 2004-2009 Nicolas Stransky.
#
# Permission to use, copy, and modify this software with or without fee
# is hereby granted, provided that this entire notice is included in
# all source code copies of any software which is or includes a copy or
# modification of this software.
#
# THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
# IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
# REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
# MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
# PURPOSE.
#
#
# Magic markers
#%# capabilities=autoconf suggest
#%# family=auto
## You may edit the following 3 variables
# Increase verbosity (True/False)
# When False, verboselog() is replaced by a no-op further down.
verbose=False
# Suppress SMART warnings (True/False)
# When True, "config" emits the .critical/.warning thresholds;
# set to False to suppress them.
report_warnings=True
# Modify to your needs:
# Directory holding the pickled per-drive state files (smart-<drive>.state).
statefiledir='/var/lib/munin/plugin-state/'
# You may not modify anything below this line
import os, sys, string, pickle
from math import log
plugin_version="2.0"
if verbose:
    def verboselog(s):
        """Write *s* to stderr, prefixed with the plugin name."""
        global plugin_name
        sys.stderr.write(plugin_name + ': ' + s + '\n')
else:
    def verboselog(s):
        """Verbose logging is switched off: ignore *s*."""
        return None
def read_values(hard_drive):
    """Run smartctl on /dev/<hard_drive> and load the result into smart_values.

    The global smart_values is rebuilt from scratch on every call. Afterwards
    it holds one entry per attribute row ({'value': ..., 'threshold': ...},
    with '-' in the attribute name replaced by '_'), plus the entries
    'smartctl_exit_status' and 'model'. The global emptyoutput is set to True
    when no attribute row was found.

    Returns the raw status from closing the smartctl pipe (None on success).
    Exits the process with status 1 when smartctl cannot be run or its output
    cannot be parsed.
    """
    global smart_values, emptyoutput
    # Start from a clean slate. Without this, values left over from a previous
    # call (e.g. while probing several drives for "suggest") would make the
    # emptiness check below pass for a drive that produced no output at all.
    smart_values = {}
    model = None
    try:
        verboselog('Reading S.M.A.R.T values')
        os.putenv('LC_ALL', 'C')
        # NOTE(review): any non-empty value of ignorestandby -- including the
        # string "False" -- disables '-n standby'; confirm this is intended.
        standby_args = os.getenv('ignorestandby', False) and ' ' or ' -n standby '
        smart_output = os.popen(
            os.getenv('smartpath', '/usr/sbin/smartctl') + ' '
            + os.getenv('smartargs', '-a') + standby_args
            + '-A -i /dev/' + hard_drive)
        in_attribute_table = False
        for l in smart_output:
            if l[:-1] == '':
                # A blank line ends the attribute table.
                in_attribute_table = False
            elif l[:13] == 'Device Model:' or l[:7] == 'Device:':
                model_list = l.split(':')[1].split()
                if 'Version' in model_list:
                    model_list.remove('Version')
                model = ' '.join(model_list)
            if in_attribute_table:
                smart_attribute = l.split()
                # Column 1 is the attribute name; columns 3 and 5 are stored
                # as its value and threshold.
                smart_values[smart_attribute[1].replace('-', '_')] = {
                    "value": smart_attribute[3],
                    "threshold": smart_attribute[5],
                }
            elif l[:18] == "ID# ATTRIBUTE_NAME":
                # Start reading the Attributes block
                in_attribute_table = True
        exit_status = smart_output.close()
        if exit_status is not None:
            # smartctl exit code is a bitmask, check man page.
            num_exit_status = int(exit_status / 256)  # Python convention
            highest_bit = int(log(num_exit_status, 2))
            if highest_bit <= 2:  # bit code
                verboselog('smartctl cannot access S.M.A.R.T values on drive '+hard_drive+'. Command exited with code '+str(num_exit_status)+' (bit '+str(highest_bit)+')')
            else:
                verboselog('smartctl exited with code '+str(num_exit_status)+' (bit '+str(highest_bit)+'). '+hard_drive+' may be FAILING RIGHT NOW!')
        else:
            num_exit_status = 0
    except Exception:
        verboselog('Cannot access S.M.A.R.T values! Check user rights or propper smartmontools installation/arguments.')
        sys.exit(1)
    if smart_values == {}:
        verboselog('Can\'t find any S.M.A.R.T values in smartctl output!')
        emptyoutput = True
    else:
        emptyoutput = False
    smart_values["smartctl_exit_status"] = {"value": str(num_exit_status), "threshold": "1"}
    # For some reason we may have no value for "model"
    if model is None:
        model = "unknown"
    smart_values["model"] = model
    return exit_status
def open_state_file(hard_drive, mode):
    """Open the state file of *hard_drive* and return the file object.

    hard_drive -- list of name parts as returned by get_hard_drive_name();
                  the parts are joined with '-' to form the file name
    mode       -- open() mode, e.g. "r" or "w"

    The file is always opened in binary mode: the state file holds pickled
    data, which Python 3 can only read and write through binary files.
    """
    global statefiledir
    if 'b' not in mode:
        mode = mode + 'b'
    return open(statefiledir + '/smart-' + "-".join(hard_drive) + '.state', mode)
def update_state_file(hard_drive):
    """Pickle the current smart_values into the state file of *hard_drive*.

    Failures are only reported through verboselog(); they never abort the
    plugin.
    """
    try:
        verboselog('Saving statefile')
        state_file = open_state_file(hard_drive, "w")
        try:
            pickle.dump(smart_values, state_file)
        finally:
            # Close explicitly so the data is flushed to disk right away.
            state_file.close()
    except Exception:
        verboselog('Error trying to save state file! Check access rights')
def print_plugin_values(hard_drive):
    """Print '<attribute>.value <value>' for every entry of smart_values.

    The 'model' entry is skipped. When the last smartctl run produced no
    attribute rows, the values are reported as unknown from the state file
    instead.
    """
    global emptyoutput, smart_values
    if emptyoutput:
        print_unknown_from_statefile(hard_drive, smart_values)
        return
    verboselog('Printing S.M.A.R.T values')
    for key, entry in smart_values.items():
        if key != "model":
            print(key + ".value " + entry["value"])
def print_config(hard_drive):
    """Print the Munin configuration for *hard_drive*.

    hard_drive -- list of name parts as returned by get_hard_drive_name()

    The attribute list comes from the drive's state file when one exists.
    Otherwise the drive is queried once and the result is saved as the new
    state file. Exits with status 1 when an existing state file cannot be
    loaded.
    """
    global report_warnings, smart_values, statefiledir
    drive_label = ",".join(hard_drive)
    if os.path.exists(statefiledir + '/smart-' + "-".join(hard_drive) + '.state'):
        try:
            verboselog('Try to recall previous S.M.A.R.T attributes for ' + drive_label)
            state_file = open_state_file(hard_drive, "r")
            try:
                # NOTE(review): pickle.load executes whatever the file
                # describes; the state directory must only be writable by
                # trusted users.
                smart_values_state = pickle.load(state_file)
            finally:
                state_file.close()
        except Exception:
            verboselog('Error opening existing state file!')
            sys.exit(1)
    else:
        verboselog('No state file, reading S.M.A.R.T values for the first time')
        read_values(hard_drive[0])
        state_file = open_state_file(hard_drive, "w")
        try:
            pickle.dump(smart_values, state_file)
        finally:
            state_file.close()
        smart_values_state = smart_values

    verboselog('Printing configuration')
    print('graph_title S.M.A.R.T values for drive ' + drive_label)
    print('graph_vlabel Attribute S.M.A.R.T value')
    print('graph_args --base 1000 --lower-limit 0')
    print('graph_category disk')
    print('graph_info This graph shows the value of all S.M.A.R.T attributes of drive '+drive_label+' ('+smart_values_state['model']+'). smartctl_exit_status is the return value of smartctl. A non-zero return value indicates an error, a potential error, or a fault on the drive.')
    # sorted() works on both Python 2 lists and Python 3 key views.
    for key in sorted(smart_values_state.keys()):
        if key in ['smartctl_exit_status', 'model']:
            continue
        print(key + '.label ' + key)
        print(key + '.draw LINE2')
        if report_warnings:
            print(key + '.critical ' + smart_values_state[key]["threshold"] + ':')
    print('smartctl_exit_status.label smartctl_exit_status')
    print('smartctl_exit_status.draw LINE2')
    if report_warnings:
        print('smartctl_exit_status.warning ' + smart_values_state['smartctl_exit_status']["threshold"])
def print_unknown_from_statefile(hard_drive, smart_values):
    """Print '<attribute>.value U' for every attribute in the state file.

    Used when smartctl returned no attribute rows, so that previously known
    fields are reported as unknown instead of disappearing.

    hard_drive   -- list of name parts as returned by get_hard_drive_name()
    smart_values -- accepted for interface compatibility; not used

    Exits with status 1 when there is no state file or it cannot be loaded.
    """
    global statefiledir
    if os.path.exists(statefiledir + '/smart-' + "-".join(hard_drive) + '.state'):
        try:
            verboselog('Failed to get S.M.A.R.T values from drive. Try to recall previous S.M.A.R.T attributes for ' + ",".join(hard_drive))
            state_file = open_state_file(hard_drive, "r")
            try:
                smart_values_state = pickle.load(state_file)
            finally:
                state_file.close()
        except Exception:
            verboselog('Error opening existing state file!')
            sys.exit(1)
    else:
        # Nothing is read here: without a state file there is no attribute
        # list to report as unknown.
        verboselog('No state file, cannot report unknown S.M.A.R.T values')
        # sys.exit rather than the interactive helper exit(), which only
        # exists when the site module has been loaded.
        sys.exit(1)

    verboselog('Printing unknown values for all attributes in state file')
    for key in sorted(smart_values_state.keys()):
        if key == 'model':
            continue
        print(key + '.value U')
def get_hard_drive_name():
    """Derive the drive name from the plugin's own (symlink) name.

    Returns a list: [device] for plain names, or [device, number] when the
    part after the last '_' contains a '-' (several plugins sharing one
    device node, as used for 3ware cards).

    Exits with status 1 when the plugin name contains no '_' or when
    /dev/<device> does not exist.
    """
    global plugin_name
    # Only the name extraction is guarded. The sys.exit() for a missing
    # device further down must not be caught and re-reported as a naming
    # problem.
    try:
        name = [plugin_name[plugin_name.rindex('_') + 1:]]
    except ValueError:
        verboselog('No S.M.A.R.T device name found in plugin\'s symlink!')
        sys.exit(1)

    if os.uname()[0] == "SunOS":
        # if hard_drive name starts with "rdsk" or "rmt", try to reconstruct the path
        if name[0][0:4] == "rdsk":
            name[0] = os.path.join("rdsk", name[0][4:])
        elif name[0][0:3] == "rmt":
            name[0] = os.path.join("rmt", name[0][3:])

    # For 3ware cards, we have to set multiple plugins for the same hard drive name.
    # Let's see if we find a '-' in the drive name.
    if name[0].find('-') != -1:
        # Put the drive name and its number in a list
        dash = name[0].rindex('-')
        name = [name[0][:dash], name[0][dash + 1:]]

    # Check that the drive exists in /dev
    if not os.path.exists('/dev/' + name[0]):
        verboselog('/dev/' + name[0] + ' not found!')
        sys.exit(1)
    return name
def _smart_drive_responds(device):
    """Return True when smartctl delivers S.M.A.R.T values for /dev/<device>."""
    # Forget the values of the previously probed drive, otherwise the
    # empty-output detection in read_values() could never trigger again.
    smart_values.clear()
    try:
        verboselog('Trying ' + device + '...')
        exit_status = read_values(device)
        # Accept a clean exit, or an exit code whose highest set bit is
        # above bit 2 -- and only if attribute rows were actually found.
        return ((exit_status is None or int(log(int(exit_status / 256), 2)) > 2)
                and not emptyoutput)
    except (Exception, SystemExit):
        # read_values() ends the process through sys.exit() on failure; while
        # probing, that only means "not this device".
        return False


def find_smart_drives():
    """Return the names of the drives that answer to smartctl.

    Candidates are enumerated per operating system (Linux, OpenBSD, FreeBSD,
    NetBSD, SunOS) and each one is probed with read_values(). Drives on a
    3Ware card are not autodetected. An unsupported system yields [].
    """
    global emptyoutput
    # Try to autodetect Linux, *BSD, SunOS drives. Don't try to autodetect drives on a 3Ware card.
    drives = []
    system = os.uname()[0]
    if system == "Linux":
        if os.path.exists('/sys/block/'):
            # Running 2.6
            try:
                for drive in os.listdir('/sys/block/'):
                    if drive[:2] in ['md', 'fd', 'lo', 'ra', 'dm']:
                        continue  # Ignore MD, Floppy, loop , RAM and LVM devices.
                    if _smart_drive_responds(drive):
                        drives.append(drive)
            except Exception:
                verboselog('Failed to list devices in /sys/block')
        else:
            verboselog('Not running linux2.6, failing back to /proc/partitions')
            try:
                partitions = open('/proc/partitions', 'r')
                try:
                    L = partitions.readlines()
                finally:
                    partitions.close()
                for l in L:
                    words = l.split()
                    # Only lines starting with a (major) number describe a device.
                    if len(words) == 0 or words[0][0] not in '0123456789':
                        continue
                    if words[0] in ['1', '9', '58', '254']:
                        continue  # Ignore RAM, md, LVM and LVM2 devices
                    # Names ending in a digit are not probed.
                    if words[-1][-1] not in '0123456789':
                        if _smart_drive_responds(words[-1]):
                            drives.append(words[-1])
                verboselog('Found drives in /proc/partitions ! ' + str(drives))
            except Exception:
                verboselog('Failed to list devices in /proc/partitions')
    elif system == "OpenBSD":
        try:
            sysctl_kerndisks = os.popen('sysctl hw.disknames')
            try:
                kerndisks = sysctl_kerndisks.readline().strip()
            finally:
                sysctl_kerndisks.close()
            # Comma-separated names after the last '='.
            for drive in kerndisks[kerndisks.rindex('=') + 1:].split(','):
                if drive[:2] in ['md', 'cd', 'fd']:
                    continue  # Ignore Memory Disks, CD-ROM drives and Floppy
                if _smart_drive_responds(drive + 'c'):
                    drives.append(drive + 'c')
        except Exception:
            verboselog('Failed to list OpenBSD disks')
    elif system == "FreeBSD":
        try:
            sysctl_kerndisks = os.popen('sysctl kern.disks')
            try:
                kerndisks = sysctl_kerndisks.readline().strip()
            finally:
                sysctl_kerndisks.close()
            # The first word is skipped; the rest are drive names.
            for drive in kerndisks.split()[1:]:
                if drive[:2] in ['md', 'cd', 'fd']:
                    continue  # Ignore Memory Disks, CD-ROM drives and Floppy
                if _smart_drive_responds(drive):
                    drives.append(drive)
        except Exception:
            verboselog('Failed to list FreeBSD disks')
    elif system == "NetBSD":
        try:
            sysctl_kerndisks = os.popen('sysctl hw.disknames')
            try:
                kerndisks = sysctl_kerndisks.readline().strip()
            finally:
                sysctl_kerndisks.close()
            # The first two words are skipped; the rest are drive names.
            for drive in kerndisks.split()[2:]:
                if drive[:2] in ['md', 'cd', 'fd']:
                    continue  # Ignore Memory Disks, CD-ROM drives and Floppy
                if _smart_drive_responds(drive + 'c'):
                    drives.append(drive + 'c')
        except Exception:
            verboselog('Failed to list NetBSD disks')
    elif system == "SunOS":
        try:
            from glob import glob
            # The directory name is glued onto the device name here;
            # get_hard_drive_name() splits it off again.
            for drivepath in glob('/dev/rdsk/*s2'):
                candidate = 'rdsk' + os.path.basename(drivepath)
                if _smart_drive_responds(candidate):
                    drives.append(candidate)
            for drivepath in glob('/dev/rmt/*'):
                candidate = 'rmt' + os.path.basename(drivepath)
                if _smart_drive_responds(candidate):
                    drives.append(candidate)
        except Exception:
            verboselog('Failed to list SunOS disks')
    return drives
### Main part ###
# Entry point of the smart_ plugin.  Dispatches on the first command-line
# argument (config / autoconf / suggest / version); with no argument it
# reads the drive's values and prints them.

# Module-level state.  `emptyoutput` is read by the drive-detection code and
# below; `smart_values` is presumably filled in by read_values() -- TODO
# confirm against its definition.
smart_values = {}
emptyoutput = False
plugin_name = os.path.basename(sys.argv[0])
verboselog('plugins\' UID: '+str(os.geteuid())+' / plugins\' GID: '+str(os.getegid()))

# Parse arguments
if len(sys.argv) > 1:
    if sys.argv[1] == "config":
        hard_drive = get_hard_drive_name()
        print_config(hard_drive)
        sys.exit(0)
    elif sys.argv[1] == "autoconf":
        # The smartctl location can be overridden through the `smartpath`
        # environment variable.
        if os.path.exists(os.getenv('smartpath', '/usr/sbin/smartctl')):
            print('yes')
        else:
            print('no (smartmontools not found)')
        # Munin reads the answer from stdout; "no" is a valid answer, not a
        # plugin failure, so the exit status is 0 in both cases.
        sys.exit(0)
    elif sys.argv[1] == "suggest":
        for drive in find_smart_drives():
            print(drive)
        sys.exit(0)
    elif sys.argv[1] == "version":
        print('smart_ Munin plugin, version '+plugin_version)
        sys.exit(0)
    elif sys.argv[1] != "":
        verboselog('unknown argument "'+sys.argv[1]+'"')
        sys.exit(1)
    # An empty-string argument falls through to the normal fetch below.

# No argument given, doing the real job:
hard_drive = get_hard_drive_name()
read_values(hard_drive[0])
if not emptyoutput:
    update_state_file(hard_drive)
print_plugin_values(hard_drive)
# sys.exit rather than the bare exit(): the latter is only injected by the
# `site` module and is not guaranteed to exist.
sys.exit(0)
### The following is the smart_ plugin documentation, intended to be used with munindoc
"""
=head1 NAME
smart_ - Munin wildcard-plugin to monitor S.M.A.R.T. attribute values through smartctl
=head1 APPLICABLE SYSTEMS
Node with B<Python> interpreter and B<smartmontools> (http://smartmontools.sourceforge.net/)
installed and in function.
=head1 CONFIGURATION
=head2 Create link in service directory
To monitor a S.M.A.R.T device, create a link in the service directory
of the munin-node named smart_<device>, which is pointing to this file.
E.g.
ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
...will monitor /dev/hda.
=head2 Grant privileges in munin-node
The plugin must be run under high privileged user B<root>, to get access to the raw device.
So following minimal configuration in plugin-conf.d/munin-node is needed.
=over 2
[smart_*]
user root
group disk
=back
=head2 Set Parameter if needed
smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl)
smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs
ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False
Parameters can be specified on a per-drive basis, eg:
=over 2
[smart_hda]
user root
env.smartargs -H -c -l error -l selftest -l selective -d ata
env.smartpath /usr/local/sbin/smartctl
=back
In particular, for SATA drives, with older versions of smartctl:
=over 2
[smart_sda]
user root
env.smartargs -d ata -a
[smart_twa0-1]
user root
env.smartargs -H -l error -d 3ware,1
env.ignorestandby True
[smart_twa0-2]
user root
env.smartargs -H -l error -d 3ware,2
=back
=head1 INTERPRETATION
If a device supports the B<Self-Monitoring, Analysis
and Reporting Technology (S.M.A.R.T.)> it offers readable
access to the attribute table. There you find the B<raw value>,
a B<normalised value> and a B<threshold> (set by the vendor)
for each attribute, that is supported by that device.
The meaning and handling of the raw value is a secret of the
vendors embedded S.M.A.R.T.-Software on the disk. The only
relevant info from our external view is the B<normalised value>
in comparison with the B<threshold>. If the attribute's value is
equal to or below the threshold, it signals its failure and
the B<health status> of the device will switch from B<passed> to B<failed>.
This plugin fetches the B<normalised values of all SMART-Attributes>
and draws a curve for each of them.
It takes the vendors threshold as critical limit for the munin datafield.
So you will see an alarm, if the value reaches the vendors threshold.
Looking at the graph: It is a bad sign, if the curve starts
to curl or to meander. The more horizontal it runs,
the better. Of course it is normal, that the temperatures
curve swings a bit. But the others should stay steady on
their level if everything is ok.
S.M.A.R.T. distinguishes between B<Pre-fail> and B<Old-age>
Attributes. An old disk will have more curling curves
because of degradation, especially for the B<Old-age> Attributes.
You should then backup more often, run more selftests[1] and prepare
the disks replacement.
B<Act directly> if a B<Pre-fail> Attribute goes below threshold.
Immediately back up your data and replace your hard disk drive.
A failure may be imminent.
[1] Consult the smartmontools manpages to learn about
offline tests and automated selftests with smartd.
Only with both activated, the values of the SMART-Attributes
reflect the overall state of the device.
Tutorials and articles about S.M.A.R.T. and smartmontools:
http://smartmontools.sourceforge.net/doc.html#tutorials
=head1 MAGIC MARKERS
#%# family=auto
#%# capabilities=autoconf suggest
=head1 CALL OPTIONS
B<none>
=over 2
Fetches values if called without arguments:
E.g.: munin-run smart_hda
=back
B<config>
=over 2
Prints plugins configuration.
E.g.: munin-run smart_hda config
=back
B<autoconf>
=over 2
Tries to find smartctl and outputs value 'yes' for success, 'no' if not.
It's used by B<munin-node-configure> to see whether autoconfiguration is possible.
=back
B<suggest>
=over 2
Outputs the list of device names, that it found plugged to the system.
B<munin-node-configure> uses this to build the service links for this wildcard-plugin.
=back
=head1 VERSION
Version 2.0
=head1 BUGS
None known
=head1 AUTHOR
(C) 2004-2009 Nicolas Stransky <Nico@stransky.cx>
(C) 2008 Gabriele Pohl <contact@dipohl.de>
Reformatted existing documentation to POD-Style, added section Interpretation to the documentation.
=head1 LICENSE
GPLv2 (http://www.gnu.org/licenses/gpl-2.0.txt)
=cut
"""

View file

@ -0,0 +1,175 @@
#!/usr/bin/perl
=head1 NAME
snmp__netapp_diskusage_ - Munin plugin to retrieve file systems usage on
NetApp storage appliances.
=head1 APPLICABLE SYSTEMS
File systems usage stats should be reported by any NetApp storage
appliance with SNMP agent daemon activated. See na_snmp(8) for details.
=head1 CONFIGURATION
Unfortunately, SNMPv3 is not fully supported on all NetApp equipment.
For this reason, this plugin will use SNMPv2 by default, which is
insecure because it doesn't encrypt the community string.
The following parameters will help you get this plugin working :
[snmp_*]
env.community MyCommunity
If your community name is 'public', you should really worry about
security and immediately reconfigure your appliance.
Please see 'perldoc Munin::Plugin::SNMP' for further configuration.
=head1 INTERPRETATION
The plugin reports file system usage. This can help you monitor file
system usage over a given period of time.
=head1 MIB INFORMATION
This plugin requires support for the NETWORK-APPLIANCE-MIB issued by
Network Appliance. It reports the content of the DfEntry OID.
=head1 MAGIC MARKERS
#%# family=snmpauto
#%# capabilities=snmpconf
=head1 VERSION
v1.0 - 06/22/2009 14:05:03 CEST
Initial revision
=head1 AUTHOR
This plugin is copyright (c) 2009 by Guillaume Blairon.
NetApp is a registered trademark and Network Appliance is a trademark
of Network Appliance, Inc. in the U.S. and other countries.
=head1 BUGS
This plugin hasn't been tested on much hardware. If you encounter bugs,
please report them to Guillaume Blairon E<lt>L<g@yom.be>E<gt>.
=head1 LICENSE
GPLv2 or (at your option) any later version.
=cut
use strict;
use warnings;
use Munin::Plugin::SNMP;
use vars qw($DEBUG);
# Debug flag, taken from the MUNIN_DEBUG environment variable.
$DEBUG = $ENV{'MUNIN_DEBUG'};

# Colour palette ("better colours from munin 1.3.x").  The order matters:
# the config output below picks colours by index.
# Row 1: greens, blues, oranges, dark yellow, dark blue, purple, lime, reds, gray.
my @palette = qw(
    00CC00 0066B3 FF8000 FFCC00 330099 990099 CCFF00 FF0000 808080
    008F00 00487D B35A00 B38F00 6B006B 8FB300 B30000 BEBEBE
    80FF80 80C9FF FFC080 FFE680 AA80FF EE00CC FF8080
    666600 FFBFFF 00FFCC CC6699 999900
);

# OID prefixes (the file system index is appended at query time).
#   - dfHigh.* : 32 most significant bits counters
#   - dfLow.*  : 32 least significant bits counters
my %oids = map { $_->[0] => "1.3.6.1.4.1.789.1.5.4.1.$_->[1]." } (
    [ dfHighTotalKBytes => 14 ],
    [ dfLowTotalKBytes  => 15 ],
    [ dfHighUsedKBytes  => 16 ],
    [ dfLowUsedKBytes   => 17 ],
    [ dfHighAvailKBytes => 18 ],
    [ dfLowAvailKBytes  => 19 ],
);
# Combine the low and high 32-bit halves of a split counter into one number.
#
# Arguments: ($l, $h) - the least and most significant 32-bit words.
# Returns:   $h * 2**32 + $l with both words taken as unsigned, or the
#            string "U" (Munin's "unknown") if either word is undefined.
#
# The previous implementation packed both words but unpacked only the first
# 32 bits, so the high word was silently dropped and any value of 2**32 or
# more wrapped around.  The name is kept for the sake of existing callers.
sub to_32bit_int {
    my ($l, $h) = @_;
    return "U" if ((!defined $l) || (!defined $h));
    # pack/unpack with 'N' reinterprets a negative (signed) word as its
    # unsigned 32-bit equivalent -- presumably needed because the agent
    # reports these as signed integers; verify against the MIB.
    my $low  = unpack( 'N', pack('N', $l) );
    my $high = unpack( 'N', pack('N', $h) );
    return $high * 4294967296 + $low;
}
# Requested mode: first command-line argument, or '' when none was given.
my $mode = defined $ARGV[0] ? $ARGV[0] : '';

# snmpconf: print the OIDs used for autoconfiguration.  Handled before any
# SNMP session is opened.
if ($mode eq 'snmpconf') {
    print "number 1.3.6.1.4.1.789.1.5.6.0\n",
          "index 1.3.6.1.4.1.789.1.5.4.1.1.\n";
    print "require $oids{$_} [0-9]\n" for keys %oids;
    exit 0;
}

my $session = Munin::Plugin::SNMP->session();
my ($host, undef, undef, $tail) = Munin::Plugin::SNMP->config_session();

# The trailing part of the plugin name carries the file system index.
my ($df_id) = $tail =~ /^netapp_diskusage_(\d+)$/
    or die "Couldn't understand what I'm supposed to monitor";
my $name_oid = '1.3.6.1.4.1.789.1.5.4.1.2.' . $df_id;

# config: describe the graph and its three data series.
if ($mode eq 'config') {
    my $df_name = $session->get_single($name_oid);

    # One row per series: field name, draw style, label, palette index, info.
    my @series = (
        [ 'used',  'AREA',  'Used',      1,
          "The total disk space in KBytes that is in use on the $df_name file system." ],
        [ 'avail', 'STACK', 'Available', 3,
          "The total disk space in KBytes that is free for use on the $df_name file system." ],
        [ 'total', 'LINE2', 'Total',     7,
          "The total capacity in KBytes for the $df_name file system." ],
    );

    print "host_name $host\n" unless $host eq 'localhost';
    print map { "$_\n" } (
        "graph_title $host disk usage on $df_name",
        "graph_args --base 1024 --lower-limit 0",
        "graph_vlabel bytes",
        "graph_category disk",
        "graph_info This graph shows the disk usage for $df_name on NetApp host $host",
        "graph_order used avail total",
    );

    for my $row (@series) {
        my ($field, $draw, $label, $colour, $info) = @$row;
        print map { "$field.$_\n" } (
            "info $info",
            "type GAUGE",
            "draw $draw",
            "label $label",
            "cdef $field,1024,*",    # values are fetched in KBytes, graphed in bytes
            "min 0",
            "colour $palette[$colour]",
        );
    }
    exit 0;
}

# fetch: query the low then the high word of each counter, combine, print.
my %value;
for my $kind (qw(Used Avail Total)) {
    my $low  = $session->get_single($oids{"dfLow${kind}KBytes"}  . $df_id);
    my $high = $session->get_single($oids{"dfHigh${kind}KBytes"} . $df_id);
    $value{lc $kind} = to_32bit_int($low, $high);
}
print "$_.value $value{$_}\n" for qw(used avail total);

exit 0;
__END__

View file

@ -0,0 +1,144 @@
#!/usr/bin/perl
=head1 NAME
snmp__netapp_inodeusage_ - Munin plugin to retrieve inodes usage on
NetApp storage appliances.
=head1 APPLICABLE SYSTEMS
Inodes usage stats should be reported by any NetApp storage appliance
with SNMP agent daemon activated. See na_snmp(8) for details.
=head1 CONFIGURATION
Unfortunately, SNMPv3 is not fully supported on all NetApp equipment.
For this reason, this plugin will use SNMPv2 by default, which is
insecure because it doesn't encrypt the community string.
The following parameters will help you get this plugin working :
[snmp_*]
env.community MyCommunity
If your community name is 'public', you should really worry about
security and immediately reconfigure your appliance.
Please see 'perldoc Munin::Plugin::SNMP' for further configuration.
=head1 MIB INFORMATION
This plugin requires support for the NETWORK-APPLIANCE-MIB issued by
Network Appliance. It reports the content of the DfEntry OID.
=head1 MAGIC MARKERS
#%# family=snmpauto
#%# capabilities=snmpconf
=head1 VERSION
v1.0 - 06/22/2009 14:05:03 CEST
Initial revision
=head1 AUTHOR
This plugin is copyright (c) 2009 by Guillaume Blairon.
NetApp is a registered trademark and Network Appliance is a trademark
of Network Appliance, Inc. in the U.S. and other countries.
=head1 BUGS
This plugin hasn't been tested on much hardware. If you encounter bugs,
please report them to Guillaume Blairon E<lt>L<g@yom.be>E<gt>.
=head1 LICENSE
GPLv2 or (at your option) any later version.
=cut
use strict;
use warnings;
use Munin::Plugin::SNMP;
use vars qw($DEBUG);
# Debug flag, taken from the MUNIN_DEBUG environment variable.
$DEBUG = $ENV{'MUNIN_DEBUG'};

# Colour palette ("better colours from munin 1.3.x").  The order matters:
# the config output below picks colours by index.
# Row 1: greens, blues, oranges, dark yellow, dark blue, purple, lime, reds, gray.
my @palette = qw(
    00CC00 0066B3 FF8000 FFCC00 330099 990099 CCFF00 FF0000 808080
    008F00 00487D B35A00 B38F00 6B006B 8FB300 B30000 BEBEBE
    80FF80 80C9FF FFC080 FFE680 AA80FF EE00CC FF8080
    666600 FFBFFF 00FFCC CC6699 999900
);

# OID prefixes (the file system index is appended at query time).
my %oids = map { $_->[0] => "1.3.6.1.4.1.789.1.5.4.1.$_->[1]." } (
    [ dfInodesUsed => 7 ],
    [ dfInodesFree => 8 ],
);
# snmpconf: print the OIDs used for autoconfiguration.  Handled before any
# SNMP session is opened.
if (defined $ARGV[0] and $ARGV[0] eq 'snmpconf') {
    print "number 1.3.6.1.4.1.789.1.5.6.0\n";
    print "index 1.3.6.1.4.1.789.1.5.4.1.1.\n";
    foreach (keys %oids) {
        print "require $oids{$_} [0-9]\n";
    }
    exit 0;
}

my $session = Munin::Plugin::SNMP->session();
my ($host, undef, undef, $tail) = Munin::Plugin::SNMP->config_session();
my ($df_id, $name_oid);

# The trailing part of the plugin name carries the file system index.
if ($tail =~ /^netapp_inodeusage_(\d+)$/) {
    $df_id = $1;
    $name_oid = '1.3.6.1.4.1.789.1.5.4.1.2.' . $df_id;
} else {
    die "Couldn't understand what I'm supposed to monitor";
}

# config: describe the graph and its three data series.
if (defined $ARGV[0] and $ARGV[0] eq "config") {
    my $df_name = $session->get_single($name_oid);
    print "host_name $host\n" unless $host eq 'localhost';
    print "graph_title $host inodes usage on $df_name\n";
    print "graph_args --base 1000 --lower-limit 0\n";
    # The series are inode counts, not bytes.
    print "graph_vlabel inodes\n";
    print "graph_category disk\n";
    print "graph_info This graph shows the inodes usage for $df_name on NetApp host $host\n";
    print "graph_order used avail total\n";
    print "used.info The total number of inodes in use on the $df_name file system.\n";
    print "used.type GAUGE\n";
    print "used.draw AREA\n";
    print "used.label Used\n";
    print "used.min 0\n";
    print "used.colour $palette[1]\n";
    print "avail.info The total number of inodes that are free for use on the $df_name file system.\n";
    print "avail.type GAUGE\n";
    print "avail.draw STACK\n";
    print "avail.label Available\n";
    print "avail.min 0\n";
    print "avail.colour $palette[3]\n";
    print "total.info The total capacity for the $df_name file system.\n";
    print "total.type GAUGE\n";
    print "total.draw LINE2\n";
    print "total.label Total\n";
    print "total.min 0\n";
    print "total.colour $palette[7]\n";
    exit 0;
}

# fetch: total is derived as used + free.  An undefined SNMP answer is
# reported as "U" (Munin's "unknown") instead of an empty value.
my $used  = $session->get_single($oids{dfInodesUsed}.$df_id);
my $avail = $session->get_single($oids{dfInodesFree}.$df_id);
my $total = (defined $used and defined $avail) ? $used + $avail : undef;

foreach my $pair ([used => $used], [avail => $avail], [total => $total]) {
    my ($field, $value) = @$pair;
    $value = "U" unless defined $value;
    print "$field.value $value\n";
}

exit 0;
__END__

172
plugins/disk/snmp__swap Executable file
View file

@ -0,0 +1,172 @@
#!/usr/bin/perl -w
#
# Copyright (C) 2006 Lars Strand
#
# Munin plugin to monitor swap usage by use of SNMP.
# Based on the snmp__df plugin
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2 dated June,
# 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# $Log$
#
#%# family=snmpauto
#%# capabilities=snmpconf
use strict;
use Net::SNMP;
# Script-wide settings.
my $DEBUG    = 0;
my $MAXLABEL = 20;

# Connection parameters from the environment; an empty or false value
# counts as "not set".
my $host      = $ENV{host}      ? $ENV{host}      : undef;
my $port      = $ENV{port}      ? $ENV{port}      : 161;
my $community = $ENV{community} ? $ENV{community} : "public";
my $iface     = $ENV{interface} ? $ENV{interface} : undef;

# Last SNMP response, shared with get_single().
my $response;

# snmpconf: print the OIDs used for autoconfiguration and stop.
if (defined $ARGV[0] and $ARGV[0] eq "snmpconf")
{
    # HOST-RESOURCES-MIB::hrStorage
    # HOST-RESOURCES-TYPES::hrStorageVirtualMemory
    print "require 1.3.6.1.2.1.25.2. 1.3.6.1.2.1.25.2.1.3\n";
    exit 0;
}

# A host (optionally "host:port") embedded in the plugin's file name
# overrides the environment.
if ($0 =~ /^(?:|.*\/)snmp_([^_]+)_swap$/)
{
    $host = $1;
    ($host, $port) = ($1, $2) if $host =~ /^([^:]+):(\d+)$/;
}
elsif (!defined($host))
{
    print "# Debug: $0 -- $1\n" if $DEBUG;
    die "# Error: couldn't understand what I'm supposed to monitor.";
}

# Open the SNMP session; give up with the library's error text on failure.
my ($session, $error) = Net::SNMP->session(
    -hostname  => $host,
    -community => $community,
    -port      => $port,
);
die "Croaking: $error" unless defined $session;
# HOST-RESOURCES-MIB OIDs.  The per-entry ones end in "." so that the
# storage index can be appended.
my $hrStorage = "1.3.6.1.2.1.25.2.";
my $hrStorageVirtualMemory = "1.3.6.1.2.1.25.2.1.3";
my $hrStorageAllocationUnits = "1.3.6.1.2.1.25.2.3.1.4.";
my $hrStorageSize = "1.3.6.1.2.1.25.2.3.1.5.";
my $hrStorageUsed = "1.3.6.1.2.1.25.2.3.1.6.";

# Find every storage entry whose value matches the virtual-memory type,
# then sum size and usage over those entries.
my $swap_d = get_by_regex($session, $hrStorage, $hrStorageVirtualMemory);
my $swapsize = 0; my $swapused = 0;
# get_by_regex() returns undef when the agent does not answer.
foreach my $swap (keys %{ $swap_d || {} })
{
    # hrStorageSize/hrStorageUsed are counted in allocation units; multiply
    # so that the graph really shows bytes, as its labels claim.  Fall back
    # to a factor of 1 when the unit size is unavailable.
    my $units = get_single($session, $hrStorageAllocationUnits . $swap);
    $units = 1 unless (defined $units and $units =~ /^\d+$/ and $units > 0);
    my $size = get_single($session, $hrStorageSize . $swap);
    my $used = get_single($session, $hrStorageUsed . $swap);
    # Skip unanswered requests rather than adding undef.
    $swapsize += $size * $units if defined $size;
    $swapused += $used * $units if defined $used;
}

if (defined $ARGV[0] and $ARGV[0] eq "config")
{
    print "host_name $host\n";
    print "graph_title Virtual memory usage\n";
    if ($swapsize > 0)
    {
        print "graph_args -l 0 --base 1000 --upper-limit $swapsize\n";
    }
    else
    {
        print "graph_args -l 0 --base 1000\n";
    }
    print "graph_vlabel Bytes\n";
    print "graph_category disk\n";
    print "graph_info This graph shows swap usage in bytes.\n";
    print "swap.label swap\n";
    # The value is a current amount, not an ever-growing counter, so it is
    # a GAUGE; DERIVE would plot its rate of change instead.
    print "swap.type GAUGE\n";
    print "swap.min 0\n";
    exit 0;
}

print "swap.value $swapused\n";
# Fetch one OID through the given SNMP handle.
#
# Arguments: ($handle, $oid) - session object and the OID to request.
# Returns:   the value stored under $oid in the response, or undef when the
#            response holds no defined value for it.
# Stores the raw response in the file-level $response variable.
sub get_single
{
    my ($handle, $oid) = @_;

    print "# Getting single $oid..." if $DEBUG;

    $response = $handle->get_request ($oid);
    my $value = $response->{$oid};

    unless (defined $value)
    {
        print "undef\n" if $DEBUG;
        return undef;
    }

    print "\"$value\"\n" if $DEBUG;
    return $value;
}
# Walk the subtree below $oid and collect the entries whose value matches
# $regex.
#
# Arguments: ($handle, $oid, $regex) - session object, OID prefix of the
#            subtree to browse, and a pattern applied to each value.
# Returns:   a hash reference mapping the last component of each matching
#            OID to its value, or undef as soon as a request gets no
#            response.
sub get_by_regex
{
    my $handle = shift;
    my $oid    = shift;
    my $regex  = shift;
    my $result = {};
    my $num    = 0;
    my $ret    = $oid . "0";
    my $response;

    print "# Starting browse of $oid...\n" if $DEBUG;

    while (1)
    {
        # First round: try "<oid>0" itself before asking for its sibling.
        if ($num == 0)
        {
            print "# Checking for $ret...\n" if $DEBUG;
            $response = $handle->get_request ($ret);
        }
        if ($num or !defined $response)
        {
            print "# Checking for sibling of $ret...\n" if $DEBUG;
            $response = $handle->get_next_request ($ret);
        }
        if (!$response)
        {
            return undef;
        }
        my @keys = keys %$response;
        $ret = $keys[0];
        print "# Analyzing $ret (compared to $oid)...\n" if $DEBUG;
        # Stop once the walk leaves the subtree.  \Q...\E makes the dots in
        # the OID literal; unescaped, "25.2." would also accept "25.20".
        last unless ($ret =~ /^\Q$oid\E/);
        $num++;
        next unless ($response->{$ret} =~ /$regex/);
        @keys = split (/\./, $ret);
        $result->{$keys[-1]} = $response->{$ret};
        print "# Index $num: ", $keys[-1], " (", $response->{$ret}, ")\n" if $DEBUG;
    }
    return $result;
}

67
plugins/disk/xfs_frag Executable file
View file

@ -0,0 +1,67 @@
#!/bin/bash
: <<=cut
=head1 NAME
xfs_frag - Munin plugin to monitor the fragmentation level on your XFS filesystems
=head1 APPLICABLE SYSTEMS
Any machine with an XFS file system.
=head1 CONFIGURATION
None, generally, but you may want to run as root and set a timeout.
[xfs_frag]
user root
timeout 90
=head1 MAGIC MARKERS
#%# family=auto contrib
#%# capabilities=
=head1 VERSION
1
=head1 AUTHOR
Paul Saunders L<darac+munin@darac.org.uk>
=cut
# Case-insensitive matching, both for the `case` on $1 and for the
# file system type comparison in xfs_mounts.
shopt -s nocasematch

# Turn a mount point into a Munin field name: a leading character other
# than a letter or underscore, and any later character other than a
# letter, digit or underscore, becomes "_".
fieldname() {
    printf '%s\n' "$1" | sed 's/^[^A-Za-z_]/_/; s/[^A-Za-z0-9_]/_/g'
}

# Print "<device> <mountpoint>" for every /etc/mtab entry whose type is
# exactly xfs (the old unanchored regex accepted any type containing "xfs").
xfs_mounts() {
    local dev mnt fstype rest
    while read -r dev mnt fstype rest; do
        if [[ $fstype == xfs ]]; then
            printf '%s %s\n' "$dev" "$mnt"
        fi
    done < /etc/mtab
}

case $1 in
    config)
        cat <<'EOF'
graph_title XFS fragmentation
graph_vlabel Percent
graph_category disk
EOF
        xfs_mounts | while read -r dev mnt; do
            name=$(fieldname "$mnt")
            echo "$name.label $mnt"
            echo "$name.type GAUGE"
        done
        exit 0
        ;;
esac

# Fetch: ask xfs_db for the fragmentation factor of each XFS device.
number_re='^[0-9]+([.][0-9]+)?$'
xfs_mounts | while read -r dev mnt; do
    # sed -n ... p keeps only the line carrying the factor, so any other
    # output of xfs_db cannot end up in the value.
    frag=$(xfs_db -c frag -r "$dev" | sed -n 's/.*fragmentation factor \(.*\)%.*/\1/p')
    # Report "U" (unknown) when xfs_db failed or printed something unexpected.
    [[ $frag =~ $number_re ]] || frag=U
    echo "$(fieldname "$mnt").value $frag"
done