mirror of
https://github.com/munin-monitoring/contrib.git
synced 2025-07-24 18:07:20 +00:00
- have some dirs
This commit is contained in:
parent
0b089ea777
commit
08346aac58
687 changed files with 0 additions and 0 deletions
67
plugins/disk/df_abs_bsd
Executable file
67
plugins/disk/df_abs_bsd
Executable file
|
@ -0,0 +1,67 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# Copyright (c) 2008, Net Easy, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Net Easy, Inc. nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY Net Easy, Inc. ''AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL Net Easy, Inc. BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import re, os
|
||||
from sys import argv
|
||||
|
||||
class checker(object):
    """Collect local filesystem capacity and usage from ``df`` for Munin.

    ``parsed_data`` maps a field name (the device name with every ``/``
    replaced by ``_``) to a tuple ``(size_kb, used_kb, mount_point)``.
    """

    def __init__(self):
        'parsed_data will have a tuple of the disk capacity and usage (in kbytes) and the mount point for each disk partition'
        self.parsed_data = {}

    def __repr__(self):
        """Return the fetch output: one ``<field>.value <used bytes>`` line per filesystem."""
        return ''.join(
            '%s.value %s\n' % (field, int(used_kb * 1024))
            for field, (_size_kb, used_kb, _mount) in self.parsed_data.items())

    def config(self):
        """Return the graph configuration, including per-field thresholds.

        Warning and critical levels are 92% and 98% of each filesystem's
        capacity, expressed in bytes.
        """
        parts = ["""graph_title Filesystem usage (in bytes)
graph_args --base 1024 --lower-limit 0
graph_vlabel bytes
graph_category disk
graph_info Filesystem usage
"""]
        for field, (size_kb, _used_kb, mount) in self.parsed_data.items():
            parts.append("%s.label %s\n%s.warning %s\n%s.critical %s\n" % (
                field, mount,
                field, int(size_kb * 1024 * 0.92),
                field, int(size_kb * 1024 * 0.98)))
        return ''.join(parts)

    def _parse(self, lines):
        """Fill ``parsed_data`` from the lines of ``df -P -k`` output.

        The first line (the column header) is skipped. Lines that do not
        have six columns, or whose size/used columns are not integers, are
        ignored instead of aborting the whole run.
        """
        rows = iter(lines)
        next(rows, None)  # drop the header line
        for row in rows:
            # Limit the split to six columns so a mount point that contains
            # spaces stays in one piece.
            columns = row.split(None, 5)
            if len(columns) < 6:
                continue
            try:
                size_kb, used_kb = int(columns[1]), int(columns[2])
            except ValueError:
                continue
            field = re.sub('/', '_', columns[0])
            self.parsed_data[field] = (size_kb, used_kb, columns[5].strip())

    def get_data(self):
        """Run ``df -P -l -k`` and store the parsed result in ``parsed_data``."""
        pipe = os.popen('df -P -l -k')
        try:
            rawdata = pipe.readlines()
        finally:
            # Always release the pipe, even if reading fails.
            pipe.close()
        self._parse(rawdata)
|
||||
|
||||
if __name__ == "__main__":
    # Munin calls the plugin with "config" to get the graph definition and
    # with no argument to fetch the current values.
    processor = checker()
    processor.get_data()
    # print(x) with a single argument produces the same output under
    # Python 2 and Python 3; the former print statements were a SyntaxError
    # on Python 3.
    if len(argv) > 1 and argv[1] == 'config':
        print(processor.config())
    else:
        print(processor)
|
65
plugins/disk/df_bsd
Executable file
65
plugins/disk/df_bsd
Executable file
|
@ -0,0 +1,65 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# Copyright (c) 2008, Net Easy, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Net Easy, Inc. nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY Net Easy, Inc. ''AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL Net Easy, Inc. BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import re, os
|
||||
from sys import argv
|
||||
|
||||
class checker(object):
    """Collect local filesystem usage percentages from ``df`` for Munin.

    ``parsed_data`` maps a field name (the device name with every ``/``
    replaced by ``_``) to a tuple ``(percent_used, mount_point)`` where
    ``percent_used`` is the capacity column with the ``%`` sign removed.
    """

    def __init__(self):
        'parsed_data will have a tuple of the percentage of disk usage and the mount point for each disk partition'
        self.parsed_data = {}

    def __repr__(self):
        """Return the fetch output: one ``<field>.value <percent>`` line per filesystem."""
        return ''.join(
            '%s.value %s\n' % (field, percent)
            for field, (percent, _mount) in self.parsed_data.items())

    def config(self):
        """Return the graph configuration with fixed 92/98 percent thresholds."""
        parts = ["""graph_title Filesystem usage (in %)
graph_args --lower-limit 0
graph_vlabel %
graph_category disk
graph_info Filesystem usage
"""]
        for field, (_percent, mount) in self.parsed_data.items():
            parts.append("%s.label %s\n%s.warning 92\n%s.critical 98\n" % (
                field, mount, field, field))
        return ''.join(parts)

    def _parse(self, lines):
        """Fill ``parsed_data`` from the lines of ``df -P`` output.

        The first line (the column header) is skipped, and lines with fewer
        than six columns are ignored instead of raising IndexError.
        """
        rows = iter(lines)
        next(rows, None)  # drop the header line
        for row in rows:
            # Limit the split to six columns so a mount point that contains
            # spaces stays in one piece.
            columns = row.split(None, 5)
            if len(columns) < 6:
                continue
            field = re.sub('/', '_', columns[0])
            self.parsed_data[field] = (re.sub('%', '', columns[4]), columns[5].strip())

    def get_data(self):
        """Run ``df -P -l`` and store the parsed result in ``parsed_data``."""
        pipe = os.popen('df -P -l')
        try:
            rawdata = pipe.readlines()
        finally:
            # Always release the pipe, even if reading fails.
            pipe.close()
        self._parse(rawdata)
|
||||
|
||||
if __name__ == "__main__":
    # Munin calls the plugin with "config" to get the graph definition and
    # with no argument to fetch the current values.
    processor = checker()
    processor.get_data()
    # print(x) with a single argument produces the same output under
    # Python 2 and Python 3; the former print statements were a SyntaxError
    # on Python 3.
    if len(argv) > 1 and argv[1] == 'config':
        print(processor.config())
    else:
        print(processor)
|
101
plugins/disk/df_with_nfs
Executable file
101
plugins/disk/df_with_nfs
Executable file
|
@ -0,0 +1,101 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# Script to monitor disk usage.
|
||||
#
|
||||
# Parameters understood:
|
||||
#
|
||||
# config (required)
|
||||
# autoconf (optional - used by munin-config)
|
||||
#
|
||||
# $Log$
|
||||
# Revision 1.5.2.5 2011/04/20 14:25:07 ward
|
||||
# Exclude tmpfs partitions from 'config'.
|
||||
#
|
||||
# Revision 1.5.2.4 2005/03/12 21:35:07 jimmyo
|
||||
# Correct history loss in linux/{df,df_inode}.
|
||||
#
|
||||
# Revision 1.5.2.3 2005/03/10 10:04:48 jimmyo
|
||||
# Fixed minor bug introduced with yesterdays change.
|
||||
#
|
||||
# Revision 1.5.2.2 2005/03/09 19:10:32 jimmyo
|
||||
# Made linux/df work properly with tmpfs and devmapper (Deb#298442).
|
||||
#
|
||||
# Revision 1.5.2.1 2005/02/16 22:50:14 jimmyo
|
||||
# linux/df* now ignores bind mounts.
|
||||
#
|
||||
# Revision 1.5 2004/12/09 20:27:45 jimmyo
|
||||
# Sort fields in df*-plugins alphabetically.
|
||||
#
|
||||
# Revision 1.4 2004/09/25 22:29:16 jimmyo
|
||||
# Added info fields to a bunch of plugins.
|
||||
#
|
||||
# Revision 1.3 2004/05/20 13:57:12 jimmyo
|
||||
# Set categories to some of the plugins.
|
||||
#
|
||||
# Revision 1.2 2004/05/18 22:04:30 jimmyo
|
||||
# Use "sed 1d" instead of "tail +2" in df plugins (patch by Olivier Delhomme).
|
||||
#
|
||||
# Revision 1.1 2004/01/02 18:50:01 jimmyo
|
||||
# Renamed occurrances of lrrd -> munin
|
||||
#
|
||||
# Revision 1.1.1.1 2004/01/02 15:18:07 jimmyo
|
||||
# Import of LRRD CVS tree after renaming to Munin
|
||||
#
|
||||
# Revision 1.2 2003/11/07 17:43:16 jimmyo
|
||||
# Cleanups and log entries
|
||||
#
|
||||
#
|
||||
#
|
||||
# Magic markers (optional - used by munin-config and installation
|
||||
# scripts):
|
||||
#
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf
|
||||
|
||||
# Longest label printed verbatim; longer mount points are shortened to
# "..." plus their trailing characters.
MAXLABEL=20

if [ "$1" = "autoconf" ]; then
    echo yes
    exit 0
fi

# list_filesystems: print one "df -T -P" row per monitored filesystem,
# without the header line. Both "config" and the value run use this single
# list so that they always report the same set of fields.
list_filesystems() {
    df -T -P -l -x none -x unknown -x udf -x iso9660 -x romfs -x ramfs \
        -x proc -x sysfs -x tmpfs -x udev | sed 1d | grep -v "//"
}

# clean_name DEVICE TYPE ... MOUNTPOINT: derive the Munin field name from an
# already word-split df row ($1 = device, $2 = type, $7 = mount point).
# '/', '.' and '-' become '_'; tmpfs rows get the mount point appended
# because their device name alone is not unique.
clean_name() {
    echo $1 $7 $2 | sed 's/[\/.-]/_/g' | awk '{
        if ($3 == "tmpfs")
            n = $1 $2
        else
            n = $1
        print n
    }'
}

if [ "$1" = "config" ]; then

    echo 'graph_title Filesystem usage (in %)'
    echo 'graph_args --upper-limit 100 -l 0'
    echo 'graph_vlabel %'
    echo 'graph_category disk'
    echo 'graph_info This graph shows disk usage on the machine.'
    list_filesystems | sort | while read -r i; do
        # $i is deliberately unquoted: the row is split into its columns.
        name=`clean_name $i`
        echo $i | awk -v name="$name" -v max="$MAXLABEL" '{
            dir = $7
            if (length(dir) <= max)
                label = dir
            else
                label = "..." substr(dir, length(dir) - max + 4, max - 3)
            print name ".label " label
            print name ".info " $7 " (" $2 ") -> " $1
        }'
        echo "$name.warning 92"
        echo "$name.critical 98"
    done
    exit 0
fi

list_filesystems | while read -r i; do
    name=`clean_name $i`
    # printf instead of "echo -n", whose behaviour is not specified for sh.
    printf '%s.value ' "$name"
    # Column 6 is the usage percentage; keep only the part before '%'.
    echo $i | awk '{ print $6 }' | cut -f1 -d%
done
|
109
plugins/disk/dirsizes
Executable file
109
plugins/disk/dirsizes
Executable file
|
@ -0,0 +1,109 @@
|
|||
#!/usr/bin/perl
|
||||
# -*- perl -*-
|
||||
#
|
||||
#
|
||||
##############################################################################
|
||||
#
|
||||
#
|
||||
# This munin plugin watches the sizes of the given directories.
|
||||
# @author Kevin Fischer
|
||||
# @version 2010/08/05
|
||||
# @website http://kevin-fischer.de
|
||||
#
|
||||
# Copy this to your node's config file (default: plugin-conf.d/munin-node):
|
||||
# [dirsizes]
|
||||
# user root
|
||||
# env.watchdirs /var/www,/tmp
|
||||
#
|
||||
# Change the env.watchdirs-variable according to your wishes.
|
||||
# DONT FORGET TO RUN AS ROOT!
|
||||
#
|
||||
# You can test this plugin by calling it with params "test" and your watchdirs:
|
||||
# ./dirsizes test /dir1,/tmp/dir2
|
||||
#
|
||||
#
|
||||
##############################################################################
|
||||
#
|
||||
|
||||
use strict;
my @watchdirs;

if ( exists $ARGV[0] and $ARGV[0] eq "test" ) {

    # Test mode: the comma-separated directory list is the second argument.
    if ( !defined( $ARGV[1] ) ) {
        die "No directories given! Usage: $0 test /dir1,/tmp/dir2\n";
    }

    # Split the watchdirs string
    @watchdirs = split( ",", $ARGV[1] );
}
else {

    # If no dirs are given, exit.
    if ( !defined( $ENV{"watchdirs"} ) ) {
        die "No directories given! See the manual at top of this plugin file.";
    }

    # Split the watchdirs string
    @watchdirs = split( ",", $ENV{"watchdirs"} );
}

# Config or read request?
if ( exists $ARGV[0] and $ARGV[0] eq "config" ) {

    # Munin basic info
    print "graph_title Directory sizes\n";
    print "graph_args --base 1024 --lower-limit 0\n";
    print "graph_vlabel directory size\n";
    print "graph_info Displays the sizes of all configured directories.\n";
    print "graph_category disk\n";
    print "graph_total Total\n";

    # One field per configured directory, labelled with the real path
    foreach my $dir (@watchdirs) {
        print "dir", niceLabelname($dir), ".label ", $dir, "\n";
    }
}

# Read request, output the directory sizes
else {

    foreach my $dir (@watchdirs) {
        my $dirsize = getSize($dir);

        # Report "U" (unknown) when du produced no size, instead of
        # printing a bare ".0".
        my $value = defined $dirsize ? "${dirsize}.0" : "U";
        print "dir", niceLabelname($dir), ".value ", $value, "\n";
    }
}

# Function: getSize($dir)
# Returns the size of $dir in bytes as reported by `du -sb`, or undef when
# du cannot be started or prints no number.
sub getSize {
    my ($dir) = @_;

    # List-form pipe open: no shell is involved, so spaces or shell
    # metacharacters in the directory name are passed through literally.
    open( my $du, '-|', 'du', '-sb', '--', $dir ) or return;
    my $line = <$du>;
    close($du);

    return unless defined $line and $line =~ /^(\d+)/;
    return $1;
}

# Function: niceLabelname($dir)
# Returns $dir with every '/' and '-' replaced by '_' for use in a field name.
sub niceLabelname {
    my ($label) = @_;

    $label =~ s@[\/-]@_@g;
    return $label;
}

exit 0;
|
||||
|
89
plugins/disk/du
Executable file
89
plugins/disk/du
Executable file
|
@ -0,0 +1,89 @@
|
|||
#!/bin/bash
|
||||
|
||||
#################################################################
|
||||
#
|
||||
# Plugin to monitor the size of the specified directory
|
||||
#
|
||||
#################################################################
|
||||
#
|
||||
# Parameters understood:
|
||||
#
|
||||
# config (required)
|
||||
# autoconf (optional - checks if the path exists etc, not so advanced feature)
|
||||
#
|
||||
#################################################################
|
||||
#
|
||||
# Requirements
|
||||
# - bash (or change first line to sh instead of bash or any other shell)
|
||||
# - existing and readable directory to scan
|
||||
# - du command, it exists on most of the *nix operating systems
|
||||
#
|
||||
#################################################################
|
||||
#
|
||||
# Configuration
|
||||
#
|
||||
# directory to check
|
||||
DIR="/var/cache/apache2/"
|
||||
|
||||
# unique id, in case you run several copies of this script; change the id as needed (probably obsolete, not tested)
|
||||
ID=1;
|
||||
|
||||
# - make sure that the user/group that executes this script has access to the directory you have configured
|
||||
# otherwise run it as another user, edit plugins-conf.d/munin-node and stuff it with example below code (not suggested)
|
||||
# remember to remove hashes from the beginning of the lines
|
||||
#
|
||||
# [du]
|
||||
# user root
|
||||
#
|
||||
# - by default the value is in megabytes; to change the unit, edit the du invocation in the script below to use another option recognized by du (see man du)
|
||||
# du -sm $DIR in MB
|
||||
# du -sk $DIR in KB
|
||||
#
|
||||
#################################################################
|
||||
#
|
||||
# Changelog
|
||||
#
|
||||
# Revision 0.1 Tue 03 Feb 2009 02:16:02 PM CET _KaszpiR_
|
||||
# - initial release,
|
||||
#
|
||||
#################################################################
|
||||
# Magic markers (optional - used by munin-config and some installation
|
||||
# scripts):
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf
|
||||
|
||||
#################################################################
|
||||
#################################################################
|
||||
|
||||
# autoconf: report whether the configured directory exists.
# "$DIR" is quoted everywhere: unquoted, an empty DIR makes `[ -d ]` succeed
# and a path containing spaces is split into several words.
if [ "$1" = "autoconf" ]; then
    if [ -d "$DIR" ]; then
        echo "yes"
        exit 0
    else
        echo "no (check your path)"
        exit 1
    fi
fi

# config: describe the graph and its single field.
if [ "$1" = "config" ]; then

    echo "graph_title Directory size: $DIR"
    echo "graph_vlabel size MB"
    echo "graph_category disk"
    echo "graph_info Size of $DIR"
    echo "dir$ID.label size"
    echo "dir$ID.min 0"
    echo "dir$ID.info Shows du -sm for specified directory"

    exit 0
fi

# fetch: print the directory size in megabytes, or "U" (unknown) when the
# directory is missing or du printed nothing.
if [ -d "$DIR" ]; then
    SIZE=`du -sm "$DIR" | cut -f1`
    echo "dir$ID.value ${SIZE:-U}"
    exit 0
else
    echo "dir$ID.value U"
    exit 1
fi
|
168
plugins/disk/du-2
Executable file
168
plugins/disk/du-2
Executable file
|
@ -0,0 +1,168 @@
|
|||
#!/usr/bin/perl
|
||||
# vim: set filetype=perl sw=4 tabstop=4 expandtab smartindent: #
|
||||
|
||||
=head1 NAME
|
||||
|
||||
du - Plugin to monitor multiple directories size
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Luc Didry <luc AT didry.org>
|
||||
April 2011
|
||||
|
||||
=head1 HOWTO CONFIGURE AND USE :
|
||||
|
||||
=over
|
||||
|
||||
=item - /etc/munin/plugin-conf.d/du_
|
||||
|
||||
[du]
|
||||
user root
|
||||
env.interval 20 # INTERVAL OF DU POLLING IN MINUTES
|
||||
env.dirs /home/foo /home/bar # DIRECTORIES TO POLL
|
||||
env.suppr /home/ # PLEASE USE \# INSTEAD OF #
|
||||
timeout 900 # 15 MINUTES IN SECONDS
|
||||
|
||||
=item - /etc/munin/plugins-enabled
|
||||
|
||||
ln -svf ../plugins-available/site/du
|
||||
|
||||
|
||||
=item - restart Munin node
|
||||
|
||||
sudo killall -TERM munin-node
|
||||
|
||||
=back
|
||||
|
||||
=head1 CREDITS
|
||||
|
||||
Based on the 'du_multidirs-v2' initially written in Bash by Christian Kujau <lists@nerdbynature.de> and modified by dano229.
|
||||
This script was based on the 'homedirs' plugin, initially written in Perl by Philipp Gruber <pg@flupps.net>
|
||||
|
||||
=head1 MAGIC MARKERS
|
||||
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf
|
||||
|
||||
=cut
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use Munin::Plugin;
|
||||
use POSIX qw(setsid);
|
||||
|
||||
my $PLUGIN_NAME = "du";

# State files kept in Munin's plugin state directory.
my $CACHEFILE="$Munin::Plugin::pluginstatedir/du.cache";   # last complete du output
my $TEMPFILE="$Munin::Plugin::pluginstatedir/du.tmp";      # du output being written
my $LOCKFILE="$Munin::Plugin::pluginstatedir/du.lock";     # present while du runs
my $TIMEFILE="$Munin::Plugin::pluginstatedir/du.time";     # epoch of the last run

##### autoconf
if( (defined $ARGV[0]) && ($ARGV[0] eq "autoconf") ) {
    print "yes\n";
    ## Done !
    munin_exit_done();
}

## In the parent, it's just a regular munin plugin which reads a file with the infos
##### config
if( (defined $ARGV[0]) && ($ARGV[0] eq "config") ) {
    print "graph_title Directory usage\n";
    print "graph_args --base 1024 -l 1\n";
    print "graph_vlabel Bytes\n";
    print "graph_category disk\n";
    print "graph_total total\n";
    print "graph_info This graph shows the size of several directories\n";

    my $cache;
    if ( !open($cache, "<", $CACHEFILE) ) {
        ## No cache yet: start the background du so that a later run finds
        ## data. daemonize() does not return (the parent exits through
        ## munin_exit_done, the child exits after du). Previously the plugin
        ## failed here and the cache was never created.
        daemonize();
    }

    my $seen = 0;
    while(defined (my $line = <$cache>)) {
        if ($line =~ m/(\d+)\s+(.+)/) {
            my $dir = $2;
            clean_path(\$dir);
            print "$dir.label $dir\n";
            ## The first field is drawn as AREA, every further one is stacked
            if ($seen++) {
                print "$dir.draw STACK\n";
            } else {
                print "$dir.draw AREA\n";
            }
        }
    }
    close($cache);
    ## Done !
    munin_exit_done();
}

##### fetch
## Print whatever the cache holds; with no cache there is nothing to print,
## but daemonize() must still run so that the cache gets built.
if ( open(my $cache, "<", $CACHEFILE) ) {
    while(defined (my $line = <$cache>)) {
        if ($line =~ m/(\d+)\s+(.+)/) {
            my ($field, $value) = ($2, $1);
            clean_path(\$field);
            print $field, ".value ", $value, "\n";
        }
    }
    close($cache);
}
daemonize();
|
||||
|
||||
#
|
||||
##
|
||||
### PUBLIC FUNCTIONS
|
||||
###############################################################################
|
||||
## Used to create the fork
|
||||
sub daemonize {
    ## Fork; the parent ends the plugin run right away, the detached child
    ## refreshes the du cache when it is due. This sub never returns.
    chdir '/' or die "Can't chdir to /: $!";
    defined(my $pid = fork) or die "Can't fork: $!";
    munin_exit_done() if $pid;

    ## Child: detach from the plugin's stdio and start a new session
    open STDIN, '/dev/null' or die "Can't read /dev/null: $!";
    open STDOUT, '>/dev/null' or die "Can't write to /dev/null: $!";
    open STDERR, '>&STDOUT' or die "Can't dup stdout: $!";
    setsid or die "Can't start a new session: $!";

    ## In the child, let's get the du infos if necessary
    if (cache_is_too_old() && du_not_running()) {
        my $dirs = $ENV{dirs};

        ## Without any directory "du -sb" would scan the current directory,
        ## which is / after the chdir above.
        exit unless defined $dirs && $dirs =~ /\S/;

        ## $dirs is left unquoted on purpose: it is a space-separated list.
        ## The finished temp file is renamed over the cache so that a
        ## concurrent reader never sees a half-written cache.
        system("touch $LOCKFILE; du -sb $dirs > $TEMPFILE; mv -f $TEMPFILE $CACHEFILE; rm $LOCKFILE; date +%s > $TIMEFILE;");
    }
    exit;
} ## daemonize
|
||||
|
||||
## Used to remove the beginning of the paths if wanted
|
||||
sub clean_path {
    ## Takes a reference to a path string and edits the string in place:
    ## when env.suppr is set, a leading match of that pattern is removed.
    my $path_ref = shift;
    if (defined(my $prefix = $ENV{suppr})) {
        ${$path_ref} =~ s#^($prefix)##;
    }
} ## clean_path
|
||||
|
||||
## Returns 1 when the time file is missing or older than env.interval minutes, 0 otherwise
|
||||
sub cache_is_too_old {
    ## Returns 1 when the du results must be refreshed: the time file is
    ## missing, unreadable, holds no epoch number, or is older than
    ## env.interval minutes. Returns 0 otherwise.
    return 1 if (! -e $TIMEFILE);

    ## Read the stamp directly instead of spawning `cat`
    open(my $fh, '<', $TIMEFILE) or return 1;
    my $line = <$fh>;
    close($fh);
    return 1 unless (defined $line && $line =~ m/^\s*(\d+)/);
    my $stamp = $1;

    ## An unset or non-numeric interval counts as 0 minutes, which is what
    ## the former undef arithmetic evaluated to, minus the warnings.
    my $interval = 0;
    if (defined $ENV{interval} && $ENV{interval} =~ m/^\s*(\d+(?:\.\d+)?)\s*$/) {
        $interval = $1;
    }

    return 1 if ( (time - $stamp) > ($interval*60) );
    return 0;
} ## cache_is_too_old
|
||||
|
||||
sub du_not_running {
    ## 1 when no lock file exists, 0 while one is present
    return (-e $LOCKFILE) ? 0 : 1;
}
|
||||
sub munin_exit_done {
    ## End the plugin run with exit status 0
    return __munin_exit(0);
} ## sub munin_exit_done
|
||||
|
||||
|
||||
sub munin_exit_fail {
    ## End the plugin run with exit status 1
    return __munin_exit(1);
} ## sub munin_exit_fail
|
||||
|
||||
#
|
||||
##
|
||||
### INTERNAL FUNCTIONS
|
||||
###############################################################################
|
||||
sub __munin_exit {
    ## Exit with the given status; a missing status is treated as failure (1)
    my ($status) = @_;
    $status = 1 unless defined $status;
    exit($status);
} ## sub __munin_exit
|
33
plugins/disk/du_multidirs
Executable file
33
plugins/disk/du_multidirs
Executable file
|
@ -0,0 +1,33 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# (c)2009, Christian Kujau <lists@nerdbynature.de> modified by dano229
|
||||
# Based on the 'homedirs' plugin, initially written in Perl by Philipp Gruber <pg@flupps.net>
|
||||
#
|
||||
# We still need a cronjob to update CACHEFILE once in a while, e.g.:
|
||||
# 0 * * * * root [ -O /tmp/munin-du_multidirs.cache ] && du -sk /dir /dir2 dir3/* > /tmp/munin-du_multidirs.cache
|
||||
#
|
||||
# du output ("<size in KB> <path>" per line) written by the cron job
# described above.
CACHEFILE=/tmp/munin-du_multidirs.cache

if [ "$1" = "autoconf" ]; then
    echo yes
    exit 0
fi

if [ "$1" = "config" ]; then
    echo 'graph_title Directory usage'
    echo 'graph_args --base 1024 -l 1'
    echo 'graph_vlabel Bytes'
    echo 'graph_category disk'
    echo 'graph_info This graph shows the size of several directories'

    awk '!/lost\+found/ {print $2 }' "$CACHEFILE" | sort | while read -r label; do
        # Field name: a leading character that is not a letter or '_'
        # becomes '_', then every remaining non-alphanumeric becomes '_'.
        field=`echo "$label" | sed 's/^[^A-Za-z_]/_/' | sed 's/[^A-Za-z0-9_]/_/g'`
        echo "$field".label "$label"
        echo "$field".draw LINE1
        # echo "$field".warning 0
        # echo "$field".critical 0
    done
    exit 0
fi

# The first substitution is anchored with ^ so that it mirrors the sed
# expression used for "config". Unanchored, it replaced the first digit
# anywhere in the path ("dir2/x" became "dir__x" here but "dir2_x" in
# config), so those values never matched a configured field.
awk '!/lost\+found/ { sub(/^[^a-zA-Z_]/,"_",$2); gsub(/[^a-zA-Z0-9_]/,"_",$2); print $2".value "$1 * 1024 }' "$CACHEFILE" | sort -r -n -k2
|
35
plugins/disk/freedisk
Executable file
35
plugins/disk/freedisk
Executable file
|
@ -0,0 +1,35 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Wildcard-plugin to monitor free disk.
|
||||
#
|
||||
# ln -s /usr/share/munin/plugins/freedisk_ /etc/munin/plugins/freedisk_hda1
|
||||
#
|
||||
# ...will monitor /dev/hda1.
|
||||
#
|
||||
# $Log$
|
||||
# Revision 0.1 2010/11/01 guenter@grodotzki.ph
|
||||
# init
|
||||
|
||||
# parse wildcard config
|
||||
# The device name is whatever follows "freedisk_" in the (symlink) name.
DISK=`basename "$0" | sed 's/^freedisk_//g'`

# output config
if [ "$1" = "config" ]; then
    echo "graph_title Free Disk on /dev/$DISK"
    echo "graph_args --base 1024"
    echo "graph_vlabel Free Disk in Bytes"
    echo "graph_category disk"
    echo "freedisk.label free Bytes"
    echo "freedisk.draw LINE3"
    echo "freedisk.cdef freedisk,1024,*"
    echo "totaldisk.label total Bytes"
    echo "totaldisk.draw AREA"
    echo "totaldisk.cdef totaldisk,1024,*"
    exit 0
fi

# get data
# -k forces 1024-byte blocks, which is what the cdefs above assume.
# An exact match on /dev/$DISK wins, so hda1 no longer picks up hda10.
# Otherwise the first row containing $DISK is used, as before. The header
# row is never considered. OUTPUT is "<total> <available>" or empty.
OUTPUT=`df -P -k | awk -v disk="$DISK" '
    NR == 1 { next }
    $1 == "/dev/" disk { print $2, $4; found = 1; exit }
    !fallback && index($0, disk) { fallback = $2 " " $4 }
    END { if (!found && fallback) print fallback }'`

# Split OUTPUT into $1 (total) and $2 (available); both are unset when no
# row matched, in which case "U" (unknown) is reported.
set -- $OUTPUT
echo "freedisk.value ${2:-U}"
echo "totaldisk.value ${1:-U}"
|
55
plugins/disk/iostat
Executable file
55
plugins/disk/iostat
Executable file
|
@ -0,0 +1,55 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# Monitor disk iostat on FreeBSD host.
|
||||
#
|
||||
# Parameters understood:
|
||||
#
|
||||
# config (required)
|
||||
# autoconf (optional - used by munin-config)
|
||||
#
|
||||
# Magic markers (optional - used by munin-config and installation
|
||||
# scripts):
|
||||
#
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf
|
||||
|
||||
PATH=/bin:/usr/bin

case "$1" in
autoconf)
    echo yes
    exit 0
    ;;
esac

# First line of the iostat output: the device names, one word per device.
DISKS=$(/usr/sbin/iostat -dIn9 | head -1)

if [ "$1" = "config" ]; then
    echo 'graph_title IOstat'
    echo 'graph_args --base 1024 -l 0'
    echo 'graph_vlabel Bytes per ${graph_period}'
    echo 'graph_category disk'
    echo 'graph_info This graph shows disk load on the machine.'

    for disk in $DISKS; do
        case "$disk" in
        pass*)
            # devices whose name starts with "pass" are not graphed
            ;;
        *)
            echo "$disk.label $disk"
            echo "$disk.type DERIVE"
            echo "$disk.min 0"
            ;;
        esac
    done

    exit 0
fi

# Last line of the iostat output: three columns per device.
VALUES=$(/usr/sbin/iostat -dIn9 | tail -1)
col=3 # 3rd value for each disk is grabbed

for disk in $DISKS; do
    case "$disk" in
    pass*)
        ;;
    *)
        printf '%s.value ' "$disk"
        val=$(echo $VALUES | cut -d ' ' -f $col)
        # multiply by 1048576 with dc and drop any fractional part
        echo "$val 1048576 * p" | dc | cut -d '.' -f 1
        ;;
    esac
    # advance to the same column of the next device, skipped or not
    col=$((col + 3))
done
|
54
plugins/disk/iostat-xfrs
Executable file
54
plugins/disk/iostat-xfrs
Executable file
|
@ -0,0 +1,54 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# Monitor disk iostat on FreeBSD host.
|
||||
#
|
||||
# Parameters understood:
|
||||
#
|
||||
# config (required)
|
||||
# autoconf (optional - used by munin-config)
|
||||
#
|
||||
# Magic markers (optional - used by munin-config and installation
|
||||
# scripts):
|
||||
#
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf
|
||||
|
||||
PATH=/bin:/usr/bin

case "$1" in
autoconf)
    echo yes
    exit 0
    ;;
esac

# First line of the iostat output: the device names, one word per device.
DISKS=$(/usr/sbin/iostat -dIn9 | head -1)

if [ "$1" = "config" ]; then
    echo 'graph_title IOstat xfrs'
    echo 'graph_args -l 0'
    echo 'graph_vlabel Transfers per ${graph_period}'
    echo 'graph_category disk'
    echo 'graph_info This graph shows disk load on the machine.'

    for disk in $DISKS; do
        case "$disk" in
        pass*)
            # devices whose name starts with "pass" are not graphed
            ;;
        *)
            echo "$disk.label $disk"
            echo "$disk.type DERIVE"
            echo "$disk.min 0"
            ;;
        esac
    done

    exit 0
fi

# Last line of the iostat output: three columns per device.
VALUES=$(/usr/sbin/iostat -dIn9 | tail -1)
col=2 # 2nd value for each disk is grabbed

for disk in $DISKS; do
    case "$disk" in
    pass*)
        ;;
    *)
        printf '%s.value ' "$disk"
        echo $VALUES | cut -d ' ' -f $col
        ;;
    esac
    # advance to the same column of the next device, skipped or not
    col=$((col + 3))
done
|
702
plugins/disk/linux_diskstat_
Executable file
702
plugins/disk/linux_diskstat_
Executable file
|
@ -0,0 +1,702 @@
|
|||
#!/usr/bin/perl -w
|
||||
# vim: sts=4 sw=4 ts=8
|
||||
|
||||
# Munin markers:
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf suggest
|
||||
|
||||
# Author: Michael Renner <michael.renner@amd.co.at>
|
||||
|
||||
# Version: 0.0.5, 2009-05-22
|
||||
|
||||
|
||||
|
||||
=head1 NAME
|
||||
|
||||
linux_diskstat_ - Munin plugin to monitor various values provided
|
||||
via C</proc/diskstats>
|
||||
|
||||
=head1 APPLICABLE SYSTEMS
|
||||
|
||||
Linux 2.6 systems with extended block device statistics enabled.
|
||||
|
||||
|
||||
=head1 INTERPRETATION
|
||||
|
||||
Among the more self-describing or well-known values like C<throughput>
|
||||
(Bytes per second) there are a few which might need further introduction.
|
||||
|
||||
|
||||
=head2 Device Utilization
|
||||
|
||||
Linux provides a counter which increments in a millisecond-interval for as long
|
||||
as there are outstanding I/O requests. If this counter is close to 1000msec
|
||||
in a given 1 second timeframe the device is nearly 100% saturated. This plugin
|
||||
provides values averaged over a 5 minute time frame per default, so it can't
|
||||
catch short-lived saturations, but it'll give a nice trend for semi-uniform
|
||||
load patterns as they're expected in most server or multi-user environments.
|
||||
|
||||
|
||||
=head2 Device IO Time
|
||||
|
||||
The C<Device IO Time> takes the counter described under C<Device Utilization>
|
||||
and divides it by the number of I/Os that happened in the given time frame,
|
||||
resulting in an average time per I/O on the block-device level.
|
||||
|
||||
This value can give you a good comparison base amongst different controllers,
|
||||
storage subsystems and disks for similar workloads.
|
||||
|
||||
|
||||
=head2 Syscall Wait Time
|
||||
|
||||
These values describe the average time it takes between an application issuing
|
||||
a syscall resulting in a hit to a blockdevice to the syscall returning to the
|
||||
application.
|
||||
|
||||
The values are bound to be higher (at least for read requests) than the time
|
||||
it takes the device itself to fulfill the requests, since calling overhead,
|
||||
queuing times and probably a dozen other things are included in those times.
|
||||
|
||||
These are the values to watch out for when a user complains that C<the disks
|
||||
are too slow!>.
|
||||
|
||||
|
||||
=head3 What causes a block device hit?
|
||||
|
||||
A non-exhaustive list:
|
||||
|
||||
=over
|
||||
|
||||
=item * Reads from files when the given range is not in the page cache or the O_DIRECT
|
||||
flag is set.
|
||||
|
||||
=item * Writes to files if O_DIRECT or O_SYNC is set or sys.vm.dirty_(background_)ratio
|
||||
is exceeded.
|
||||
|
||||
=item * Filesystem metadata operations (stat(2), getdents(2), file creation,
|
||||
modification of any of the values returned by stat(2), etc.)
|
||||
|
||||
=item * The pdflush daemon writing out dirtied pages
|
||||
|
||||
=item * (f)sync
|
||||
|
||||
=item * Swapping
|
||||
|
||||
=item * raw device I/O (mkfs, dd, etc.)
|
||||
|
||||
=back
|
||||
|
||||
=head1 ACKNOWLEDGEMENTS
|
||||
|
||||
The core logic of this script is based on the B<iostat> tool of the B<sysstat>
|
||||
package written and maintained by Sebastien Godard.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
See C<Documentation/iostats.txt> in your Linux source tree for further information
|
||||
about the C<numbers> involved in this module.
|
||||
|
||||
L<http://www.westnet.com/~gsmith/content/linux-pdflush.htm> has a nice writeup
|
||||
about the pdflush daemon.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael Renner <michael.renner@amd.co.at>
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
GPLv2
|
||||
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
use strict;
|
||||
|
||||
|
||||
use File::Basename;
|
||||
use Carp;
|
||||
use POSIX;
|
||||
|
||||
# Prefer the state helpers shipped with Munin::Plugin. When that module cannot
# be loaded, install local replacements for save_state()/restore_state().
# Don't try this at home
eval {
    require Munin::Plugin;
    Munin::Plugin->import;
};

fake_munin_plugin() if $@;

# The graph mode and the device are taken from the name the script was called
# by, so refuse to run under a name lacking the expected prefix.
die qq(Please ensure that the name of the script and it's symlinks starts with "linux_diskstat_"\n)
  unless basename($0) =~ /^linux_diskstat_/;
|
||||
|
||||
|
||||
############
|
||||
# autoconf #
|
||||
############
|
||||
|
||||
if ( defined $ARGV[0] && $ARGV[0] eq 'autoconf' ) {

    # parse_diskstats() croaks when the statistics files cannot be read;
    # trap that here so it is reported as a plain "no".
    my %stats = eval { parse_diskstats() };
    my $probe_failed = $@ ? 1 : 0;

    if ( !$probe_failed && keys %stats ) {
        print "yes\n";
    }
    else {
        my $reason =
          $probe_failed
          ? 'failed to read block device statistics'
          : 'no block devices with extended statistics found';
        print "no ($reason)\n";
    }

    # autoconf answers on stdout only. A non-zero exit status is treated as
    # a plugin error by munin-node-configure, so exit 0 for "no" as well.
    exit 0;
}
|
||||
|
||||
|
||||
###########
|
||||
# suggest #
|
||||
###########
|
||||
|
||||
if ( defined $ARGV[0] && $ARGV[0] eq 'suggest' ) {

    my %diskstats = parse_diskstats();

    # mode name => list of device names to suggest for that mode
    my %suggested_devices;

  DEVICE:
    for my $devname ( sort keys %diskstats ) {

        my $stat = $diskstats{$devname};

        # Skip devices without traffic
        next DEVICE if $stat->{'rd_ios'} == 0 && $stat->{'wr_ios'} == 0;

        for my $existing_device ( @{ $suggested_devices{'iops'} } ) {

            # Filter out partitions of devices which are already suggested,
            # e.g. sda1 -> sda, c0d0p1 -> c0d0. A partition is named
            # "<disk><n>", or "<disk>p<n>" when the disk name itself ends
            # in a digit. Matching exactly that shape (anchored, with the
            # disk name quoted) keeps independent devices such as md10 or
            # dm-10 from being dropped as "partitions" of md1 or dm-1.
            my $separator = $existing_device =~ /\d$/ ? 'p' : '';

            next DEVICE
              if $devname =~ m/^\Q$existing_device\E$separator\d+$/;
        }

        push @{ $suggested_devices{'iops'} },       $devname;
        push @{ $suggested_devices{'throughput'} }, $devname;

        # Only suggest latency graphs if the device supports it
        if ( $stat->{'rd_ticks'} > 0 || $stat->{'wr_ticks'} > 0 ) {
            push @{ $suggested_devices{'latency'} }, $devname;
        }
    }

    # Sorted so that the output order does not depend on hash ordering
    for my $mode ( sort keys %suggested_devices ) {
        for my $device ( sort @{ $suggested_devices{$mode} } ) {

            # '/' cannot be part of a file name, so translate it
            my $printdev = translate_device_name( $device, 'TO_FS' );
            print "${mode}_$printdev\n";
        }
    }

    exit 0;
}
|
||||
|
||||
|
||||
# Everything below needs to know which graph ($mode) is wanted for which
# block device ($device); both are encoded in the name the script runs as.

my $basename = basename($0);

my ( $mode, $device );
if ( $basename =~ m/linux_diskstat_(\w+)_([-+\w]+)$/ ) {
    ( $mode, $device ) = ( $1, $2 );
}

croak "Didn't get a device name. Aborting\n" unless defined $device;

# Turn the file-name-safe spelling back into the real device name
$device = translate_device_name( $device, 'FROM_FS' );
|
||||
|
||||
##########
|
||||
# config #
|
||||
##########
|
||||
|
||||
if ( defined $ARGV[0] && $ARGV[0] eq 'config' ) {

    # Device-mapper nodes (dm-N) are shown under a friendlier name
    my $pretty_device =
      $device =~ /^dm-\d+$/ ? translate_devicemapper_name($device) : $device;

    # One graph definition per supported mode
    my %graph_config;

    $graph_config{'latency'} = <<EOF;
graph_title Disk latency for /dev/$pretty_device
graph_args --base 1000
graph_category disk

util.label Device utilization (percent)
util.type GAUGE
util.info Utilization of the device. If the time spent for I/O is close to 1000msec for a given second, the device is nearly 100% saturated.
util.min 0
svctm.label Average device IO time (ms)
svctm.type GAUGE
svctm.info Average time an I/O takes on the block device
svctm.min 0
avgwait.label Average IO Wait time (ms)
avgwait.type GAUGE
avgwait.info Average wait time for an I/O from request start to finish (includes queue times et al)
avgwait.min 0
avgrdwait.label Average Read IO Wait time (ms)
avgrdwait.type GAUGE
avgrdwait.info Average wait time for a read I/O from request start to finish (includes queue times et al)
avgrdwait.min 0
avgwrwait.label Average Write IO Wait time (ms)
avgwrwait.type GAUGE
avgwrwait.info Average wait time for a write I/O from request start to finish (includes queue times et al)
avgwrwait.min 0

EOF

    $graph_config{'throughput'} = <<EOF;
graph_title Disk throughput for /dev/$pretty_device
graph_args --base 1024
graph_vlabel Bytes/second
graph_category disk

rdbytes.label Read Bytes
rdbytes.type GAUGE
rdbytes.min 0
wrbytes.label Write Bytes
wrbytes.type GAUGE
wrbytes.min 0

EOF

    $graph_config{'iops'} = <<EOF;
graph_title Disk IOs for /dev/$pretty_device
graph_args --base 1000
graph_vlabel Units/second
graph_category disk

rdio.label Read IO/sec
rdio.type GAUGE
rdio.min 0
wrio.label Write IO/sec
wrio.type GAUGE
wrio.min 0
avgrqsz.label Average Request Size (KiB)
avgrqsz.type GAUGE
avgrqsz.min 0
avgrdrqsz.label Average Read Request Size (KiB)
avgrdrqsz.type GAUGE
avgrdrqsz.min 0
avgwrrqsz.label Average Write Request Size (KiB)
avgwrrqsz.type GAUGE
avgwrrqsz.min 0

EOF

    croak "Unknown mode $mode\n" unless exists $graph_config{$mode};

    print $graph_config{$mode};
    exit 0;
}
|
||||
|
||||
|
||||
########
|
||||
# MAIN #
|
||||
########
|
||||
|
||||
|
||||
# Current counters of the requested device
my %cur_diskstat = fetch_device_counters($device);

# Fetch the previous sample first, then replace it with the current one
my ( $prev_time, %prev_diskstat ) = restore_state();
save_state( time(), %cur_diskstat );

# Rates are computed from two samples. Without a stored one this is most
# likely the first run for the device, so leave quietly and let the next
# run do the work.
exit unless defined $prev_time and %prev_diskstat;

calculate_and_print_values( $prev_time, \%prev_diskstat, \%cur_diskstat );
|
||||
|
||||
|
||||
|
||||
########
|
||||
# SUBS #
|
||||
########
|
||||
|
||||
# Turn two samples of a device's counters into per-second rates and averages
# and print the "<field>.value" lines of the graph selected by the file-level
# $mode.
#
# Parameters:
#   $prev_time  - epoch seconds at which the previous sample was taken
#   $prev_stats - hash ref holding the previous counters
#   $cur_stats  - hash ref holding the current counters
#
# Prints nothing when no time has passed since the previous sample.
# Croaks when $mode is not one of latency, throughput or iops.
sub calculate_and_print_values {
    my ( $prev_time, $prev_stats, $cur_stats ) = @_;

    # Sector counters are converted to bytes using 512-byte sectors
    my $bytes_per_sector = 512;

    my $interval = time() - $prev_time;

    # Every rate below divides by the elapsed time. Two runs within the same
    # second (or a clock stepping backwards) leave no usable time base, so
    # skip this sample instead of dying with a division by zero.
    return if $interval <= 0;

    my $read_ios  = $cur_stats->{'rd_ios'} - $prev_stats->{'rd_ios'};
    my $write_ios = $cur_stats->{'wr_ios'} - $prev_stats->{'wr_ios'};

    my $rd_ticks = $cur_stats->{'rd_ticks'} - $prev_stats->{'rd_ticks'};
    my $wr_ticks = $cur_stats->{'wr_ticks'} - $prev_stats->{'wr_ticks'};

    my $rd_sectors = $cur_stats->{'rd_sectors'} - $prev_stats->{'rd_sectors'};
    my $wr_sectors = $cur_stats->{'wr_sectors'} - $prev_stats->{'wr_sectors'};

    my $tot_ticks = $cur_stats->{'tot_ticks'} - $prev_stats->{'tot_ticks'};

    my $read_io_per_sec  = $read_ios / $interval;
    my $write_io_per_sec = $write_ios / $interval;

    my $read_bytes_per_sec  = $rd_sectors / $interval * $bytes_per_sector;
    my $write_bytes_per_sec = $wr_sectors / $interval * $bytes_per_sector;

    my $total_ios         = $read_ios + $write_ios;
    my $total_ios_per_sec = $total_ios / $interval;

    # Utilization - or "how busy is the device"?
    # If the time spent for I/O was close to 1000msec for
    # a given second, the device is nearly 100% saturated.
    my $utilization = $tot_ticks / $interval;

    # Average time an I/O takes on the block device
    my $servicetime =
      $total_ios_per_sec ? $utilization / $total_ios_per_sec : 0;

    # Average wait time for an I/O from start to finish
    # (includes queue times et al)
    my $average_wait = $total_ios ? ( $rd_ticks + $wr_ticks ) / $total_ios : 0;
    my $average_rd_wait = $read_ios  ? $rd_ticks / $read_ios  : 0;
    my $average_wr_wait = $write_ios ? $wr_ticks / $write_ios : 0;

    my $average_rq_size_in_kb =
      $total_ios
      ? ( $rd_sectors + $wr_sectors ) * $bytes_per_sector / 1024 / $total_ios
      : 0;
    my $average_rd_rq_size_in_kb =
      $read_ios ? $rd_sectors * $bytes_per_sector / 1024 / $read_ios : 0;
    my $average_wr_rq_size_in_kb =
      $write_ios ? $wr_sectors * $bytes_per_sector / 1024 / $write_ios : 0;

    # msec busy per second -> percent
    my $util_print = $utilization / 10;

    if ( $mode eq 'latency' ) {
        print <<EOF;

util.value $util_print
svctm.value $servicetime
avgwait.value $average_wait
avgrdwait.value $average_rd_wait
avgwrwait.value $average_wr_wait

EOF
    }
    elsif ( $mode eq 'throughput' ) {

        print <<EOF;

rdbytes.value $read_bytes_per_sec
wrbytes.value $write_bytes_per_sec

EOF
    }
    elsif ( $mode eq 'iops' ) {

        print <<EOF;

rdio.value $read_io_per_sec
wrio.value $write_io_per_sec
avgrqsz.value $average_rq_size_in_kb
avgrdrqsz.value $average_rd_rq_size_in_kb
avgwrrqsz.value $average_wr_rq_size_in_kb

EOF

    }
    else {
        croak "Unknown mode $mode\n";
    }

}
|
||||
|
||||
# Read /proc/diskstats.
#
# Returns a list with one array ref per usable line, each holding exactly 14
# values: major, minor, device name and the 11 classic counters.
#
# Croaks when the file cannot be opened or closed.
sub read_diskstats {

    open my $stat_fh, '<', '/proc/diskstats'
      or croak "Failed to open '/proc/diskstats': $!\n";

    my @lines;

    while ( my $line = <$stat_fh> ) {

        # split ' ' ignores leading whitespace and the trailing newline
        my @elems = split ' ', $line;

        # We explicitly don't support old-style diskstats
        # There are situations where only _some_ lines (e.g.
        # partitions on older 2.6 kernels) have fewer stats
        # numbers, therefore we'll skip them silently
        next if @elems < 14;

        # Newer kernels append further counters (discard, flush) after the
        # classic ones. Requiring exactly 14 fields would skip every line
        # there, so accept longer lines and keep the first 14 values.
        $#elems = 13;

        push @lines, \@elems;
    }

    close $stat_fh or croak "Failed to close '/proc/diskstats': $!";
    return @lines;
}
|
||||
|
||||
# Read block device counters from /sys/block/<device>/stat.
#
# Parameters:
#   $want_device - optional device name (e.g. "sda" or "cciss/c0d0"); when
#                  undefined, every device below /sys/block is read
#
# Returns a list with one array ref per device, laid out like a line of
# /proc/diskstats: two empty placeholders for major and minor, the device
# name, then 11 counters.
#
# Croaks when a stat file cannot be opened or closed, or holds fewer than 11
# values.
sub read_sysfs {

    my ($want_device) = @_;

    my @devices;

    if ( defined $want_device ) {

        # sysfs uses '!' as replacement for '/', e.g. cciss!c0d0
        ( my $sysfs_name = $want_device ) =~ tr#/#!#;
        @devices = ($sysfs_name);
    }
    else {
        @devices =
          map { m!/sys/block/([^/]+)/stat! } glob "/sys/block/*/stat";
    }

    my @lines;

    for my $cur_device (@devices) {
        my $stats_file = "/sys/block/$cur_device/stat";

        # Three-argument open: the device name comes from the script's file
        # name and must never be interpreted as part of the open mode.
        open my $stat_fh, '<', $stats_file
          or croak "Failed to open '$stats_file': $!\n";

        my $line = <$stat_fh>;

        close $stat_fh or croak "Failed to close '$stats_file': $!\n";

        # split ' ' ignores leading whitespace and the trailing newline
        my @elems = defined $line ? split( ' ', $line ) : ();

        croak "'$stats_file' contains fewer than 11 values. Aborting"
          if ( @elems < 11 );

        # Newer kernels append further counters (discard, flush) after the
        # classic 11. Only those 11 are used, so drop the rest instead of
        # refusing to work.
        $#elems = 10;

        # Translate the devicename back before storing the information
        ( my $devname = $cur_device ) =~ tr#!#/#;

        # Faking missing diskstats values
        unshift @elems, ( '', '', $devname );

        push @lines, \@elems;
    }

    return @lines;
}
|
||||
|
||||
|
||||
# Collect the block device counters, preferring sysfs over /proc/diskstats.
#
# Parameters:
#   $want_device - optional device name; only honoured by the sysfs reader
#
# Returns a hash (as a list) mapping each device name to a hash ref with the
# keys major, minor, devname, rd_ios, rd_merges, rd_sectors, rd_ticks, wr_ios,
# wr_merges, wr_sectors, wr_ticks, ios_in_prog, tot_ticks and rq_ticks.
sub parse_diskstats {

    my ($want_device) = @_;

    # glob has to be evaluated in list context: in boolean context it acts
    # as an iterator which keeps its position between calls, so a later call
    # could see "no match" although the files exist.
    my @sysfs_stat_files = glob "/sys/block/*/stat";

    my @stats =
      @sysfs_stat_files
      ? read_sysfs($want_device)
      : read_diskstats();

    my @field_names = qw(major minor devname
      rd_ios rd_merges rd_sectors rd_ticks
      wr_ios wr_merges wr_sectors wr_ticks
      ios_in_prog tot_ticks rq_ticks);

    my %diskstats;

    for my $entry (@stats) {

        my %devstat;

        # Hash-Slicing for fun and profit
        @devstat{@field_names} = @{$entry};

        $diskstats{ $devstat{'devname'} } = \%devstat;
    }

    return %diskstats;
}
|
||||
|
||||
# Return the counters of one device.
#
# Parameters:
#   $want_device - name of the device, e.g. "sda"
#
# Returns the device's counter hash as a list (see parse_diskstats() for the
# keys), or an empty list (undef in scalar context) when the device is
# unknown.
sub fetch_device_counters {

    my ($want_device) = @_;

    my %diskstats = parse_diskstats($want_device);

    if ( defined $want_device && exists $diskstats{$want_device} ) {
        return %{ $diskstats{$want_device} };
    }

    # A bare return yields an empty list in list context. "return undef"
    # would hand a one-element list to callers assigning the result to a
    # hash, producing a bogus entry and a warning.
    return;
}
|
||||
|
||||
|
||||
# Convert a device name between its real spelling and the spelling used in
# the script's file name, where '+' (and formerly '-') stands in for '/'.
#
# Parameters:
#   $device - device name to convert
#   $mode   - 'FROM_FS' (file name -> device) or 'TO_FS' (device -> file name)
#
# Returns the converted name. Croaks on any other $mode.
sub translate_device_name {

    my ( $device, $mode ) = @_;

    if ( $mode eq 'TO_FS' ) {
        $device =~ tr#/#+#;
        return $device;
    }

    croak "translate_device_name: Unknown mode\n" if $mode ne 'FROM_FS';

    # Hackaround to mitigate issues with unwisely chosen former separator:
    # names containing dm-<n> keep their dash.
    $device =~ tr#-+#//# unless $device =~ m/dm-\d+/;

    return $device;
}
|
||||
|
||||
|
||||
# Define stand-ins for Munin::Plugin's save_state() and restore_state() which
# keep the state in a Storable file below /tmp.
#
# The code is compiled from a string so that Storable is only loaded when the
# fallback is actually needed. A compilation failure is reported as a warning
# rather than being dropped silently.
sub fake_munin_plugin {
    my $eval_code = <<'EOF';

use Storable;
my $storable_filename = basename($0);

# NOTE(review): a predictable file name in a world-writable directory can be
# pre-created or symlinked by other local users, and retrieve() will parse
# whatever it finds there. Consider a private state directory.
$storable_filename = "/tmp/munin-state-$storable_filename";

sub save_state {
    my @state = @_;

    if ( not -e $storable_filename or -f $storable_filename ) {
        store \@state, $storable_filename or croak "Failed to persist state to '$storable_filename': $!\n";
    }
    else {
        croak "$storable_filename is probably not a regular file. Please delete it.\n";
    }
}

sub restore_state {

    if (-f $storable_filename) {
        my $state = retrieve($storable_filename);
        return @{$state};
    }

    # No state yet: empty list in list context, undef in scalar context
    return;
}
EOF

    eval($eval_code);

    # Without this the only symptom of a failure would be a later
    # "Undefined subroutine" error
    carp "Failed to set up fallback state handling: $@" if $@;
}
|
||||
|
||||
# Find a friendlier name for a device-mapper node.
#
# Parameters:
#   $device - kernel name of the node, "dm-<minor>"
#
# Returns a name relative to /dev (callers prepend "/dev/" themselves):
# "<vg>/<lv>" when translate_lvm_name() recognises the mapping, otherwise
# "mapper/<name>". Returns $device unchanged when no entry in /dev/mapper
# matches.
#
# Croaks when $device is not of the form dm-<n> or the device-mapper major
# number cannot be determined.
sub translate_devicemapper_name {
    my ($device) = @_;

    my ($want_minor) = $device =~ m/^dm-(\d+)$/;

    croak "Failed to extract devicemapper id" unless defined($want_minor);

    my $dm_major = find_devicemapper_major();
    croak "Failed to get device-mapper major number\n" unless defined $dm_major;

    for my $entry ( glob "/dev/mapper/*" ) {

        # An entry may vanish between glob and stat
        my $rdev = ( stat($entry) )[6];
        next unless defined $rdev;

        # Linux keeps the low 8 bits of the minor at the bottom of the
        # device number and the remaining minor bits above the major, so a
        # plain "/ 256" and "% 256" breaks for minors above 255.
        my $major = ( $rdev >> 8 ) & 0xfff;
        my $minor = ( $rdev & 0xff ) | ( ( $rdev >> 12 ) & 0xfff00 );

        if ( $major == $dm_major && $minor == $want_minor ) {

            my $pretty_name = translate_lvm_name($entry);
            return $pretty_name if defined $pretty_name;

            # $entry is an absolute path; strip the "/dev/" the callers add
            # again, which would otherwise give "/dev//dev/mapper/<name>".
            ( my $relative_name = $entry ) =~ s{^/dev/}{};
            return $relative_name;
        }
    }

    # Return original string if the device can't be found.
    return $device;
}
|
||||
|
||||
|
||||
|
||||
# Try to read a /dev/mapper entry as an LVM volume.
#
# Parameters:
#   $entry - path of the entry, e.g. "/dev/mapper/vg0-root"
#
# Returns "<vg>/<lv>" when the entry's name contains a single dash separating
# two parts and /dev/<vg>/<lv> exists, undef otherwise.
sub translate_lvm_name {

    my ($entry) = @_;

    my $mapper_name = basename($entry);

    # A dash which is not part of a "--" pair
    my $lone_dash = qr/(?<!-)-(?!-)/;

    # Without such a dash this cannot be an lvm devicemapper device
    return undef unless $mapper_name =~ $lone_dash;

    # split device name into vg and lv parts
    my ( $vg, $lv ) = split $lone_dash, $mapper_name, 2;
    return undef unless defined $vg && defined $lv;

    # remove extraneous dashes from vg and lv names
    s/--/-/g for ( $vg, $lv );

    my $lvm_path = "$vg/$lv";

    # Sanity check - does the constructed device name exist?
    return stat("/dev/$lvm_path") ? $lvm_path : undef;
}
|
||||
|
||||
# Look up the major device number registered for "device-mapper".
#
# Returns the number as found in /proc/devices, or undef when there is no
# such entry. Croaks when /proc/devices cannot be opened.
sub find_devicemapper_major {

    open my $devices_fh, '<', '/proc/devices'
      or croak "Failed to open '/proc/devices': $!";

    my $dm_major;

    while ( my $line = <$devices_fh> ) {
        chomp $line;

        # The numbers are right-aligned. split ' ' skips the leading
        # padding, whereas split /\s+/ would return an empty first field
        # for padded lines and never match them.
        my ( $major, $name ) = split ' ', $line, 2;

        next unless defined $name;

        if ( $name eq 'device-mapper' ) {
            $dm_major = $major;
            last;
        }
    }
    close($devices_fh);

    return $dm_major;
}
|
702
plugins/disk/linux_diskstats_
Executable file
702
plugins/disk/linux_diskstats_
Executable file
|
@ -0,0 +1,702 @@
|
|||
#!/usr/bin/perl -w
|
||||
# vim: sts=4 sw=4 ts=8
|
||||
|
||||
# Munin markers:
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf suggest
|
||||
|
||||
# Author: Michael Renner <michael.renner@amd.co.at>
|
||||
|
||||
# Version: 0.0.5, 2009-05-22
|
||||
|
||||
|
||||
|
||||
=head1 NAME
|
||||
|
||||
linux_diskstat_ - Munin plugin to monitor various values provided
|
||||
via C</proc/diskstats>
|
||||
|
||||
=head1 APPLICABLE SYSTEMS
|
||||
|
||||
Linux 2.6 systems with extended block device statistics enabled.
|
||||
|
||||
|
||||
=head1 INTERPRETATION
|
||||
|
||||
Among the more self-describing or well-known values like C<throughput>
|
||||
(Bytes per second) there are a few which might need further introduction.
|
||||
|
||||
|
||||
=head2 Device Utilization
|
||||
|
||||
Linux provides a counter which increments in a millisecond-interval for as long
|
||||
as there are outstanding I/O requests. If this counter is close to 1000msec
|
||||
in a given 1 second timeframe the device is nearly 100% saturated. This plugin
|
||||
provides values averaged over a 5 minute time frame per default, so it can't
|
||||
catch short-lived saturations, but it'll give a nice trend for semi-uniform
|
||||
load patterns as they're expected in most server or multi-user environments.
|
||||
|
||||
|
||||
=head2 Device IO Time
|
||||
|
||||
The C<Device IO Time> takes the counter described under C<Device Utilization>
|
||||
and divides it by the number of I/Os that happened in the given time frame,
|
||||
resulting in an average time per I/O on the block-device level.
|
||||
|
||||
This value can give you a good comparison base amongst different controllers,
|
||||
storage subsystems and disks for similar workloads.
|
||||
|
||||
|
||||
=head2 Syscall Wait Time
|
||||
|
||||
These values describe the average time it takes between an application issuing
|
||||
a syscall resulting in a hit to a blockdevice to the syscall returning to the
|
||||
application.
|
||||
|
||||
The values are bound to be higher (at least for read requests) than the time
|
||||
it takes the device itself to fulfill the requests, since calling overhead,
|
||||
queuing times and probably a dozen other things are included in those times.
|
||||
|
||||
These are the values to watch out for when a user complains that C<the disks
|
||||
are too slow!>.
|
||||
|
||||
|
||||
=head3 What causes a block device hit?
|
||||
|
||||
A non-exhaustive list:
|
||||
|
||||
=over
|
||||
|
||||
=item * Reads from files when the given range is not in the page cache or the O_DIRECT
|
||||
flag is set.
|
||||
|
||||
=item * Writes to files if O_DIRECT or O_SYNC is set or sys.vm.dirty_(background_)ratio
|
||||
is exceeded.
|
||||
|
||||
=item * Filesystem metadata operations (stat(2), getdents(2), file creation,
|
||||
modification of any of the values returned by stat(2), etc.)
|
||||
|
||||
=item * The pdflush daemon writing out dirtied pages
|
||||
|
||||
=item * (f)sync
|
||||
|
||||
=item * Swapping
|
||||
|
||||
=item * raw device I/O (mkfs, dd, etc.)
|
||||
|
||||
=back
|
||||
|
||||
=head1 ACKNOWLEDGEMENTS
|
||||
|
||||
The core logic of this script is based on the B<iostat> tool of the B<sysstat>
|
||||
package written and maintained by Sebastien Godard.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
See C<Documentation/iostats.txt> in your Linux source tree for further information
|
||||
about the C<numbers> involved in this module.
|
||||
|
||||
L<http://www.westnet.com/~gsmith/content/linux-pdflush.htm> has a nice writeup
|
||||
about the pdflush daemon.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Michael Renner <michael.renner@amd.co.at>
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
GPLv2
|
||||
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
use strict;
|
||||
|
||||
|
||||
use File::Basename;
|
||||
use Carp;
|
||||
use POSIX;
|
||||
|
||||
# Prefer the state helpers shipped with Munin::Plugin. When that module cannot
# be loaded, install local replacements for save_state()/restore_state().
# Don't try this at home
eval {
    require Munin::Plugin;
    Munin::Plugin->import;
};

fake_munin_plugin() if $@;

# The graph mode and the device are taken from the name the script was called
# by, so refuse to run under a name lacking the expected prefix.
die qq(Please ensure that the name of the script and it's symlinks starts with "linux_diskstat_"\n)
  unless basename($0) =~ /^linux_diskstat_/;
|
||||
|
||||
|
||||
############
|
||||
# autoconf #
|
||||
############
|
||||
|
||||
if ( defined $ARGV[0] && $ARGV[0] eq 'autoconf' ) {

    # parse_diskstats() croaks when the statistics files cannot be read;
    # trap that here so it is reported as a plain "no".
    my %stats = eval { parse_diskstats() };
    my $probe_failed = $@ ? 1 : 0;

    if ( !$probe_failed && keys %stats ) {
        print "yes\n";
    }
    else {
        my $reason =
          $probe_failed
          ? 'failed to read block device statistics'
          : 'no block devices with extended statistics found';
        print "no ($reason)\n";
    }

    # autoconf answers on stdout only. A non-zero exit status is treated as
    # a plugin error by munin-node-configure, so exit 0 for "no" as well.
    exit 0;
}
|
||||
|
||||
|
||||
###########
|
||||
# suggest #
|
||||
###########
|
||||
|
||||
if ( defined $ARGV[0] && $ARGV[0] eq 'suggest' ) {

    my %diskstats = parse_diskstats();

    # mode name => list of device names to suggest for that mode
    my %suggested_devices;

  DEVICE:
    for my $devname ( sort keys %diskstats ) {

        my $stat = $diskstats{$devname};

        # Skip devices without traffic
        next DEVICE if $stat->{'rd_ios'} == 0 && $stat->{'wr_ios'} == 0;

        for my $existing_device ( @{ $suggested_devices{'iops'} } ) {

            # Filter out partitions of devices which are already suggested,
            # e.g. sda1 -> sda, c0d0p1 -> c0d0. A partition is named
            # "<disk><n>", or "<disk>p<n>" when the disk name itself ends
            # in a digit. Matching exactly that shape (anchored, with the
            # disk name quoted) keeps independent devices such as md10 or
            # dm-10 from being dropped as "partitions" of md1 or dm-1.
            my $separator = $existing_device =~ /\d$/ ? 'p' : '';

            next DEVICE
              if $devname =~ m/^\Q$existing_device\E$separator\d+$/;
        }

        push @{ $suggested_devices{'iops'} },       $devname;
        push @{ $suggested_devices{'throughput'} }, $devname;

        # Only suggest latency graphs if the device supports it
        if ( $stat->{'rd_ticks'} > 0 || $stat->{'wr_ticks'} > 0 ) {
            push @{ $suggested_devices{'latency'} }, $devname;
        }
    }

    # Sorted so that the output order does not depend on hash ordering
    for my $mode ( sort keys %suggested_devices ) {
        for my $device ( sort @{ $suggested_devices{$mode} } ) {

            # '/' cannot be part of a file name, so translate it
            my $printdev = translate_device_name( $device, 'TO_FS' );
            print "${mode}_$printdev\n";
        }
    }

    exit 0;
}
|
||||
|
||||
|
||||
# Everything below needs to know which graph ($mode) is wanted for which
# block device ($device); both are encoded in the name the script runs as.

my $basename = basename($0);

my ( $mode, $device );
if ( $basename =~ m/linux_diskstat_(\w+)_([-+\w]+)$/ ) {
    ( $mode, $device ) = ( $1, $2 );
}

croak "Didn't get a device name. Aborting\n" unless defined $device;

# Turn the file-name-safe spelling back into the real device name
$device = translate_device_name( $device, 'FROM_FS' );
|
||||
|
||||
##########
|
||||
# config #
|
||||
##########
|
||||
|
||||
if ( defined $ARGV[0] && $ARGV[0] eq 'config' ) {

    # Device-mapper nodes (dm-N) are shown under a friendlier name
    my $pretty_device =
      $device =~ /^dm-\d+$/ ? translate_devicemapper_name($device) : $device;

    # One graph definition per supported mode
    my %graph_config;

    $graph_config{'latency'} = <<EOF;
graph_title Disk latency for /dev/$pretty_device
graph_args --base 1000
graph_category disk

util.label Device utilization (percent)
util.type GAUGE
util.info Utilization of the device. If the time spent for I/O is close to 1000msec for a given second, the device is nearly 100% saturated.
util.min 0
svctm.label Average device IO time (ms)
svctm.type GAUGE
svctm.info Average time an I/O takes on the block device
svctm.min 0
avgwait.label Average IO Wait time (ms)
avgwait.type GAUGE
avgwait.info Average wait time for an I/O from request start to finish (includes queue times et al)
avgwait.min 0
avgrdwait.label Average Read IO Wait time (ms)
avgrdwait.type GAUGE
avgrdwait.info Average wait time for a read I/O from request start to finish (includes queue times et al)
avgrdwait.min 0
avgwrwait.label Average Write IO Wait time (ms)
avgwrwait.type GAUGE
avgwrwait.info Average wait time for a write I/O from request start to finish (includes queue times et al)
avgwrwait.min 0

EOF

    $graph_config{'throughput'} = <<EOF;
graph_title Disk throughput for /dev/$pretty_device
graph_args --base 1024
graph_vlabel Bytes/second
graph_category disk

rdbytes.label Read Bytes
rdbytes.type GAUGE
rdbytes.min 0
wrbytes.label Write Bytes
wrbytes.type GAUGE
wrbytes.min 0

EOF

    $graph_config{'iops'} = <<EOF;
graph_title Disk IOs for /dev/$pretty_device
graph_args --base 1000
graph_vlabel Units/second
graph_category disk

rdio.label Read IO/sec
rdio.type GAUGE
rdio.min 0
wrio.label Write IO/sec
wrio.type GAUGE
wrio.min 0
avgrqsz.label Average Request Size (KiB)
avgrqsz.type GAUGE
avgrqsz.min 0
avgrdrqsz.label Average Read Request Size (KiB)
avgrdrqsz.type GAUGE
avgrdrqsz.min 0
avgwrrqsz.label Average Write Request Size (KiB)
avgwrrqsz.type GAUGE
avgwrrqsz.min 0

EOF

    croak "Unknown mode $mode\n" unless exists $graph_config{$mode};

    print $graph_config{$mode};
    exit 0;
}
|
||||
|
||||
|
||||
########
|
||||
# MAIN #
|
||||
########
|
||||
|
||||
|
||||
# Current counters of the requested device
my %cur_diskstat = fetch_device_counters($device);

# Fetch the previous sample first, then replace it with the current one
my ( $prev_time, %prev_diskstat ) = restore_state();
save_state( time(), %cur_diskstat );

# Rates are computed from two samples. Without a stored one this is most
# likely the first run for the device, so leave quietly and let the next
# run do the work.
exit unless defined $prev_time and %prev_diskstat;

calculate_and_print_values( $prev_time, \%prev_diskstat, \%cur_diskstat );
|
||||
|
||||
|
||||
|
||||
########
|
||||
# SUBS #
|
||||
########
|
||||
|
||||
# Turn two samples of a device's counters into per-second rates and averages
# and print the "<field>.value" lines of the graph selected by the file-level
# $mode.
#
# Parameters:
#   $prev_time  - epoch seconds at which the previous sample was taken
#   $prev_stats - hash ref holding the previous counters
#   $cur_stats  - hash ref holding the current counters
#
# Prints nothing when no time has passed since the previous sample.
# Croaks when $mode is not one of latency, throughput or iops.
sub calculate_and_print_values {
    my ( $prev_time, $prev_stats, $cur_stats ) = @_;

    # Sector counters are converted to bytes using 512-byte sectors
    my $bytes_per_sector = 512;

    my $interval = time() - $prev_time;

    # Every rate below divides by the elapsed time. Two runs within the same
    # second (or a clock stepping backwards) leave no usable time base, so
    # skip this sample instead of dying with a division by zero.
    return if $interval <= 0;

    my $read_ios  = $cur_stats->{'rd_ios'} - $prev_stats->{'rd_ios'};
    my $write_ios = $cur_stats->{'wr_ios'} - $prev_stats->{'wr_ios'};

    my $rd_ticks = $cur_stats->{'rd_ticks'} - $prev_stats->{'rd_ticks'};
    my $wr_ticks = $cur_stats->{'wr_ticks'} - $prev_stats->{'wr_ticks'};

    my $rd_sectors = $cur_stats->{'rd_sectors'} - $prev_stats->{'rd_sectors'};
    my $wr_sectors = $cur_stats->{'wr_sectors'} - $prev_stats->{'wr_sectors'};

    my $tot_ticks = $cur_stats->{'tot_ticks'} - $prev_stats->{'tot_ticks'};

    my $read_io_per_sec  = $read_ios / $interval;
    my $write_io_per_sec = $write_ios / $interval;

    my $read_bytes_per_sec  = $rd_sectors / $interval * $bytes_per_sector;
    my $write_bytes_per_sec = $wr_sectors / $interval * $bytes_per_sector;

    my $total_ios         = $read_ios + $write_ios;
    my $total_ios_per_sec = $total_ios / $interval;

    # Utilization - or "how busy is the device"?
    # If the time spent for I/O was close to 1000msec for
    # a given second, the device is nearly 100% saturated.
    my $utilization = $tot_ticks / $interval;

    # Average time an I/O takes on the block device
    my $servicetime =
      $total_ios_per_sec ? $utilization / $total_ios_per_sec : 0;

    # Average wait time for an I/O from start to finish
    # (includes queue times et al)
    my $average_wait = $total_ios ? ( $rd_ticks + $wr_ticks ) / $total_ios : 0;
    my $average_rd_wait = $read_ios  ? $rd_ticks / $read_ios  : 0;
    my $average_wr_wait = $write_ios ? $wr_ticks / $write_ios : 0;

    my $average_rq_size_in_kb =
      $total_ios
      ? ( $rd_sectors + $wr_sectors ) * $bytes_per_sector / 1024 / $total_ios
      : 0;
    my $average_rd_rq_size_in_kb =
      $read_ios ? $rd_sectors * $bytes_per_sector / 1024 / $read_ios : 0;
    my $average_wr_rq_size_in_kb =
      $write_ios ? $wr_sectors * $bytes_per_sector / 1024 / $write_ios : 0;

    # msec busy per second -> percent
    my $util_print = $utilization / 10;

    if ( $mode eq 'latency' ) {
        print <<EOF;

util.value $util_print
svctm.value $servicetime
avgwait.value $average_wait
avgrdwait.value $average_rd_wait
avgwrwait.value $average_wr_wait

EOF
    }
    elsif ( $mode eq 'throughput' ) {

        print <<EOF;

rdbytes.value $read_bytes_per_sec
wrbytes.value $write_bytes_per_sec

EOF
    }
    elsif ( $mode eq 'iops' ) {

        print <<EOF;

rdio.value $read_io_per_sec
wrio.value $write_io_per_sec
avgrqsz.value $average_rq_size_in_kb
avgrdrqsz.value $average_rd_rq_size_in_kb
avgwrrqsz.value $average_wr_rq_size_in_kb

EOF

    }
    else {
        croak "Unknown mode $mode\n";
    }

}
|
||||
|
||||
# Read /proc/diskstats.
#
# Returns a list with one array ref per usable line, each holding exactly 14
# values: major, minor, device name and the 11 classic counters.
#
# Croaks when the file cannot be opened or closed.
sub read_diskstats {

    open my $stat_fh, '<', '/proc/diskstats'
      or croak "Failed to open '/proc/diskstats': $!\n";

    my @lines;

    while ( my $line = <$stat_fh> ) {

        # split ' ' ignores leading whitespace and the trailing newline
        my @elems = split ' ', $line;

        # We explicitly don't support old-style diskstats
        # There are situations where only _some_ lines (e.g.
        # partitions on older 2.6 kernels) have fewer stats
        # numbers, therefore we'll skip them silently
        next if @elems < 14;

        # Newer kernels append further counters (discard, flush) after the
        # classic ones. Requiring exactly 14 fields would skip every line
        # there, so accept longer lines and keep the first 14 values.
        $#elems = 13;

        push @lines, \@elems;
    }

    close $stat_fh or croak "Failed to close '/proc/diskstats': $!";
    return @lines;
}
|
||||
|
||||
# Read block device counters from /sys/block/<device>/stat.
#
# Parameters:
#   $want_device - optional device name (e.g. "sda" or "cciss/c0d0"); when
#                  undefined, every device below /sys/block is read
#
# Returns a list with one array ref per device, laid out like a line of
# /proc/diskstats: two empty placeholders for major and minor, the device
# name, then 11 counters.
#
# Croaks when a stat file cannot be opened or closed, or holds fewer than 11
# values.
sub read_sysfs {

    my ($want_device) = @_;

    my @devices;

    if ( defined $want_device ) {

        # sysfs uses '!' as replacement for '/', e.g. cciss!c0d0
        ( my $sysfs_name = $want_device ) =~ tr#/#!#;
        @devices = ($sysfs_name);
    }
    else {
        @devices =
          map { m!/sys/block/([^/]+)/stat! } glob "/sys/block/*/stat";
    }

    my @lines;

    for my $cur_device (@devices) {
        my $stats_file = "/sys/block/$cur_device/stat";

        # Three-argument open: the device name comes from the script's file
        # name and must never be interpreted as part of the open mode.
        open my $stat_fh, '<', $stats_file
          or croak "Failed to open '$stats_file': $!\n";

        my $line = <$stat_fh>;

        close $stat_fh or croak "Failed to close '$stats_file': $!\n";

        # split ' ' ignores leading whitespace and the trailing newline
        my @elems = defined $line ? split( ' ', $line ) : ();

        croak "'$stats_file' contains fewer than 11 values. Aborting"
          if ( @elems < 11 );

        # Newer kernels append further counters (discard, flush) after the
        # classic 11. Only those 11 are used, so drop the rest instead of
        # refusing to work.
        $#elems = 10;

        # Translate the devicename back before storing the information
        ( my $devname = $cur_device ) =~ tr#!#/#;

        # Faking missing diskstats values
        unshift @elems, ( '', '', $devname );

        push @lines, \@elems;
    }

    return @lines;
}
|
||||
|
||||
|
||||
# Collect the block device counters, preferring sysfs over /proc/diskstats.
#
# Parameters:
#   $want_device - optional device name; only honoured by the sysfs reader
#
# Returns a hash (as a list) mapping each device name to a hash ref with the
# keys major, minor, devname, rd_ios, rd_merges, rd_sectors, rd_ticks, wr_ios,
# wr_merges, wr_sectors, wr_ticks, ios_in_prog, tot_ticks and rq_ticks.
sub parse_diskstats {

    my ($want_device) = @_;

    # glob has to be evaluated in list context: in boolean context it acts
    # as an iterator which keeps its position between calls, so a later call
    # could see "no match" although the files exist.
    my @sysfs_stat_files = glob "/sys/block/*/stat";

    my @stats =
      @sysfs_stat_files
      ? read_sysfs($want_device)
      : read_diskstats();

    my @field_names = qw(major minor devname
      rd_ios rd_merges rd_sectors rd_ticks
      wr_ios wr_merges wr_sectors wr_ticks
      ios_in_prog tot_ticks rq_ticks);

    my %diskstats;

    for my $entry (@stats) {

        my %devstat;

        # Hash-Slicing for fun and profit
        @devstat{@field_names} = @{$entry};

        $diskstats{ $devstat{'devname'} } = \%devstat;
    }

    return %diskstats;
}
|
||||
|
||||
# Return the counters of a single device.
#
# Hands $want_device to parse_diskstats() and returns the matching entry as a
# flat key/value list, or undef when the device is not among the results.
sub fetch_device_counters {

    my ($want_device) = @_;

    my %diskstats = parse_diskstats($want_device);

    # Device names are the hash keys, so a direct lookup finds the same entry
    # a scan over all keys would.
    return %{ $diskstats{$want_device} }
        if exists $diskstats{$want_device};

    return undef;
}
|
||||
|
||||
|
||||
# We use '+' (and formerly '-') as placeholder for '/' in device-names
|
||||
# used as calling name for the script.
|
||||
# Convert a device name between its real form and the form used in the
# plugin's file name.
#
#   FROM_FS: '+' and '-' become '/', unless the name contains dm-<number>
#   TO_FS:   '/' becomes '+'
#
# Croaks on any other mode.  Returns the converted name.
sub translate_device_name {

    my ($device, $mode) = @_;

    croak "translate_device_name: Unknown mode\n"
        unless $mode eq 'FROM_FS' or $mode eq 'TO_FS';

    if ($mode eq 'TO_FS') {
        $device =~ tr#/#+#;
        return $device;
    }

    # FROM_FS.  '-' was the separator in former versions; devicemapper names
    # (dm-N) carry a real dash and must therefore be left alone.
    $device =~ tr#-+#//# unless $device =~ m/dm-\d+/;

    return $device;
}
|
||||
|
||||
|
||||
# Define save_state() and restore_state() at run time.
#
# The evaluated code keeps the state in /tmp/munin-state-<script name> using
# Storable.  Croaks if the code cannot be compiled (for example because
# Storable is not installed) instead of leaving both subs undefined.
sub fake_munin_plugin {
    my $eval_code = <<'EOF';

use Storable;
my $storable_filename = basename($0);
$storable_filename = "/tmp/munin-state-$storable_filename";

sub save_state {
    my @state = @_;

    if ( not -e $storable_filename or -f $storable_filename ) {
        store \@state, $storable_filename or croak "Failed to persist state to '$storable_filename': $!\n";
    }
    else {
        croak "$storable_filename is probably not a regular file. Please delete it.\n";
    }
}

sub restore_state {

    if (-f $storable_filename) {
        my $state = retrieve($storable_filename);
        return @{$state};
    }
    else {
        return undef;
    }
}
EOF

    my $result = eval($eval_code);

    # A failed string eval only sets $@; without this check the error would
    # surface much later as an "Undefined subroutine" failure.
    croak "Failed to set up the state file fallback: $@" if $@;

    return $result;
}
|
||||
|
||||
# Translate a kernel devicemapper name (dm-<minor>) into a readable one.
#
# Looks through /dev/mapper for the node whose device number matches the
# device-mapper major and the requested minor.  Returns the name produced by
# translate_lvm_name() for that node, the node's path if that yields nothing,
# or the unchanged input when no node matches.
#
# Croaks if $device is not of the form dm-<number> or if the device-mapper
# major number cannot be determined.
sub translate_devicemapper_name {
    my ($device) = @_;

    my ($want_minor) = $device =~ m/^dm-(\d+)$/;

    croak "Failed to extract devicemapper id" unless defined ($want_minor);

    my $dm_major = find_devicemapper_major();
    croak "Failed to get device-mapper major number\n" unless defined $dm_major;

    for my $entry (glob "/dev/mapper/*") {

        # stat() fails e.g. for dangling symlinks; such an entry cannot be
        # the device we are looking for.
        my @node = stat($entry) or next;
        my $rdev = $node[6];

        # Linux device number layout: major in bits 8-19, minor in bits 0-7
        # plus bits 20-31.  Dividing by 256 is only right for minors < 256.
        my $major = ( $rdev >> 8 ) & 0xfff;
        my $minor = ( $rdev & 0xff ) | ( ( $rdev >> 12 ) & 0xfff00 );

        if ($major == $dm_major && $minor == $want_minor) {

            my $pretty_name = translate_lvm_name($entry);

            return defined $pretty_name ? $pretty_name : $entry;
        }
    }

    # Return original string if the device can't be found.
    return $device;
}
|
||||
|
||||
|
||||
|
||||
# Turn a /dev/mapper entry into "<vg>/<lv>" if it looks like an LVM volume.
#
# In the mapper name a single dash separates volume group and logical
# volume, while a doubled dash stands for a literal dash inside either name.
# Returns "<vg>/<lv>" when /dev/<vg>/<lv> exists, undef otherwise.
sub translate_lvm_name {

    my ($entry) = @_;

    # A dash that is neither preceded nor followed by another dash
    my $single_dash = qr/(?<!-)-(?!-)/;

    my $mapper_name = basename($entry);

    # Without a single dash this cannot be an LVM devicemapper device.
    return undef unless $mapper_name =~ $single_dash;

    # split device name into vg and lv parts
    my ($vg, $lv) = split $single_dash, $mapper_name, 2;
    return undef unless ( defined($vg) && defined($lv) );

    # remove extraneous dashes from vg and lv names
    s/--/-/g for $vg, $lv;

    my $lvm_name = "$vg/$lv";

    # Sanity check - does the constructed device name exist?
    return stat("/dev/$lvm_name") ? $lvm_name : undef;
}
|
||||
|
||||
# Look up the major number registered for 'device-mapper' in /proc/devices.
#
# Returns the major number, or undef if there is no device-mapper entry.
# Croaks if /proc/devices cannot be opened.
sub find_devicemapper_major {

    open (my $devices_fh, '<', '/proc/devices')
        or croak "Failed to open '/proc/devices': $!";

    my $dm_major;

    while (my $line = <$devices_fh>) {

        # split ' ' skips leading whitespace; majors below 100 are padded
        # with blanks, which a plain /\s+/ split turns into an empty first
        # field.  The limit of 2 keeps the rest of the line as the name.
        my ($major, $name) = split ' ', $line, 2;

        next unless defined $name;
        chomp $name;

        if ($name eq 'device-mapper') {
            $dm_major = $major;
            last;
        }
    }
    close($devices_fh);

    return $dm_major;
}
|
30
plugins/disk/log_sizes
Executable file
30
plugins/disk/log_sizes
Executable file
|
@ -0,0 +1,30 @@
|
|||
#!/bin/sh
|
||||
|
||||
#H=`echo $0 | awk -F_ '{print $2}'`
|
||||
|
||||
#LOGFILES=`ls /var/log/messages /var/log/syslog /var/log/daemon.log /myapplication/logs/*.log`
|
||||
# Log files to graph, separated by blanks
LOGFILES="/var/log/messages /var/log/syslog /var/log/daemon.log"

# Turn a path into a munin field name: '-', '/' and '.' become '_'
field_name() {
  echo "$1" | sed 's/[-\/\.]/_/g'
}

if [ "$1" = "config" ] ; then

  echo "graph_title log sizes"
  echo "graph_category disk"
  echo "graph_info this graph shows sizes of log files"
  echo "graph_vlabel size (bytes)"

  for F in $LOGFILES
  do
    MF=`field_name "$F"`
    echo "$MF.label $F"
  done

else

  for F in $LOGFILES
  do
    MF=`field_name "$F"`
    # Not every system has all of these files.  Report "U" (unknown) for a
    # file that can't be stat'ed, so every field still gets a complete
    # "name.value X" line of its own.
    SIZE=`stat --printf="%s" "$F" 2>/dev/null` || SIZE=U
    echo "$MF.value $SIZE"
  done

fi
|
58
plugins/disk/lvm_
Executable file
58
plugins/disk/lvm_
Executable file
|
@ -0,0 +1,58 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# Script to monitor disk usage.
|
||||
#
|
||||
# By PatrickDK
|
||||
#
|
||||
# Parameters understood:
|
||||
#
|
||||
# config (required)
|
||||
# autoconf (optional - used by munin-config)
|
||||
#
|
||||
# $Log$
|
||||
#
|
||||
# Magic markers (optional - used by munin-config and installation
|
||||
# scripts):
|
||||
#
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf
|
||||
|
||||
if [ "$1" = "autoconf" ]; then
	echo yes
	exit 0
fi

# The volume group is whatever follows "lvm_" in the name of the script
vg=`echo $0 | awk '{ sub(".*lvm_","",\$1); print \$1; }'`

# Turn a name into a munin field name: '/', '.' and '-' become '_'
clean_name() {
	echo $1 | sed 's/[\/.-]/_/g'
}

# Print "<lv name> <size in bytes>" for each logical volume of $vg.
# The volume group is handed to lvs itself and the columns are requested by
# name; grepping the default report also matched LVs and VGs that merely
# contain $vg in one of their columns.  With an empty $vg nothing is passed,
# so all volume groups are listed.
list_lvs() {
	lvs --units b --nosuffix --noheadings -o lv_name,lv_size ${vg:+"$vg"}
}

if [ "$1" = "config" ]; then

	echo 'graph_title Logical Volume usage'
	echo 'graph_args --base 1000 -l 0'
#	echo 'graph_vlabel %'
	echo 'graph_category disk'
	echo 'graph_info This graph shows disk usage on the machine.'
	echo "free.label free"
	echo "free.draw AREA"
	list_lvs | while read lv size; do
		name=`clean_name "$lv"`
		echo "$name.label $lv"
		echo "$name.draw STACK"
	done
	exit 0
fi

# Free space of the volume group (the first one listed if $vg is empty)
free=`vgs --units b --nosuffix --noheadings -o vg_free ${vg:+"$vg"} | awk 'NR == 1 { print $1 }'`
echo "free.value ${free:-U}"

list_lvs | while read lv size; do
	name=`clean_name "$lv"`
	echo "$name.value $size"
done
|
44
plugins/disk/lvm_snap_used
Executable file
44
plugins/disk/lvm_snap_used
Executable file
|
@ -0,0 +1,44 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Plugin to monitor the % of allocated area of a LVM snapshot
|
||||
#
|
||||
# Parameters:
|
||||
#
|
||||
# config
|
||||
# autoconf
|
||||
#
|
||||
# Configuration variables
|
||||
# no config variables
|
||||
#
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf
|
||||
#
|
||||
# 2011/05/20 - pmoranga - initial version
|
||||
#
|
||||
# 2012/01/27 - Sébastien Gross
|
||||
# - Fix lvdisplay path
|
||||
|
||||
# Full path of lvdisplay; empty when it is not in the PATH
lvdisplay=$(which lvdisplay)

case "$1" in
	autoconf)
		# The plugin is usable as soon as lvdisplay was found
		if test -n "${lvdisplay}"; then
			echo yes
			exit 0
		fi
		echo "no lvdisplay found"
		exit 1
		;;
	config)
		echo 'graph_title Allocated space for snapshot'
		echo 'graph_vlabel %'
		echo 'graph_category disk'
		echo 'graph_args --base 100'
		# One field per row whose third column starts with "s"
		${lvdisplay} -C | awk '$3 ~ /^s/{print $1".label "$1" snapshot of "$5} '
		exit 0
		;;
esac

# Values: the sixth column of the same rows, truncated to an integer
${lvdisplay} -C | awk '$3 ~ /^s/{print $1".value",int($6)} '
|
120
plugins/disk/lvm_usage
Executable file
120
plugins/disk/lvm_usage
Executable file
|
@ -0,0 +1,120 @@
|
|||
#! /usr/bin/perl -w
|
||||
|
||||
=head1 NAME
|
||||
|
||||
lvm_usage - Plugin to monitor usage of LVM volume groups
|
||||
|
||||
=head1 CONFIGURATION
|
||||
|
||||
Must be run as root:
|
||||
|
||||
[lvm_usage]
|
||||
user root
|
||||
|
||||
=head1 MAGIC MARKERS
|
||||
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Gábor Gombás <gombasg@sztaki.hu>
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
GPLv2 or later
|
||||
|
||||
=cut
|
||||
|
||||
use strict;
use Munin::Plugin;
use Carp;

need_multigraph();

if ($ARGV[0] and $ARGV[0] eq 'autoconf') {
    if (-c "/dev/mapper/control") {
        print "yes\n";
    }
    else {
        print "no (/dev/mapper/control is missing)\n";
    }
    exit 0;
}

# Run an LVM reporting tool ('vgs' or 'lvs') with sizes in plain bytes and
# without headings.  Returns one array ref of column values per non-empty
# output line.  Croaks, naming the tool, if it cannot be started.
sub lvm_report {
    my ($tool, $columns) = @_;

    open(my $report, '-|', "$tool --units b --nosuffix --noheadings -o $columns")
        or croak("Failed to run '$tool': " . $!);

    my @rows;
    while (my $line = <$report>) {
        # split ' ' drops leading whitespace and the newline
        my @fields = split(' ', $line);
        # a blank line must not turn into a volume group without a name
        push @rows, \@fields if @fields;
    }
    close $report;

    return @rows;
}

# vg name => { size => bytes, free => bytes, lvs => { lv name => bytes } }
my %vgs;

for my $row (lvm_report('vgs', 'vg_name,vg_size,vg_free')) {
    my ($vg_name, $vg_size, $vg_free) = @{$row};
    $vgs{$vg_name}->{size} = $vg_size unless $vgs{$vg_name}->{size};
    $vgs{$vg_name}->{free} = $vg_free unless $vgs{$vg_name}->{free};
    $vgs{$vg_name}->{lvs} = {};
}

for my $row (lvm_report('lvs', 'vg_name,lv_name,lv_size')) {
    my ($vg_name, $lv_name, $lv_size) = @{$row};
    $vgs{$vg_name}->{lvs}->{$lv_name} = $lv_size;
}

if ($ARGV[0] and $ARGV[0] eq 'config') {
    # Overview graph: used percentage of every volume group
    print "multigraph lvm_usage\n";
    print "graph_title LVM volume group usage\n";
    print "graph_args --base 1024 --lower-limit 0 --upper-limit 100\n";
    print "graph_vlabel %\n";
    print "graph_category disk\n";
    print "graph_order " . join(' ', map { clean_fieldname($_) } (sort keys %vgs)) . "\n";
    for my $vg (sort keys %vgs) {
        my $id = clean_fieldname($vg);
        print "$id.label $vg\n";
        print "$id.type GAUGE\n";
        print "$id.draw LINE2\n";
    }

    # One sub-graph per volume group: free space plus its logical volumes
    for my $vg (sort keys %vgs) {
        my $id = clean_fieldname($vg);

        print "multigraph lvm_usage.$id\n";
        print "graph_title Volume group usage ($vg)\n";
        print "graph_args --base 1024 --lower-limit 0\n";
        print "graph_vlabel bytes\n";
        print "graph_category disk\n";
        print "__free.label Free space\n";
        print "__free.draw AREA\n";

        foreach my $lv (sort keys %{$vgs{$vg}->{lvs}}) {
            my $id = clean_fieldname($lv);
            print "$id.label $lv\n";
            print "$id.draw STACK\n";
        }
    }

    exit 0;
}

print "multigraph lvm_usage\n";
for my $vg (sort keys %vgs) {
    my $id = clean_fieldname($vg);
    my $size = $vgs{$vg}->{'size'};

    # Without a (non-zero) size there is no percentage; report "U" (unknown)
    # instead of dying with a division by zero.
    my $pct = $size
        ? int(($size - $vgs{$vg}->{'free'}) * 100 / $size)
        : 'U';
    print "$id.value " . $pct . "\n";
}

for my $vg (sort keys %vgs) {
    my $id = clean_fieldname($vg);
    print "multigraph lvm_usage.$id\n";
    print "__free.value " . (defined $vgs{$vg}->{free} ? $vgs{$vg}->{free} : 'U') . "\n";

    foreach my $lv (sort keys %{$vgs{$vg}->{lvs}}) {
        my $id = clean_fieldname($lv);
        print "$id.value " . $vgs{$vg}->{lvs}->{$lv} . "\n";
    }
}
|
283
plugins/disk/md_iostat_
Executable file
283
plugins/disk/md_iostat_
Executable file
|
@ -0,0 +1,283 @@
|
|||
#!/usr/bin/perl -w
|
||||
#
|
||||
# Plugin for watching io-bound traffic (in blocks) on disks.
|
||||
#
|
||||
# Usage: Link or copy into /etc/lrrd/client.d/
|
||||
#
|
||||
# Parameters:
|
||||
#
|
||||
# config (required)
|
||||
# autoconf (optional - used by lrrd-config)
|
||||
#
|
||||
# $Log$
|
||||
# Revision 1.14 2004/12/10 18:51:44 jimmyo
|
||||
# linux/apt* has been forced to LANG=C, to get predictable output.
|
||||
#
|
||||
# Revision 1.13 2004/12/10 10:47:49 jimmyo
|
||||
# Change name from ${scale} to ${graph_period}, to be more consistent.
|
||||
#
|
||||
# Revision 1.12 2004/12/09 22:12:56 jimmyo
|
||||
# Added "graph_period" option, to make "graph_sums" usable.
|
||||
#
|
||||
# Revision 1.11 2004/11/21 00:17:12 jimmyo
|
||||
# Changed a lot of plugins so they use DERIVE instead of COUNTER.
|
||||
#
|
||||
# Revision 1.10 2004/11/20 23:58:22 jimmyo
|
||||
# The linux/iostat plugin now ignores devices without traffic (Deb#267195).
|
||||
#
|
||||
# Revision 1.9 2004/09/25 22:29:16 jimmyo
|
||||
# Added info fields to a bunch of plugins.
|
||||
#
|
||||
# Revision 1.8 2004/08/24 13:37:29 ilmari
|
||||
# Add total line
|
||||
#
|
||||
# Revision 1.7 2004/05/20 13:57:12 jimmyo
|
||||
# Set categories to some of the plugins.
|
||||
#
|
||||
# Revision 1.6 2004/02/02 18:18:07 jimmyo
|
||||
# Changed to an informative vlabel, since the field.label information has been made shorter.
|
||||
#
|
||||
# Revision 1.5 2004/02/02 17:52:32 jimmyo
|
||||
# Linux/iostat now shows only disks also on machines without devfs.
|
||||
#
|
||||
# Revision 1.4 2004/02/02 16:54:38 jimmyo
|
||||
# Make the iostat plugin work properly.
|
||||
#
|
||||
# Revision 1.3 2004/02/02 16:53:53 jimmyo
|
||||
# Make the iostat plugin work properly.
|
||||
#
|
||||
# Revision 1.2 2004/01/31 19:24:52 jimmyo
|
||||
# Rewrite of linux/iostat by Mike Fedyk (Deb##223373,224113).
|
||||
#
|
||||
# Revision 1.1 2004/01/02 18:50:01 jimmyo
|
||||
# Renamed occurrances of lrrd -> munin
|
||||
#
|
||||
# Revision 1.1.1.1 2004/01/02 15:18:07 jimmyo
|
||||
# Import of LRRD CVS tree after renaming to Munin
|
||||
#
|
||||
# Revision 1.5 2003/12/18 18:09:32 jimmyo
|
||||
# Added total line
|
||||
#
|
||||
# Revision 1.4 2003/12/18 11:01:51 jimmyo
|
||||
# Fix by_dev compare issue.
|
||||
#
|
||||
# Revision 1.3 2003/12/16 17:51:08 jimmyo
|
||||
# Plugin linux/iostat modified. Now runs on 2.6, and now "mirrors" i/o like eth* et al. (Deb#224113, Deb#223373)
|
||||
#
|
||||
# Revision 1.2 2003/11/07 17:43:16 jimmyo
|
||||
# Cleanups and log entries
|
||||
#
|
||||
#
|
||||
#
|
||||
# Magic markers (optional - used by lrrd-config and some installation
|
||||
# scripts):
|
||||
#
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf
|
||||
|
||||
use strict;
use Data::Dumper;

# Where to get stats from
my $detailed_present = 0;
my $stat_present = 0;
# And md things here?
my $mdstat_present = 0;

if ( (-f '/proc/diskstats') or
     (system("grep -q 'rio rmerge rsect ruse wio wmerge wsect wuse running use aveq' /proc/partitions") == 0) ) {
    $detailed_present = 1;
} elsif (system("grep -q '^disk_io: [^ ]' /proc/stat") == 0) {
    $stat_present = 1;
}

$mdstat_present = -f '/proc/mdstat';

if ( defined($ARGV[0]) and $ARGV[0] eq "autoconf") {
    if ($mdstat_present and ($detailed_present or $stat_present)) {
        print "yes\n";
        exit 0;
    }
    print "no\n";
    exit 1;
}

# %devs:      internal device id => hash ref with the counters of the device
# %nametodev: device name        => internal device id
my %devs;
my %nametodev;

# Both subs carry a prototype and are defined further down, hence the
# '&' call form.
if ($detailed_present) {
    &fetch_detailed;
} elsif ($stat_present) {
    # Falling back to /proc/stat
    &fetch_stat;
}

# The md device is whatever follows the last '_' in the name of the script
my $md = $0;
$md =~ s/.*_//;

open(MD,"/proc/mdstat")
    or die "Could not open /proc/mdstat for reading: $!\n";

my ($dev,$mdstatus,$raid,@devs);

while (<MD>) {
    # Match the complete device name: a bare prefix match would take the
    # line of md10 for md1.
    next unless /^\Q$md\E\s+:/;
    ($dev, $mdstatus) = split(/\s+:\s+/,$_,2);
    ($mdstatus, $raid, @devs) = split(/\s+/,$mdstatus);
    last;
}

close(MD);

# Only used for the graph description
$raid = 'RAID' unless defined $raid;

# Remove unwanted things like raid device number, partition number
# (and any flag that follows the raid device number) and sort nicely.
@devs = sort by_dev map { s/\d*\[.*$//; $_; } @devs;

# Insert the raid device into the mix.
unshift(@devs,$md);

# And translate to the device name used by the datastructures.  Names
# without statistics are dropped, they would end up as empty field names.
@devs = grep { defined } map { $nametodev{$_}; } @devs;

my $i=0;

if ( $ARGV[0] and $ARGV[0] eq "config") {
    print "graph_title IOstat for $md\n";
    print "graph_args --base 1024 -l 0\n";
    print "graph_vlabel blocks / \${graph_period} read (-) / written (+)\n";
    print "graph_category disk\n";
    print "graph_info This graph shows the I/O to and from block devices comprising the $raid device $md.\n";

    my @grapho = @devs;
    # The first shall be last
    push(@grapho,shift(@grapho)) if @grapho;

    print "graph_order";
    foreach my $key (@grapho) {
        print " ", $key, "_read ", $key, "_write ";
    }
    print "\n";
    foreach my $key (@devs) {
        print $key . "_read.label $devs{$key}->{name}\n";
        print $key . "_read.type DERIVE\n";
        print $key . "_read.max 900000\n";
        print $key . "_read.min 0\n";
        print $key . "_read.graph no\n";
        print $key . "_write.label $devs{$key}->{name}\n";
        print $key . "_write.info I/O on device $devs{$key}->{name}\n";
        print $key . "_write.type DERIVE\n";
        print $key . "_write.max 900000\n";
        print $key . "_write.min 0\n";
        print $key . "_write.negative " . $key . "_read\n";
        # First device: line, second: area, all further ones stacked on it
        if ($i == 0) {
            print "${key}_read.draw LINE2\n";
            print "${key}_write.draw LINE2\n";
        } elsif ($i == 1) {
            print "${key}_read.draw AREA\n";
            print "${key}_write.draw AREA\n";
        } else {
            print "${key}_read.draw STACK\n";
            print "${key}_write.draw STACK\n";
        }
        $i++;
    }
    exit 0;
}

foreach my $key (@devs) {
    print $key, "_read.value ", $devs{$key}->{rsect}, "\n";
    print $key, "_write.value ", $devs{$key}->{wsect}, "\n";
}
|
||||
|
||||
# Sort comparator: plain string order of the two names in $a and $b
sub by_dev {
    $a cmp $b;
}
|
||||
|
||||
# Fill %devs and %nametodev from the "disk_io:" line of /proc/stat.
#
# Every "(major,minor):(x,rio,rsect,wio,wsect)" entry becomes a device named
# "dev<major>_<minor>" which maps onto itself in %nametodev.  Dies if
# /proc/stat cannot be opened.
sub fetch_stat() {
    open (IN, "/proc/stat") or die "Could not open /proc/stat for reading: $!\n";

    while (<IN>) {
        next unless (/^disk_io:\s*(.+)\s*/);

        # Words that don't look like an entry (the "disk_io:" tag, empty
        # fields) simply fail the match below.
        foreach my $entry (split /\s+/) {
            my ($major, $minor, $rio, $rsect, $wio, $wsect) =
                $entry =~ /\((\d+),(\d+)\):\(\d+,(\d+),(\d+),(\d+),(\d+)\)/
                or next;

            my $name = "dev" . $major . "_" . $minor;

            $nametodev{$name} = $name;

            $devs{$name} = {
                name  => $name,
                rio   => $rio,
                rsect => $rsect,
                wio   => $wio,
                wsect => $wsect
            };
        }
    }
    close (IN);
}
|
||||
|
||||
# Number of disks already counted, per major number
my %maj_count;

# Return the running index (0, 1, 2, ...) of the next disk with the given
# major number and count that disk.
sub get_disk_count()
{
    my ($major) = @_;

    my $index = exists($maj_count{$major}) ? $maj_count{$major} : 0;
    $maj_count{$major} = $index + 1;

    return $index;
}
|
||||
|
||||
|
||||
# Fill %devs and %nametodev from /proc/diskstats, or from /proc/partitions
# if the former can't be opened.  Does nothing if neither can be opened.
#
# Kept are md devices, names that don't end in a digit and names ending in
# /cXdY; anything else ending in a digit is skipped, and so is every device
# whose counters are all zero.
sub fetch_detailed() {

    open(DETAILED, "/proc/diskstats")
        or open(DETAILED, "/proc/partitions")
        or return;

    # Names of the first eleven values following the device name
    my @counters = qw(rio rmerge rsect ruse wio wmerge wsect wuse running use aveq);

    while (<DETAILED>) {
        next unless /^\s+(\d+)\s+\d+\s*\d*\s+([[:alpha:][:digit:]\/]+)\s+(.*)/;

        my ($major, $tmpnam, $values) = ($1, $2, $3);
        my @fields = split(/\s+/, $values);

        # We want raid disks reported here.  Of the other names ending in
        # digits only devices like cXdX (e.g. the cciss driver) are taken.
        if ($tmpnam !~ /^md\d+/ and $tmpnam =~ /\d+$/) {
            next unless $tmpnam =~ /\/c\d+d\d+$/;
        }

        # Ignore devices without any traffic
        next unless grep { $_ } @fields;

        $tmpnam =~ s/\/[[:alpha:]]+(\d+)/\/$1/g;
        $tmpnam =~ s/^([^\/]+)\//$1/;
        $tmpnam =~ s/\/disc$//;

        my $devnam = "dev" . $major . "_" . &get_disk_count($major);

        $nametodev{$tmpnam} = $devnam;

        my %stats = ( major => $major, name => $tmpnam );
        @stats{@counters} = @fields[ 0 .. $#counters ];
        $devs{$devnam} = \%stats;
    }
    close (DETAILED);
}
|
||||
# vim:syntax=perl
|
191
plugins/disk/megaraid-controller-information
Executable file
191
plugins/disk/megaraid-controller-information
Executable file
|
@ -0,0 +1,191 @@
|
|||
#!/usr/bin/perl -w
|
||||
#
|
||||
# Munin plugin for MegaRAID
|
||||
# This plugin can graph:- Currently Drive Temperature and Error Count
|
||||
#
|
||||
#---------------------
|
||||
# Examples
|
||||
# Create a symbolic link to MegaRaid_<AdapterNumber>_<temp|error|other|predictive>
|
||||
# ln -s /usr/share/munin/plugins/MegaRaid_ /etc/munin/plugins/MegaRaid_0_temp
|
||||
# graph temperature on adapter 0
|
||||
#
|
||||
# ln -s /usr/share/munin/plugins/MegaRaid_ /etc/munin/plugins/MegaRaid_0_error
|
||||
# graph media errors on adapter 0
|
||||
#
|
||||
# ln -s /usr/share/munin/plugins/MegaRaid_ /etc/munin/plugins/MegaRaid_1_temp
|
||||
# graph temperature on adapter 1
|
||||
#
|
||||
#---------------------
|
||||
# Log
|
||||
# Revision 0.1 2011/04/16 idobson
|
||||
# -First version only basic support of the MegaRaid controller
|
||||
#
|
||||
# Revision 0.2 2011/04/17 fkatzenb
|
||||
# -Added bash statement to remove the log file created each time MegaCli64 is ran
|
||||
# -Added a few comments and visual changes
|
||||
#
|
||||
# Revision 1.0 2011/04/17 fkatzenb
|
||||
# -Revamped Code to symbolic link for sensor type and future growth
|
||||
#
|
||||
# Revision 1.1 2011/04/17 fkatzenb
|
||||
# -Revised scaling
|
||||
#
|
||||
# Revision 1.2 2011/04/28 fkatzenb
|
||||
# -Added support for graph_info support
|
||||
# -Added warning & critical alerts support
|
||||
# -Added data info
|
||||
#
|
||||
# Revision 2.0 2011/04/29 fkatzenb
|
||||
# -Added remaining support for SMART Errors
|
||||
#
|
||||
# Revision 2.1 2011/04/29 fkatzenb
|
||||
# -Added version information for in the graph description
|
||||
#
|
||||
#
|
||||
#---------------------
|
||||
#
|
||||
# Add the following to your /etc/munin/plugin-conf.d/munin-node:
|
||||
#
|
||||
# [MegaRaid_*]
|
||||
# user root
|
||||
#
|
||||
#---------------------
|
||||
#
|
||||
#
|
||||
# Magic markers (optional - used by munin-config and installation scripts):
|
||||
#
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf
|
||||
#
|
||||
my $DisplayVer=2.1;

use strict;
use warnings;
use File::Basename;

my $DevID=0;    #Device Number found
my $DevData=0;  #Device Data found

# Parse out Adapter number and parameter desired from file name:
# everything after "MegaRaid_", split at '_' or '-'
my $PluginName=basename($0);
(my $Parameters=$PluginName)=~ s/^MegaRaid_//;
my ($Adapter,$Type)=split(/[-_]/,$Parameters);

# Locate MegaCli64 application and remove whitespace
my $Command=`which MegaCli64`;
chomp $Command;

#Auto config options
if ($ARGV[0] and $ARGV[0] eq "autoconf" ) {
	if (-e $Command ) {
		print "yes\n";
		exit 0;
	} else {
		print "no\n";
		exit 1
	}
}

die "MegaCli64 not found in PATH\n" if $Command eq '';
die "Cannot derive adapter number and sensor type from plugin name '$PluginName'\n"
	unless defined $Adapter and defined $Type;

# Use this to define future parameters to monitor
my %config = (
	temp => {
		lookfor => 'Drive Temperature :',
		label => 'Temp',
		title => "MegaRAID Adapter $Adapter: Drive Temperatures",
		vtitle => 'Celsius',
		graph_args => '--base 1000 -l 0',
		warning => '55',
		critical => '65',
		info_tag => "Temperature (C)",
		description => "Internal Temperatures for drives on Adapter $Adapter."
	},
	error => {
		lookfor => 'Media Error Count: ',
		label => 'Media Err',
		title => "MegaRAID Adapter $Adapter: Media Errors (SMART)",
		vtitle => 'Number of Errors',
		graph_args => '--base 1000 -l 0',
		warning => '',
		critical => '',
		info_tag => "Media Errors (SMART)",
		description => "Number of SMART errors related to the drive's media on Adapter $Adapter."
	},
	other => {
		lookfor => 'Other Error Count: ',
		label => 'Other Err',
		title => "MegaRAID Adapter $Adapter: Others Errors (SMART)",
		vtitle => 'Number of Errors',
		graph_args => '--base 1000 -l 0',
		warning => '',
		critical => '',
		info_tag => "Other Errors (SMART)",
		description => "Number of SMART errors not related to the drive's media on Adapter $Adapter."
	},
	predictive => {
		lookfor => 'Predictive Failure Count: ',
		label => 'Predictive Err',
		title => "MegaRAID Adapter $Adapter: Predictive Errors (SMART)",
		vtitle => 'Number of Errors',
		graph_args => '--base 1000 -l 0',
		warning => '',
		critical => '',
		info_tag => "Predictive Errors (SMART)",
		description => "Number of SMART errors for each drive on Adapter $Adapter."
	}
);

die "Unknown sensor type '$Type' (expected one of: " . join(', ', sort keys %config) . ")\n"
	unless exists $config{$Type};

#Read Output of MegaRaid command
$Command.=" -PDList -a".$Adapter;
my @Output=qx($Command);

#Remove log file created by running MegaCli.  Done right away, so that it
#also happens when the plugin is run with "config".
unlink "MegaSAS.log";

#Munin Config Options
if ($ARGV[0] and $ARGV[0] eq "config"){
	print "graph_title $config{$Type}->{title}\n";
	# The attribute for the label of the vertical axis is graph_vlabel
	print "graph_vlabel $config{$Type}->{vtitle}\n";
	print "graph_args $config{$Type}->{graph_args}\n";
	print "graph_scale yes\n";
	print "graph_category disk\n";
	print "graph_info $config{$Type}->{description} <br />Generated by MegaRaid_, Version $DisplayVer<br />\n";

	foreach my $Line (@Output) {
		$Line=~ s/\r//g;
		$Line=~ s/\n//g;

		#Find the device ID
		if ( $Line=~ m/Slot Number: /i ) {
			$DevID=$Line;
			$DevID=~ s/Slot Number: //;
			my $Field="A".$Adapter."_D".$DevID."_$Type";
			print "$Field.label A$Adapter:D$DevID $config{$Type}->{label}\n";
			print "$Field.info Adapter: $Adapter / Drive: $DevID - $config{$Type}->{info_tag}\n";
			if ($config{$Type}->{warning} ne '' ) {
				print "$Field.warning $config{$Type}->{warning}\n";
			}
			if ($config{$Type}->{critical} ne '') {
				print "$Field.critical $config{$Type}->{critical}\n";
			}
		}
	}
	exit 0;
}

#Actually dump the data
foreach my $Line (@Output) {
	$Line=~ s/\r//g;
	$Line=~ s/\n//g;

	#Find the device ID
	if ( $Line=~ m/Slot Number: /i ) { $DevID=$Line; $DevID=~ s/Slot Number: //; chomp $DevID; }

	#Find the data and print it out
	if ( $Line=~ m/$config{$Type}->{lookfor}/i ) {
		$DevData=$Line;
		$DevData=~s/$config{$Type}->{lookfor}//;
		#Cut the unit (e.g. "C (95.00 F)") off a temperature
		$DevData=~s/C.*//;
		$DevData=~s/^\s+|\s+$//g;
		#Anything that is not a number is reported as unknown
		$DevData='U' unless $DevData=~ m/^-?\d+(?:\.\d+)?$/;
		print "A".$Adapter."_D".$DevID."_$Type.value $DevData\n";
	}
}

exit 0;
|
73
plugins/disk/raid
Executable file
73
plugins/disk/raid
Executable file
|
@ -0,0 +1,73 @@
|
|||
#!/usr/bin/perl -w
|
||||
#
|
||||
# (c) 2007 Nathan Rutman nathan@clusterfs.com
|
||||
#
|
||||
# Plugin to monitor RAID status
|
||||
#
|
||||
# Results are % of healthy drives in a raid device
|
||||
# and % rebuilt of devices that are resyncing.
|
||||
#
|
||||
#%# family=contrib
|
||||
#%# capabilities=autoconf
|
||||
|
||||
if ($ARGV[0] and $ARGV[0] eq "autoconf") {
    if (-r "/proc/mdstat" and `grep md /proc/mdstat`) {
        print "yes\n";
        exit 0;
    } else {
        print "no RAID devices\n";
        exit 1;
    }
}

my $want_config = ( $ARGV[0] and $ARGV[0] eq "config" );

if ($want_config) {
    print "graph_title RAID status\n";
    print "graph_category disk\n";
    print "graph_info This graph monitors RAID disk health.  Values are percentage of healthy drives in each raid group.  Degraded devices are marked Critical.\n";
    print "graph_args --base 1000 -l 0\n";
    print "graph_vlabel % healthy/rebuilt\n";
    print "graph_scale no\n";
}

{
    # Read the whole of /proc/mdstat in one go
    local( $/, *MDSTAT ) ;
    open (MDSTAT, "/proc/mdstat") or exit 1;
    my $text = <MDSTAT>;
    close MDSTAT;

    # Should look like "active raid1 sda1[0] sdc1[2] sdb1[1]"
    # Interestingly, swap is presented as "active (auto-read-only)"
    # The second line of a device holds "[<members>/<active>] [<status>]".
    while ($text =~ /(md\d+)\s+:\s+active\s+(\(auto-read-only\)\s+|)(\w+)\s+(.*)\n.*\[(\d+)\/(\d+)]\s+\[(\w+)]/g ) {
        my($dev,$dummy,$type,$members,$nmem,$nact,$status) = ($1,$2,$3,$4,$5,$6,$7);

        if ($want_config) {
            print "$dev.label $dev\n";
            print "$dev.info $type $members\n";
            # "98:" sets only a lower bound: values below 98 are critical.
            # 98 instead of 100, because of an unfound bug the value is
            # sometimes reported as 99.XX even when OS reports 100.
            print "$dev.critical 98:\n";
            print $dev, "_rebuild.label $dev rebuilt\n";
            print $dev, "_rebuild.info $type\n";
            # Same lower bound, for the same reason
            print $dev, "_rebuild.critical 98:\n";
        } else {
            my $pct = 100 * $nact / $nmem;
            my $rpct = 100;
            if ( $pct < 100 ) {
                # A degraded array that is not rebuilding has no "Rebuild"
                # line, and the command may fail altogether: count both as
                # 0% rebuilt.
                my $rebuild = `/sbin/mdadm -D /dev/$dev | grep Rebuild`;
                if ( defined $rebuild and $rebuild =~ /([0-9]+)% complete/ ) {
                    $rpct = $1;
                } else {
                    $rpct = 0;
                }
            }
            print "$dev.value $pct\n";
            print $dev, "_rebuild.value $rpct\n";
        }
    }
}

exit 0;
|
||||
|
59
plugins/disk/raid-mismatch-count
Executable file
59
plugins/disk/raid-mismatch-count
Executable file
|
@ -0,0 +1,59 @@
|
|||
#!/bin/sh
|
||||
# Detect and display Linux sw-raid mismatch count
|
||||
# Copyright (C) 2011 Rory Jaffe <rsjaffe@gmail.com>
|
||||
# derived from md_sync_speed by Kristian Lyngstøl
|
||||
# Copyright (C) 2010 Kristian Lyngstøl <kristian@bohemians.org>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
#####
|
||||
#
|
||||
# Magic Markers:
|
||||
# #%# family=auto
|
||||
# #%# capabilities=autoconf
|
||||
|
||||
|
||||
# Names of all md devices that expose a mismatch counter
targets=""
for counter in /sys/devices/virtual/block/*/md/mismatch_cnt; do
	# A pattern without any match is left as it is, so check for the file
	[ -r "$counter" ] || continue
	# The sixth '/'-separated field of the path is the name of the device
	targets="$targets $(echo "$counter" | cut -d/ -f6)"
done

if [ "x$1" = "xautoconf" ]; then
	if [ -z "$targets" ]; then
		echo "no (no md devices found under /sys/devices/virtual/block/*/md/mismatch_cnt)"
		exit 1;
	else
		echo "yes"
		exit 0
	fi
fi

if [ "x$1" = "xconfig" ]; then
	cat << __EOF__
graph_title Software-raid mismatch count
graph_args -l 0
graph_info Display mismatch count of software raid devices
graph_category disk
graph_vlabel Count
__EOF__
	for target in $targets; do
		echo "$target.label $target"
	done
	exit
fi

# The value of this plugin is the mismatch counter itself
for target in $targets; do
	echo "$target.value $(cat "/sys/devices/virtual/block/$target/md/mismatch_cnt")"
done
|
246
plugins/disk/scsi_queue
Executable file
246
plugins/disk/scsi_queue
Executable file
|
@ -0,0 +1,246 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
Munin plugin which reports queue busy-values per online SCSI
|
||||
device on Linux, as seen in /proc/scsi/sg/devices
|
||||
|
||||
If the busy-values often reach the queue depth of the device,
|
||||
one might consider increasing the queue depth. Hence, this
|
||||
plugin.
|
||||
|
||||
Wildcard use:
|
||||
If your system has many SCSI-like devices, filtering may be needed
|
||||
to make the resulting graphs readable.
|
||||
If you symlink the plugin, so that it's executed as
|
||||
scsi_queue_X_through_Y
|
||||
then the plugin will only look at devices
|
||||
/dev/sdX .. /dev/sdY
|
||||
X and Y may only be one-character values.
|
||||
X and Y are translated into a regular expression like:
|
||||
sd[X-Y]
|
||||
"""
|
||||
|
||||
# Author: Troels Arvin <tra@sst.dk>
|
||||
# See http://troels.arvin.dk/code/munin/ for latest version.
|
||||
|
||||
# Only tested with Red Hat Enterprise Linux 5 / CentOS 5, currently.
|
||||
|
||||
# Released according to the "New BSD License" AKA the 3-clause
|
||||
# BSD License:
|
||||
# ====================================================================
|
||||
# Copyright (c) 2010, Danish National Board of Health.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the the Danish National Board of Health nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY the Danish National Board of Health ''AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL the Danish National Board of Health BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# ====================================================================
|
||||
|
||||
# $Id: scsi_queue 13630 2010-08-31 15:29:14Z tra $
|
||||
|
||||
# Note to self:
|
||||
# The fields in /proc/scsi/sg/devices are:
|
||||
# host chan id lun type opens qdepth busy online
|
||||
|
||||
# TODO:
|
||||
# - Make it possible to group by multipath group. Might be
|
||||
# hard, though, because determining path groups seems
|
||||
# to require root privileges.
|
||||
# - Support autoconf
|
||||
# - How to support filtering on installations which have
|
||||
# many SCSI devices, beyond /dev/sdz?
|
||||
|
||||
import os, sys, re
|
||||
|
||||
# Table of SCSI devices read by parse_procfile(); one line per device with the
# fields: host chan id lun type opens qdepth busy online
procfile = '/proc/scsi/sg/devices'
# Directory holding one entry per SCSI device, named host:chan:id:lun
sysfs_base = '/sys/bus/scsi/devices'

my_canonical_name = 'scsi_queue' # If called as - e.g. - scsi_queue_foo, then
                                 # foo will be interpreted as a device filter.
                                 # For this, we need a base name.
|
||||
|
||||
def bailout(msg):
    """Report a fatal problem on stderr and stop the plugin.

    msg -- text of the error; a trailing newline is appended.

    Always terminates the process with exit status 1.
    """
    error_stream = sys.stderr
    error_stream.write(msg + "\n")
    raise SystemExit(1)
|
||||
|
||||
def print_config(devices, filter_from, filter_through):
    """Print the Munin "config" output for the given devices.

    devices        -- dict indexed by block device name; every value is a
                      dict providing the keys 'qdepth', 'vendor' and 'model'.
    filter_from    -- first device letter of the active filter, or None.
    filter_through -- last device letter of the active filter, or None.

    Devices are listed in sorted name order.
    """
    title_qualification = ''
    if filter_from and filter_through:
        title_qualification = ' for devices sd%s through sd%s' % (filter_from, filter_through)

    # print() with a single string argument behaves identically on Python 2
    # and Python 3, so the plugin runs whatever "python" resolves to.
    print('graph_title SCSI queue busy values' + title_qualification)
    print('graph_vlabel busy count')
    print('graph_args --base 1000 -l 0')
    print('graph_category disk')
    # The values come from /proc (the text used to say "/prod").
    print('graph_info This graph shows the queue busy values, as seen in /proc/scsi/sg/devices')

    for key in sorted(devices):
        qdepth = devices[key]['qdepth']
        print('%s.min 0' % key)
        print('%s.type GAUGE' % key)
        print('%s.label %s (%s %s); qdepth=%s' % (
            key,
            key,
            devices[key]['vendor'],
            devices[key]['model'],
            qdepth
        ))
        # The busy value is capped by the queue depth of the device.
        print('%s.max %s' % (key, qdepth))
|
||||
|
||||
# Return a list of lists representing interesting parts from procfile
def parse_procfile():
    """Read procfile and split every line into its whitespace-separated fields.

    Returns a list with one list of strings per line of procfile.
    Calls bailout() (which exits the plugin) if the file cannot be read.
    """
    retval = []
    try:
        fh = open(procfile)
        try:
            for line in fh:
                retval.append(line.split())
        finally:
            # Close the handle even if reading fails half-way.
            fh.close()
    except IOError:
        # sys.exc_info() instead of "except IOError, e" / "as e": this form
        # is valid on every Python 2 and Python 3 version.
        bailout('IO error: ' + str(sys.exc_info()[1]))
    return retval
|
||||
|
||||
# Try to read a file's content. If any I/O problem: return empty string
def readfile(path):
    """Return the content of the file at *path* with trailing whitespace removed.

    path -- name of the file to read.

    Returns the empty string if the file cannot be opened or read.
    """
    try:
        f = open(path)
        try:
            return f.read().rstrip()
        finally:
            # Runs on success and on a failing read() alike, so the handle
            # is never leaked.
            f.close()
    except IOError:
        return ''
|
||||
|
||||
# Return dict of dicts, indexed by device name
def map_procentries_to_devices(list_of_dicts, devfilter_regex):
    """Merge the parsed procfile rows with per-device details from sysfs.

    list_of_dicts   -- despite its name a list of lists: the fields of one
                       procfile line per element, in the order
                       host chan id lun type opens qdepth busy online.
    devfilter_regex -- regular expression a block device name must match
                       (re.match, i.e. anchored at the start only) to be
                       included, or None to include every device.

    Returns a dict indexed by block device name.  Each value is a dict with
    the keys 'model' and 'vendor' (read from sysfs, '' if unreadable) and
    'qdepth' and 'busy' (fields 7 and 8 of the procfile line).

    Calls bailout() if one SCSI device exposes more than one block device.
    """
    device_dict = {}

    regex_compiled = None
    if devfilter_regex:
        regex_compiled = re.compile(devfilter_regex)

    for elem in list_of_dicts:
        # In /sys/bus/scsi/devices we see a number of directory
        # entries, such as:
        #   0:0:0:0
        #   2:0:0:0
        #   3:0:0:0
        #
        # The colon-separated values map to the first four parts
        # of /proc/scsi/sg/devices
        # And the directory entries are symlinks which point to directories
        # in /sys/devices. By following a symlink, we may end up in
        # a directory which contains directory entries like:
        #  - block:sdb
        #  ...
        #  - model
        #  ...
        #  - vendor
        sys_pathname = sysfs_base + '/' + ':'.join(elem[:4])  # isolate stuff like 2:0:0:0

        # Should actually not happen, but nonetheless:
        if not os.path.islink(sys_pathname):
            continue

        # Collect the block device name(s) belonging to this SCSI device.
        # Two sysfs layouts are understood:
        #  - an entry called block:SOMETHING (the layout this plugin was
        #    written for)
        #  - a directory called block containing an entry SOMETHING
        #    (layout of newer kernels)
        blockdev_names = []
        for dirent in os.listdir(sys_pathname):
            if dirent.startswith('block:'):
                blockdev_names.append(dirent.split(':')[1])
            elif dirent == 'block':
                block_dir = os.path.join(sys_pathname, dirent)
                if os.path.isdir(block_dir):
                    blockdev_names.extend(os.listdir(block_dir))

        # Not a disk-like device (no block device attached): ignore it.
        if not blockdev_names:
            continue
        if len(blockdev_names) > 1:
            # The message used to reference an undefined variable, which
            # turned this error report into a NameError.
            bailout("Got more than one result when globbing for '%s'" % (sys_pathname + '/block:*'))
        blockdev_name = blockdev_names[0]

        # If device filtering is active, filter now
        if regex_compiled is not None:
            if not regex_compiled.match(blockdev_name):
                continue

        # Merge info from the /proc and /sys sources
        device_dict[blockdev_name] = {
            'model' : readfile(sys_pathname + '/model'),
            'vendor': readfile(sys_pathname + '/vendor'),
            'qdepth': elem[6],
            'busy'  : elem[7]
        }
    return device_dict
|
||||
|
||||
def print_values(devices):
    """Print one Munin "<device>.value <busy>" line per device.

    devices -- dict indexed by block device name; every value is a dict
               providing the key 'busy'.

    Devices are printed in sorted name order.
    """
    for devname in sorted(devices):
        # Single-argument print() works on Python 2 and Python 3 alike.
        print("%s.value %s" % (
            devname,
            devices[devname]['busy']
        ))
|
||||
|
||||
|
||||
|
||||
|
||||
# Initial sanity check
n_args = len(sys.argv)
if n_args > 2:
    # At most one arg expected.  sys.argv[0] is the plugin itself, so the
    # number of arguments actually given is n_args - 1.  Errors go through
    # bailout() (stderr, exit status 1) like everywhere else in this file,
    # because stdout is reserved for data sent to Munin.
    bailout('%d arguments given - expecting only one' % (n_args - 1))

# See if we were called with a Munin wildcard-style 'arg0-argument'
# E.g., if called as scsi_queue_a_through_c, then consider only
# devices sda, sdb, sdc.
devfilter_regex = None
called_as = os.path.basename(sys.argv[0])
match = re.match(my_canonical_name+'_([^_])_through_([^_])', called_as)
filter_from = None
filter_through = None
if match:
    filter_from = match.group(1)
    filter_through = match.group(2)
    devfilter_regex = 'sd['+filter_from+'-'+filter_through+']'

# Perform main piece of work
devices = map_procentries_to_devices(
    parse_procfile(),
    devfilter_regex
)

# See how we were called
if n_args == 2:
    # An argument was given, so let's not simply print
    # values.
    arg = sys.argv[1]
    if arg == 'config':
        print_config(devices, filter_from, filter_through)
        sys.exit(0)
    else:
        bailout("Unknown argument '%s'" % arg)

# No arguments given; print values
print_values(devices)
|
78
plugins/disk/smart
Executable file
78
plugins/disk/smart
Executable file
|
@ -0,0 +1,78 @@
|
|||
#!/usr/bin/perl
|
||||
#
|
||||
# Plugin to monitor all S.M.A.R.T. capable disks
|
||||
# author: paulv@dds.nl / paulv@bikkel.org
|
||||
# licence : public domain
|
||||
#
|
||||
# Usage: copy or link into /etc/munin/plugins/ as smart_[device] ( smart_sg0 for example)
|
||||
# Run as root
|
||||
#
|
||||
# Parameters:
|
||||
#
|
||||
# config (required)
|
||||
# autoconf (optional - used by munin-config)
|
||||
#
|
||||
# Magic markers (optional - used by munin-config and some installation
|
||||
# scripts):
|
||||
#
|
||||
#%# family=manual
|
||||
#%# capabilities=autoconf
|
||||
#
|
||||
|
||||
use strict;
|
||||
|
||||
# The device is derived from the name the plugin is invoked as
# (smart_sg0 -> /dev/sg0).  Declaration and conditional assignment are kept
# apart because "my $x = ... if ...;" has undefined behaviour in Perl.
my $device;
$device = "/dev/$1" if ( $0 =~ /[\w_-]+_(\w+\d+)$/ );
my $smartctl = 'smartctl';
my $smartctl_param = ' --attributes ';
my %attr;

# Attributes which smartctl only knows as "Unknown_Attribute", mapped to
# the descriptive name this plugin reports instead.
my %known_names = (
    '170_Unknown_Attribute' => '170_Reserved_Block_Count',
    '171_Unknown_Attribute' => '171_Program_Fail_Count',
    '172_Unknown_Attribute' => '172_Erase_Fail_Count',
    '173_Unknown_Attribute' => '173_Wear_Leveling_Count',
    '174_Unknown_Attribute' => '174_Unexpected_Pwr_Loss',
    '189_Unknown_Attribute' => '189_High_Fly_Writes',
    '202_Unknown_Attribute' => '202_TA_Increase_Count',
    '206_Unknown_Attribute' => '206_Flying_Height',
);

if ( $ARGV[0] and $ARGV[0] eq "autoconf" ) {
    print "yes\n";
    exit 0;
}

# Everything below needs a device; fail with a clear message instead of
# running smartctl without one.
die "No device found in plugin name '$0' (expected smart_<device>)\n"
    unless defined $device;

# List-form pipe open: smartctl is executed directly, without a shell.
# split ' ' turns the parameter string into separate arguments.
open(my $smart_fh, '-|', $smartctl, split(' ', $smartctl_param), $device)
    || die "Cannot run $smartctl: $!\n";

while (<$smart_fh>) {
    chomp;
    # One row of the attribute table; field 1 is the ID, field 2 the
    # attribute name and field 10 the raw value.
    if ( m/\s*(\d+)\s+([\w_-]+)\s+(\d+x.+)\s+(\d+)\s+(\d+)\s+(\d+)\s+([\w_-]+)\s+(\w+)\s+([\w_-]+)\s+(\d+)/ ) {
        my $key = $1 . '_' . $2;
        my $rawvalue = $10;

        $key = $known_names{$key} if exists $known_names{$key};

        $attr{$key} = $rawvalue;
    }
}
close($smart_fh);

# NOTE(review): the field names start with the numeric attribute ID and may
# contain '-'; this presumably violates Munin's field name rules -- confirm
# before renaming, since a rename changes the names of existing data series.

if ( $ARGV[0] and $ARGV[0] eq "config" )
{
    print "graph_title SMART values for $device\n";
    print "graph_args --base 1000 -l 0\n";
    print "graph_category disk\n";
    print "graph_vlabel value\n";
    print "graph_scale no\n";
    print "graph_total Total\n";
    # Sorted so that the field order is the same on every run.
    foreach my $i (sort keys %attr)
    {
        print "$i.label smartattribute $i\n";
        print "$i.draw LINE2\n";
        print "$i.min 0\n";
    }
    exit 0;
}

foreach my $k (sort keys %attr) {
    print $k . ".value " . $attr{$k} . "\n";
}
|
||||
# end
|
52
plugins/disk/smart-by-id_
Executable file
52
plugins/disk/smart-by-id_
Executable file
|
@ -0,0 +1,52 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Name of the /dev/disk/by-id entry, taken from the part of the plugin name
# following "smart-by-id_".
DISK=${0/*smart-by-id_/}

# PATH has to be extended BEFORE smartctl is looked up: smartctl usually
# lives in an sbin directory which may be missing from the inherited PATH.
export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

# Full smartctl command; $SMARTOPTS may supply extra smartctl options
# through the environment.
SMARTCTL="`which smartctl | head -1` $SMARTOPTS"

echo "# $DISK"

case $1 in
config)
	echo 'graph_title S.M.A.R.T values for drive '`readlink -f "/dev/disk/by-id/$DISK"`
	echo 'graph_vlabel Attribute S.M.A.R.T value'
	echo 'graph_args --base 1000 --lower-limit 0'
	echo 'graph_category disk'
	echo 'graph_info This graph shows the value of all S.M.A.R.T attributes of drive '`$SMARTCTL -i "/dev/disk/by-id/$DISK" | grep -Ei 'model|serial|firmware' | sed -re 's/.+?: +//gm' | tr '\n' ' '`
	echo 'smartctl_exit_status.label smartctl exit value'
	echo 'smartctl_exit_status.draw LINE2'
	echo 'smartctl_exit_class.label smartctl exit status'
	echo 'smartctl_exit_class.draw AREA'
	echo 'smartctl_exit_class.warning :0'
	echo 'smartctl_exit_class.critical :1'

	# One field per attribute row (the rows contain the 0x.... flag column).
	$SMARTCTL -A "/dev/disk/by-id/$DISK" | grep 0x | while read
	do
		OP=($REPLY)
		# Field name: every character that is not a letter or digit becomes
		# '_', the same rule the awk gsub() in the fetch branch applies.
		ON=`echo -n ${OP[1]} | tr -c 'A-Za-z0-9' '_'`
		OL=`echo -n ${OP[1]} | tr '_' ' '`
		echo ${ON}.label ${OL}
		echo ${ON}.draw LINE2
		# Sixth column of the row is used as the lower critical bound.
		echo ${ON}.critical ${OP[5]}:
	done
	;;
suggest)
	ls -1 /dev/disk/by-id/scsi-* | grep -v part | grep -o scsi.*
	;;
"")
	$SMARTCTL -a "/dev/disk/by-id/$DISK" &> /dev/null
	SES=$?
	echo "smartctl_exit_status.value $SES"
	# Condense the smartctl exit bitmask into a class:
	#   bits 0-2 set -> give up (exit 1), no further values are printed
	#   bits 3-4 set -> class 2
	#   bits 5-7 set -> class 1, unless class 2 was already assigned (the
	#                   second test then sees SES=2, which has none of
	#                   these bits)
	if [ $SES -gt 0 ]
	then
		if [ $((SES & 7)) -gt 0 ] ; then exit 1 ; fi
		if [ $((SES & 24)) -gt 0 ] ; then SES=2 ; fi
		if [ $((SES & 224)) -gt 0 ] ; then SES=1 ; fi
	fi
	echo "smartctl_exit_class.value $SES"
	$SMARTCTL -A "/dev/disk/by-id/$DISK" | awk '/0x/ { gsub(/[^a-zA-Z0-9]/,"_",$2); print $2.".value",$4; }'
	;;
esac
|
||||
|
||||
#exit 0
|
585
plugins/disk/smart_
Executable file
585
plugins/disk/smart_
Executable file
|
@ -0,0 +1,585 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- encoding: iso-8859-1 -*-
|
||||
#
|
||||
# Wildcard-plugin to monitor S.M.A.R.T attribute values through smartctl,
|
||||
# which is part of smartmontools package:
|
||||
# http://smartmontools.sourceforge.net/
|
||||
#
|
||||
# To monitor a S.M.A.R.T device, link smart_<device> to this file.
|
||||
# E.g.
|
||||
# ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
|
||||
# ...will monitor /dev/hda.
|
||||
#
|
||||
# Needs following minimal configuration in plugin-conf.d/munin-node:
|
||||
# [smart_*]
|
||||
# user root
|
||||
# group disk
|
||||
#
|
||||
# Parameters
|
||||
# smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl)
|
||||
# smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs
|
||||
# ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False
|
||||
#
|
||||
# Parameters can be specified on a per-drive basis, eg:
|
||||
# [smart_hda]
|
||||
# user root
|
||||
# group disk
|
||||
# env.smartargs -H -c -l error -l selftest -l selective -d ata
|
||||
# env.smartpath /usr/local/sbin/smartctl
|
||||
#
|
||||
# [smart_twa0-1]
|
||||
# user root
|
||||
# group disk
|
||||
# env.smartargs -H -l error -d 3ware,1
|
||||
# env.ignorestandby True
|
||||
#
|
||||
# [smart_twa0-2]
|
||||
# user root
|
||||
# group disk
|
||||
# env.smartargs -H -l error -d 3ware,2
|
||||
#
|
||||
# Author: Nicolas Stransky <Nico@neo-lan.net>
|
||||
#
|
||||
# v1.0 22/08/2004 - First draft
|
||||
# v1.2 28/08/2004 - Clean up the code, add a verbose option
|
||||
# v1.3 14/11/2004 - Compatibility with python<2.2. See comments in the code
|
||||
# v1.4 17/11/2004 - Deal with non zero exit codes of smartctl
|
||||
# - config now prints the critical thresholds, as reported by smartctl
|
||||
# v1.5 18/11/2004 - Plot smartctl_exit_code bitmask
|
||||
# v1.6 21/11/2004 - Add autoconf and suggest capabilities
|
||||
# - smartctl path can be passed through "smartpath" environment variable
|
||||
# - Additional smartctl args can be passed through "smartargs" environment variable
|
||||
# v1.7 29/11/2004 - Add suggest capabilities for NetBSD, OpenBSD, FreeBSD and SunOS.
|
||||
# - Allow to override completely the smartctl arguments with "smartargs"
|
||||
# v1.8 16/02/2005 - Exit status field now only triggers warnings, not criticals.
|
||||
# v1.9 07/07/2005 - Allow to query several drives on the same 3ware card.
|
||||
# - Correct a bug when '-i' was not listed in smartargs
|
||||
# - Don't fail if no value was obtained for hard drive model
|
||||
# v1.10 19/08/2005 - smartctl_exit_code is now a numerical value
|
||||
# v2.0 08/05/2009 - Correct bug in the interpretation of smartctl_exit_code
|
||||
# - New option to suppress SMART warnings in munin
|
||||
# - Temporary lack of output for previously existing drive now reports U
|
||||
# - The plugin now contains its own documentation for use with munindoc
|
||||
# - Removed python<2.2 compatibility comments
|
||||
# - Better autodetection of drives
|
||||
# - Don't spin up devices in a low-power mode.
|
||||
#
|
||||
# Copyright (c) 2004-2009 Nicolas Stransky.
|
||||
#
|
||||
# Permission to use, copy, and modify this software with or without fee
|
||||
# is hereby granted, provided that this entire notice is included in
|
||||
# all source code copies of any software which is or includes a copy or
|
||||
# modification of this software.
|
||||
#
|
||||
# THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
|
||||
# IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
|
||||
# REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
|
||||
# MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
|
||||
# PURPOSE.
|
||||
#
|
||||
#
|
||||
# Magic markers
|
||||
#%# capabilities=autoconf suggest
|
||||
#%# family=auto
|
||||
|
||||
## You may edit the following 3 variables
|
||||
# Increase verbosity (True/False)
|
||||
verbose=False
|
||||
# Report SMART warning/critical thresholds to Munin (True/False); set to False to suppress them
|
||||
report_warnings=True
|
||||
# Modify to your needs:
|
||||
statefiledir='/var/lib/munin/plugin-state/'
|
||||
# You may not modify anything below this line
|
||||
|
||||
import os, sys, string, pickle
|
||||
from math import log
|
||||
plugin_version="2.0"
|
||||
|
||||
def verboselog(s):
    """Write the message *s* to stderr, prefixed with the plugin name."""
    global plugin_name
    sys.stderr.write(''.join((plugin_name, ': ', s, '\n')))

# With verbosity switched off, logging is replaced by a function that
# ignores its argument.
if not verbose:
    def verboselog(s):
        return None
|
||||
|
||||
def read_values(hard_drive):
    """Run smartctl on /dev/<hard_drive> and store the result in smart_values.

    hard_drive -- device name relative to /dev (taken from the plugin's
                  symlink name or from drive autodetection).

    Side effects on module globals:
      smart_values -- emptied, then filled with one entry per attribute
                      ({"value": ..., "threshold": ...}, '-' in the name
                      replaced by '_'), plus "smartctl_exit_status" and
                      "model".
      emptyoutput  -- True if smartctl printed no attribute at all.

    Returns the status reported when closing the smartctl pipe (None if
    smartctl exited with 0).  Exits the plugin with status 1 if smartctl
    cannot be run or its output cannot be interpreted.
    """
    global smart_values, emptyoutput
    # Forget the result of any previous call.  Drive autodetection calls
    # this function once per candidate; without the reset the dict would
    # never be empty again after the first call and emptyoutput would stay
    # False for every following drive.
    smart_values.clear()
    model = "unknown"  # some drives report no model at all
    # Only an explicit "yes" value enables ignorestandby; a plain truth
    # test would treat the string "False" as true as well.
    ignore_standby = os.getenv('ignorestandby', '').strip().lower() not in ('', '0', 'no', 'false')
    if ignore_standby:
        standby_args = ' '
    else:
        standby_args = ' -n standby '
    try:
        verboselog('Reading S.M.A.R.T values')
        os.putenv('LC_ALL', 'C')
        smart_output = os.popen(os.getenv('smartpath', '/usr/sbin/smartctl') + ' ' + os.getenv('smartargs', '-a') + standby_args + '-A -i /dev/' + hard_drive)
        in_attribute_table = False
        for l in smart_output:
            if l[:-1] == '':
                # A blank line ends the attribute table.
                in_attribute_table = False
            elif l[:13] == 'Device Model:' or l[:7] == 'Device:':
                model_list = l.split(':')[1].split()
                if 'Version' in model_list:
                    model_list.remove('Version')
                model = ' '.join(model_list)
            if in_attribute_table:
                smart_attribute = l.split()
                smart_values[smart_attribute[1].replace('-', '_')] = {"value": smart_attribute[3], "threshold": smart_attribute[5]}
            elif l[:18] == "ID# ATTRIBUTE_NAME":
                # Start reading the Attributes block
                in_attribute_table = True
        exit_status = smart_output.close()
        if exit_status is not None:
            # smartctl exit code is a bitmask, check man page.
            num_exit_status = int(exit_status / 256)  # Python convention
            highest_bit = int(log(num_exit_status, 2))
            if highest_bit <= 2:  # bit code
                verboselog('smartctl cannot access S.M.A.R.T values on drive ' + hard_drive + '. Command exited with code ' + str(num_exit_status) + ' (bit ' + str(highest_bit) + ')')
            else:
                verboselog('smartctl exited with code ' + str(num_exit_status) + ' (bit ' + str(highest_bit) + '). ' + hard_drive + ' may be FAILING RIGHT NOW!')
        else:
            num_exit_status = 0
    except Exception:
        verboselog('Cannot access S.M.A.R.T values! Check user rights or propper smartmontools installation/arguments.')
        sys.exit(1)
    if not smart_values:
        verboselog('Can\'t find any S.M.A.R.T values in smartctl output!')
        emptyoutput = True
    else:
        emptyoutput = False
    smart_values["smartctl_exit_status"] = {"value": str(num_exit_status), "threshold": "1"}
    smart_values["model"] = model
    return exit_status
|
||||
|
||||
def open_state_file(hard_drive, mode):
    """Open the state file belonging to a drive.

    hard_drive -- list of name parts as returned by get_hard_drive_name();
                  the parts are joined with '-' to form the file name.
    mode       -- open() mode, e.g. "r" or "w".

    The file is always opened in binary mode because it holds pickle data,
    which Python 3 refuses to read from or write to a text-mode file.

    Returns the open file object; exceptions raised by open() propagate.
    """
    global statefiledir
    if 'b' not in mode:
        mode = mode + 'b'
    return open(statefiledir + '/smart-' + "-".join(hard_drive) + '.state', mode)
|
||||
|
||||
def update_state_file(hard_drive):
    """Save the current smart_values to the state file of the drive.

    hard_drive -- list of name parts as returned by get_hard_drive_name().

    Failures are only logged (best effort): the plugin can still report
    the values it just read.
    """
    try:
        verboselog('Saving statefile')
        state_file = open_state_file(hard_drive, "w")
        try:
            pickle.dump(smart_values, state_file)
        finally:
            # Close explicitly so the data is flushed to disk right away.
            state_file.close()
    except Exception:
        verboselog('Error trying to save state file! Check access rights')
|
||||
|
||||
def print_plugin_values(hard_drive):
    """Print the "<attribute>.value <value>" lines for Munin.

    hard_drive -- list of name parts as returned by get_hard_drive_name().

    If the last smartctl run produced no attributes, the work is handed to
    print_unknown_from_statefile() instead.
    """
    global emptyoutput, smart_values
    if emptyoutput:
        print_unknown_from_statefile(hard_drive, smart_values)
        return
    verboselog('Printing S.M.A.R.T values')
    for key, entry in smart_values.items():
        # "model" is descriptive text, not a data series.
        if key != "model":
            print(key + ".value " + entry["value"])
|
||||
|
||||
def print_config(hard_drive):
    """Print the Munin "config" output for a drive.

    hard_drive -- list of name parts as returned by get_hard_drive_name().

    The attribute list is taken from the drive's state file if one exists.
    Otherwise the drive is queried through read_values() and a state file
    is written.  Exits the plugin with status 1 if an existing state file
    cannot be loaded.
    """
    global report_warnings, smart_values, statefiledir
    drive_label = ",".join(hard_drive)
    if os.path.exists(statefiledir + '/smart-' + "-".join(hard_drive) + '.state'):
        try:
            verboselog('Try to recall previous S.M.A.R.T attributes for ' + drive_label)
            state_file = open_state_file(hard_drive, "r")
            try:
                # NOTE(review): pickle executes whatever the file describes;
                # the state directory must only be writable by trusted users.
                smart_values_state = pickle.load(state_file)
            finally:
                state_file.close()
        except Exception:
            verboselog('Error opening existing state file!')
            sys.exit(1)
    else:
        verboselog('No state file, reading S.M.A.R.T values for the first time')
        read_values(hard_drive[0])
        state_file = open_state_file(hard_drive, "w")
        try:
            pickle.dump(smart_values, state_file)
        finally:
            state_file.close()
        smart_values_state = smart_values

    verboselog('Printing configuration')
    print('graph_title S.M.A.R.T values for drive ' + drive_label)
    print('graph_vlabel Attribute S.M.A.R.T value')
    print('graph_args --base 1000 --lower-limit 0')
    print('graph_category disk')
    print('graph_info This graph shows the value of all S.M.A.R.T attributes of drive ' + drive_label + ' (' + smart_values_state['model'] + '). smartctl_exit_status is the return value of smartctl. A non-zero return value indicates an error, a potential error, or a fault on the drive.')
    # sorted() instead of keys()/sort(): dict.keys() is no list on Python 3.
    for key in sorted(smart_values_state):
        # These two entries are not drive attributes; the exit status gets
        # its own field definition below.
        if key in ['smartctl_exit_status', 'model']:
            continue
        print(key + '.label ' + key)
        print(key + '.draw LINE2')
        if report_warnings:
            print(key + '.critical ' + smart_values_state[key]["threshold"] + ':')
    print('smartctl_exit_status.label smartctl_exit_status')
    print('smartctl_exit_status.draw LINE2')
    if report_warnings:
        print('smartctl_exit_status.warning ' + smart_values_state['smartctl_exit_status']["threshold"])
|
||||
|
||||
def print_unknown_from_statefile(hard_drive, smart_values):
    """Print "<attribute>.value U" for every attribute in the state file.

    Used when smartctl delivered no values: the attributes known from an
    earlier run are reported as unknown ("U").

    hard_drive   -- list of name parts as returned by get_hard_drive_name().
    smart_values -- accepted for the callers' sake; not used here.

    Exits the plugin with status 1 if there is no state file or if it
    cannot be loaded.
    """
    global statefiledir
    if os.path.exists(statefiledir + '/smart-' + "-".join(hard_drive) + '.state'):
        try:
            verboselog('Failed to get S.M.A.R.T values from drive. Try to recall previous S.M.A.R.T attributes for ' + ",".join(hard_drive))
            state_file = open_state_file(hard_drive, "r")
            try:
                smart_values_state = pickle.load(state_file)
            finally:
                state_file.close()
        except Exception:
            verboselog('Error opening existing state file!')
            sys.exit(1)
    else:
        # Nothing was read from the drive and nothing is remembered from
        # earlier runs: there is nothing to report.
        verboselog('No state file, reading S.M.A.R.T values for the first time')
        # sys.exit() rather than the exit() helper, which is only installed
        # by the site module.
        sys.exit(1)

    verboselog('Printing unknown values for all attributes in state file')
    for key in sorted(smart_values_state):
        if key == 'model':
            continue
        print(key + '.value U')
|
||||
|
||||
def get_hard_drive_name():
    """Derive the drive to monitor from the name the plugin was called as.

    The device is the text after the last '_' of plugin_name.  Returns a
    list: [device], or [device, number] if the text contains a '-' (several
    plugins for drives behind the same 3ware card, e.g. smart_twa0-1 gives
    ['twa0', '1']).

    Exits the plugin with status 1 if the plugin name contains no device or
    if /dev/<device> does not exist.
    """
    global plugin_name
    try:
        device = plugin_name[plugin_name.rindex('_') + 1:]
    except ValueError:
        # No '_' in the plugin name at all.
        device = ''
    if not device:
        verboselog('No S.M.A.R.T device name found in plugin\'s symlink!')
        sys.exit(1)
    name = [device]
    if os.uname()[0] == "SunOS":
        # if hard_drive name starts with "rdsk" or "rmt", try to reconstruct the path
        if device[0:4] == "rdsk":
            name[0] = os.path.join("rdsk", device[4:])
        elif device[0:3] == "rmt":
            name[0] = os.path.join("rmt", device[3:])
    # For 3ware cards, we have to set multiple plugins for the same hard drive name.
    # Let's see if we find a '-' in the drive name.
    if name[0].find('-') != -1:
        # Put the drive name and its number in a list
        split_at = name[0].rindex('-')
        name = [name[0][:split_at], name[0][split_at + 1:]]
    # Check that the drive exists in /dev.  This exit is deliberately not
    # wrapped in a catch-all handler, so the log shows the real reason.
    if not os.path.exists('/dev/' + name[0]):
        verboselog('/dev/' + name[0] + ' not found!')
        sys.exit(1)
    return name
|
||||
|
||||
def _probe_drive(device, drives, suggestion=None):
    """Append *suggestion* (default: *device*) to *drives* if /dev/<device> answers SMART queries."""
    if suggestion is None:
        suggestion = device
    try:
        verboselog('Trying ' + suggestion + '...')
        exit_status = read_values(device)
        # Accept the drive if smartctl succeeded, or failed only with one of
        # the higher status bits, and delivered at least one attribute.
        if (exit_status is None or int(log(int(exit_status / 256), 2)) > 2) and not emptyoutput:
            drives.append(suggestion)
    except (Exception, SystemExit):
        # Best effort: read_values() leaves via sys.exit() when smartctl
        # cannot be used on this device; such a drive is simply skipped.
        pass


def _first_output_line(command):
    """Return the first line printed by the shell command *command*, stripped."""
    pipe = os.popen(command)
    try:
        return pipe.readline().strip()
    finally:
        pipe.close()


def find_smart_drives():
    """Autodetect drives that deliver S.M.A.R.T values.

    Supports Linux, OpenBSD, FreeBSD, NetBSD and SunOS; drives behind a
    3ware card are not detected.  Every candidate is probed through
    read_values(), which overwrites the globals smart_values and
    emptyoutput.

    Returns the list of names to append to "smart_" for each usable drive.
    """
    global emptyoutput
    # Try to autodetect Linux, *BSD, SunOS drives. Don't try to autodetect drives on a 3Ware card.
    drives = []
    system = os.uname()[0]
    if system == "Linux":
        if os.path.exists('/sys/block/'):
            # Running 2.6
            try:
                for drive in os.listdir('/sys/block/'):
                    if drive[:2] in ['md', 'fd', 'lo', 'ra', 'dm']:
                        continue  # Ignore MD, Floppy, loop , RAM and LVM devices.
                    _probe_drive(drive, drives)
            except Exception:
                verboselog('Failed to list devices in /sys/block')
        else:
            verboselog('Not running linux2.6, failing back to /proc/partitions')
            try:
                partitions = open('/proc/partitions', 'r')
                try:
                    L = partitions.readlines()
                finally:
                    partitions.close()
                for l in L:
                    words = l.split()
                    # Skip the header and blank lines.
                    if len(words) == 0 or words[0][0] not in string.digits:
                        continue
                    if words[0] in ['1', '9', '58', '254']:
                        continue  # Ignore RAM, md, LVM and LVM2 devices
                    # Names ending in a digit are skipped; only the others
                    # are probed as whole drives.
                    if words[-1][-1] not in string.digits:
                        _probe_drive(words[-1], drives)
                verboselog('Found drives in /proc/partitions ! ' + str(drives))
            except Exception:
                verboselog('Failed to list devices in /proc/partitions')
    elif system == "OpenBSD":
        try:
            kerndisks = _first_output_line('sysctl hw.disknames')
            for drive in kerndisks[kerndisks.rindex('=') + 1:].split(','):
                if drive[:2] in ['md', 'cd', 'fd']:
                    continue  # Ignore Memory Disks, CD-ROM drives and Floppy
                _probe_drive(drive + 'c', drives)
        except Exception:
            verboselog('Failed to list OpenBSD disks')
    elif system == "FreeBSD":
        try:
            kerndisks = _first_output_line('sysctl kern.disks')
            for drive in kerndisks.split()[1:]:
                if drive[:2] in ['md', 'cd', 'fd']:
                    continue  # Ignore Memory Disks, CD-ROM drives and Floppy
                _probe_drive(drive, drives)
        except Exception:
            verboselog('Failed to list FreeBSD disks')
    elif system == "NetBSD":
        try:
            kerndisks = _first_output_line('sysctl hw.disknames')
            for drive in kerndisks.split()[2:]:
                if drive[:2] in ['md', 'cd', 'fd']:
                    continue  # Ignore Memory Disks, CD-ROM drives and Floppy
                _probe_drive(drive + 'c', drives)
        except Exception:
            verboselog('Failed to list NetBSD disks')
    elif system == "SunOS":
        try:
            from glob import glob
            # The suggested name has no '/' (it becomes part of a file
            # name; get_hard_drive_name() re-inserts it), but the probe has
            # to address the real device node below /dev/rdsk or /dev/rmt.
            for drivepath in glob('/dev/rdsk/*s2'):
                drive = os.path.basename(drivepath)
                _probe_drive(os.path.join('rdsk', drive), drives, 'rdsk' + drive)
            for drivepath in glob('/dev/rmt/*'):
                drive = os.path.basename(drivepath)
                _probe_drive(os.path.join('rmt', drive), drives, 'rmt' + drive)
        except Exception:
            verboselog('Failed to list SunOS disks')
    return drives
|
||||
|
||||
### Main part ###
|
||||
|
||||
# Module-level state.  emptyoutput is also read by find_smart_drives();
# smart_values is presumably filled by read_values() -- confirm against the
# helpers defined earlier in this file.
smart_values = {}
emptyoutput = False
# The plugin is invoked through a link named smart_<device>; keep only the
# file-name part of the invocation path.
plugin_name = os.path.basename(sys.argv[0])
verboselog('plugins\' UID: '+str(os.geteuid())+' / plugins\' GID: '+str(os.getegid()))

# Parse arguments
if len(sys.argv) > 1:
    if sys.argv[1] == "config":
        hard_drive = get_hard_drive_name()
        print_config(hard_drive)
        sys.exit(0)
    elif sys.argv[1] == "autoconf":
        # Munin expects autoconf to exit 0 whether the answer is yes or no;
        # a non-zero status is reported as a plugin failure.
        if os.path.exists(os.getenv('smartpath', '/usr/sbin/smartctl')):
            print('yes')
        else:
            print('no (smartmontools not found)')
        sys.exit(0)
    elif sys.argv[1] == "suggest":
        for drive in find_smart_drives():
            print(drive)
        sys.exit(0)
    elif sys.argv[1] == "version":
        print('smart_ Munin plugin, version '+plugin_version)
        sys.exit(0)
    elif sys.argv[1] != "":
        verboselog('unknown argument "'+sys.argv[1]+'"')
        sys.exit(1)

# No argument given, doing the real job:
hard_drive = get_hard_drive_name()
read_values(hard_drive[0])
if not emptyoutput:
    update_state_file(hard_drive)
print_plugin_values(hard_drive)
# sys.exit rather than the bare exit(): the latter is only injected by the
# site module and is meant for interactive use.
sys.exit(0)
|
||||
|
||||
|
||||
### The following is the smart_ plugin documentation, intended to be used with munindoc
|
||||
"""
|
||||
=head1 NAME
|
||||
|
||||
smart_ - Munin wildcard-plugin to monitor S.M.A.R.T. attribute values through smartctl
|
||||
|
||||
=head1 APPLICABLE SYSTEMS
|
||||
|
||||
Node with B<Python> interpreter and B<smartmontools> (http://smartmontools.sourceforge.net/)
|
||||
installed and in function.
|
||||
|
||||
=head1 CONFIGURATION
|
||||
|
||||
=head2 Create link in service directory
|
||||
|
||||
To monitor a S.M.A.R.T device, create a link in the service directory
|
||||
of the munin-node named smart_<device>, which is pointing to this file.
|
||||
|
||||
E.g.
|
||||
|
||||
ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda
|
||||
|
||||
...will monitor /dev/hda.
|
||||
|
||||
=head2 Grant privileges in munin-node
|
||||
|
||||
The plugin must be run under high privileged user B<root>, to get access to the raw device.
|
||||
|
||||
So following minimal configuration in plugin-conf.d/munin-node is needed.
|
||||
|
||||
=over 2
|
||||
|
||||
[smart_*]
|
||||
user root
|
||||
group disk
|
||||
|
||||
=back
|
||||
|
||||
=head2 Set Parameter if needed
|
||||
|
||||
smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl)
|
||||
smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs
|
||||
ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False
|
||||
|
||||
Parameters can be specified on a per-drive basis, eg:
|
||||
|
||||
=over 2
|
||||
|
||||
[smart_hda]
|
||||
user root
|
||||
env.smartargs -H -c -l error -l selftest -l selective -d ata
|
||||
env.smartpath /usr/local/sbin/smartctl
|
||||
|
||||
=back
|
||||
|
||||
In particular, for SATA drives, with older versions of smartctl:
|
||||
|
||||
=over 2
|
||||
|
||||
[smart_sda]
|
||||
user root
|
||||
env.smartargs -d ata -a
|
||||
|
||||
[smart_twa0-1]
|
||||
user root
|
||||
env.smartargs -H -l error -d 3ware,1
|
||||
env.ignorestandby True
|
||||
|
||||
[smart_twa0-2]
|
||||
user root
|
||||
env.smartargs -H -l error -d 3ware,2
|
||||
|
||||
=back
|
||||
|
||||
=head1 INTERPRETATION
|
||||
|
||||
If a device supports the B<Self-Monitoring, Analysis
|
||||
and Reporting Technology (S.M.A.R.T.)> it offers readable
|
||||
access to the attribute table. There you find the B<raw value>,
|
||||
a B<normalised value> and a B<threshold> (set by the vendor)
|
||||
for each attribute, that is supported by that device.
|
||||
|
||||
The meaning and handling of the raw value is a secret of the
|
||||
vendors embedded S.M.A.R.T.-Software on the disk. The only
|
||||
relevant info from our external view is the B<normalised value>
|
||||
in comparison with the B<threshold>. If the attributes value is
|
||||
equal or below the threshold, it signals its failure and
|
||||
the B<health status> of the device will switch from B<passed> to B<failed>.
|
||||
|
||||
This plugin fetches the B<normalised values of all SMART-Attributes>
|
||||
and draws a curve for each of them.
|
||||
It takes the vendors threshold as critical limit for the munin datafield.
|
||||
So you will see an alarm, if the value reaches the vendors threshold.
|
||||
|
||||
Looking at the graph: It is a bad sign, if the curve starts
|
||||
to curl or to meander. The more horizontal it runs,
|
||||
the better. Of course it is normal, that the temperatures
|
||||
curve swings a bit. But the others should stay steady on
|
||||
their level if everything is ok.
|
||||
|
||||
S.M.A.R.T. distinguishes between B<Pre-fail> and B<Old-age>
|
||||
Attributes. An old disk will have more curling curves
|
||||
because of degradation, especially for the B<Old-age> Attributes.
|
||||
You should then backup more often, run more selftests[1] and prepare
|
||||
the disks replacement.
|
||||
|
||||
B<Act directly> if a B<Pre-fail> Attribute goes below threshold.
|
||||
Immediately back-up your data and replace your hard disk drive.
|
||||
A failure may be imminent.
|
||||
|
||||
[1] Consult the smartmontools manpages to learn about
|
||||
offline tests and automated selftests with smartd.
|
||||
Only with both activated, the values of the SMART-Attributes
|
||||
reflect the overall state of the device.
|
||||
|
||||
Tutorials and articles about S.M.A.R.T. and smartmontools:
|
||||
http://smartmontools.sourceforge.net/doc.html#tutorials
|
||||
|
||||
=head1 MAGIC MARKERS
|
||||
|
||||
#%# family=auto
|
||||
#%# capabilities=autoconf suggest
|
||||
|
||||
=head1 CALL OPTIONS
|
||||
|
||||
B<none>
|
||||
|
||||
=over 2
|
||||
|
||||
Fetches values if called without arguments:
|
||||
|
||||
E.g.: munin-run smart_hda
|
||||
|
||||
=back
|
||||
|
||||
B<config>
|
||||
|
||||
=over 2
|
||||
|
||||
Prints plugins configuration.
|
||||
|
||||
E.g.: munin-run smart_hda config
|
||||
|
||||
=back
|
||||
|
||||
B<autoconf>
|
||||
|
||||
=over 2
|
||||
|
||||
Tries to find smartctl and outputs value 'yes' for success, 'no' if not.
|
||||
|
||||
It's used by B<munin-node-configure> to see whether autoconfiguration is possible.
|
||||
|
||||
=back
|
||||
|
||||
B<suggest>
|
||||
|
||||
=over 2
|
||||
|
||||
Outputs the list of device names, that it found plugged to the system.
|
||||
|
||||
B<munin-node-configure> uses this to build the service links for this wildcard-plugin.
|
||||
|
||||
=back
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
Version 2.0
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
None known
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
(C) 2004-2009 Nicolas Stransky <Nico@stransky.cx>
|
||||
|
||||
(C) 2008 Gabriele Pohl <contact@dipohl.de>
|
||||
Reformatted the existing documentation to POD style, added the Interpretation section to the documentation.
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
GPLv2 (http://www.gnu.org/licenses/gpl-2.0.txt)
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
"""
|
175
plugins/disk/snmp__netapp_diskusage_
Executable file
175
plugins/disk/snmp__netapp_diskusage_
Executable file
|
@ -0,0 +1,175 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
=head1 NAME
|
||||
|
||||
snmp__netapp_diskusage_ - Munin plugin to retrieve file systems usage on
|
||||
NetApp storage appliances.
|
||||
|
||||
=head1 APPLICABLE SYSTEMS
|
||||
|
||||
File systems usage stats should be reported by any NetApp storage
|
||||
appliance with SNMP agent daemon activated. See na_snmp(8) for details.
|
||||
|
||||
=head1 CONFIGURATION
|
||||
|
||||
Unfortunately, SNMPv3 is not fully supported on all NetApp equipment.
|
||||
For this reason, this plugin will use SNMPv2 by default, which is
|
||||
insecure because it doesn't encrypt the community string.
|
||||
|
||||
The following parameters will help you get this plugin working :
|
||||
|
||||
[snmp_*]
|
||||
env.community MyCommunity
|
||||
|
||||
If your community name is 'public', you should really worry about
|
||||
security and immediately reconfigure your appliance.
|
||||
|
||||
Please see 'perldoc Munin::Plugin::SNMP' for further configuration.
|
||||
|
||||
=head1 INTERPRETATION
|
||||
|
||||
The plugin reports file systems usage. This can help you monitor file
|
||||
systems usage in a given period of time.
|
||||
|
||||
=head1 MIB INFORMATION
|
||||
|
||||
This plugin requires support for the NETWORK-APPLIANCE-MIB issued by
|
||||
Network Appliance. It reports the content of the DfEntry OID.
|
||||
|
||||
=head1 MAGIC MARKERS
|
||||
|
||||
#%# family=snmpauto
|
||||
#%# capabilities=snmpconf
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
v1.0 - 06/22/2009 14:05:03 CEST
|
||||
Initial revision
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
This plugin is copyright (c) 2009 by Guillaume Blairon.
|
||||
|
||||
NetApp is a registered trademark and Network Appliance is a trademark
|
||||
of Network Appliance, Inc. in the U.S. and other countries.
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
This plugin hasn't been tested on much hardware. If you encounter bugs,
|
||||
please report them to Guillaume Blairon E<lt>L<g@yom.be>E<gt>.
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
GPLv2 or (at your option) any later version.
|
||||
|
||||
=cut
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use Munin::Plugin::SNMP;
|
||||
use vars qw($DEBUG);
|
||||
|
||||
# Debug flag taken from the MUNIN_DEBUG environment variable.
$DEBUG = $ENV{MUNIN_DEBUG};

# Better colours from munin 1.3.x
#  Greens Blues  Oranges Dk yel Dk blu Purple Lime   Reds   Gray
my @palette = qw(
    00CC00 0066B3 FF8000 FFCC00 330099 990099 CCFF00 FF0000 808080
    008F00 00487D B35A00 B38F00 6B006B 8FB300 B30000 BEBEBE
    80FF80 80C9FF FFC080 FFE680 AA80FF EE00CC FF8080
    666600 FFBFFF 00FFCC CC6699 999900
);

# OID prefixes used by this plugin, each ending in a dot so the file system
# index can be appended directly.
#   - dfHigh.* : 32 most significant bits counters
#   - dfLow.*  : 32 least significant bits counters
my %oids = map { ( $_->[0], '1.3.6.1.4.1.789.1.5.4.1.' . $_->[1] . '.' ) } (
    [ dfHighTotalKBytes => 14 ],
    [ dfLowTotalKBytes  => 15 ],
    [ dfHighUsedKBytes  => 16 ],
    [ dfLowUsedKBytes   => 17 ],
    [ dfHighAvailKBytes => 18 ],
    [ dfLowAvailKBytes  => 19 ],
);
|
||||
|
||||
# Combine the low and high 32-bit words of a KBytes counter into one number.
#
# Arguments: ($l, $h) - the dfLow* and dfHigh* values for the same counter.
# Returns:   "U" (munin's "unknown") when either word is undefined,
#            otherwise high * 2**32 + low with both words read as unsigned.
#
# The previous implementation unpacked only the first 32 bits of the packed
# pair, i.e. the low word, so the high word was silently discarded and any
# value of 2**32 KBytes (4 TiB) or more wrapped around.
sub to_32bit_int {
    my ($l, $h) = @_;
    return "U" if ((!defined $l) || (!defined $h));

    # pack/unpack 'N' reinterprets a negative signed 32-bit value as its
    # unsigned bit pattern and leaves non-negative values untouched.
    my $low  = unpack('N', pack('N', $l));
    my $high = unpack('N', pack('N', $h));

    return $high * 4294967296 + $low;
}
|
||||
|
||||
# Requested action: 'snmpconf', 'config', or anything else to fetch values.
my $mode = defined $ARGV[0] ? $ARGV[0] : '';

# snmpconf: tell munin-node-configure which OIDs identify a usable device.
if ($mode eq 'snmpconf') {
    print "number 1.3.6.1.4.1.789.1.5.6.0\n",
          "index 1.3.6.1.4.1.789.1.5.4.1.1.\n";
    print "require $_ [0-9]\n" for values %oids;
    exit 0;
}

my $session = Munin::Plugin::SNMP->session();
my ($host, undef, undef, $tail) = Munin::Plugin::SNMP->config_session();

# The numeric suffix of the plugin name selects the file system to monitor.
die "Couldn't understand what I'm supposed to monitor"
    unless $tail =~ /^netapp_diskusage_(\d+)$/;
my $df_id    = $1;
my $name_oid = '1.3.6.1.4.1.789.1.5.4.1.2.' . $df_id;

# config: describe the graph and its three data series.
if ($mode eq 'config') {
    my $df_name = $session->get_single($name_oid);

    print "host_name $host\n" unless $host eq 'localhost';
    print <<"END_CONFIG";
graph_title $host disk usage on $df_name
graph_args --base 1024 --lower-limit 0
graph_vlabel bytes
graph_category disk
graph_info This graph shows the disk usage for $df_name on NetApp host $host
graph_order used avail total
used.info The total disk space in KBytes that is in use on the $df_name file system.
used.type GAUGE
used.draw AREA
used.label Used
used.cdef used,1024,*
used.min 0
used.colour $palette[1]
avail.info The total disk space in KBytes that is free for use on the $df_name file system.
avail.type GAUGE
avail.draw STACK
avail.label Available
avail.cdef avail,1024,*
avail.min 0
avail.colour $palette[3]
total.info The total capacity in KBytes for the $df_name file system.
total.type GAUGE
total.draw LINE2
total.label Total
total.cdef total,1024,*
total.min 0
total.colour $palette[7]
END_CONFIG

    exit 0;
}

# fetch: read the low and high word of each counter and print the result.
foreach my $field (qw(used avail total)) {
    my $counter = ucfirst($field) . 'KBytes';
    my $low     = $session->get_single($oids{"dfLow$counter"}  . $df_id);
    my $high    = $session->get_single($oids{"dfHigh$counter"} . $df_id);
    print "$field.value ", to_32bit_int($low, $high), "\n";
}

exit 0;
|
||||
|
||||
__END__
|
144
plugins/disk/snmp__netapp_inodeusage_
Executable file
144
plugins/disk/snmp__netapp_inodeusage_
Executable file
|
@ -0,0 +1,144 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
=head1 NAME
|
||||
|
||||
snmp__netapp_inodeusage_ - Munin plugin to retrieve inodes usage on
|
||||
NetApp storage appliances.
|
||||
|
||||
=head1 APPLICABLE SYSTEMS
|
||||
|
||||
Inodes usage stats should be reported by any NetApp storage appliance
|
||||
with SNMP agent daemon activated. See na_snmp(8) for details.
|
||||
|
||||
=head1 CONFIGURATION
|
||||
|
||||
Unfortunately, SNMPv3 is not fully supported on all NetApp equipment.
|
||||
For this reason, this plugin will use SNMPv2 by default, which is
|
||||
insecure because it doesn't encrypt the community string.
|
||||
|
||||
The following parameters will help you get this plugin working :
|
||||
|
||||
[snmp_*]
|
||||
env.community MyCommunity
|
||||
|
||||
If your community name is 'public', you should really worry about
|
||||
security and immediately reconfigure your appliance.
|
||||
|
||||
Please see 'perldoc Munin::Plugin::SNMP' for further configuration.
|
||||
|
||||
=head1 MIB INFORMATION
|
||||
|
||||
This plugin requires support for the NETWORK-APPLIANCE-MIB issued by
|
||||
Network Appliance. It reports the content of the DfEntry OID.
|
||||
|
||||
=head1 MAGIC MARKERS
|
||||
|
||||
#%# family=snmpauto
|
||||
#%# capabilities=snmpconf
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
v1.0 - 06/22/2009 14:05:03 CEST
|
||||
Initial revision
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
This plugin is copyright (c) 2009 by Guillaume Blairon.
|
||||
|
||||
NetApp is a registered trademark and Network Appliance is a trademark
|
||||
of Network Appliance, Inc. in the U.S. and other countries.
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
This plugin hasn't been tested on much hardware. If you encounter bugs,
|
||||
please report them to Guillaume Blairon E<lt>L<g@yom.be>E<gt>.
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
GPLv2 or (at your option) any later version.
|
||||
|
||||
=cut
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use Munin::Plugin::SNMP;
|
||||
use vars qw($DEBUG);
|
||||
|
||||
# Debug flag taken from the MUNIN_DEBUG environment variable.
$DEBUG = $ENV{MUNIN_DEBUG};

# Better colours from munin 1.3.x
#  Greens Blues  Oranges Dk yel Dk blu Purple Lime   Reds   Gray
my @palette = qw(
    00CC00 0066B3 FF8000 FFCC00 330099 990099 CCFF00 FF0000 808080
    008F00 00487D B35A00 B38F00 6B006B 8FB300 B30000 BEBEBE
    80FF80 80C9FF FFC080 FFE680 AA80FF EE00CC FF8080
    666600 FFBFFF 00FFCC CC6699 999900
);

# OID prefixes of the inode counters, each ending in a dot so the file
# system index can be appended directly.
my %oids = map { ( $_->[0], '1.3.6.1.4.1.789.1.5.4.1.' . $_->[1] . '.' ) } (
    [ dfInodesUsed => 7 ],
    [ dfInodesFree => 8 ],
);

# snmpconf: tell munin-node-configure which OIDs identify a usable device.
if (defined $ARGV[0] and $ARGV[0] eq 'snmpconf') {
    print "number 1.3.6.1.4.1.789.1.5.6.0\n",
          "index 1.3.6.1.4.1.789.1.5.4.1.1.\n";
    print "require $_ [0-9]\n" for values %oids;
    exit 0;
}

my $session = Munin::Plugin::SNMP->session();
my ($host, undef, undef, $tail) = Munin::Plugin::SNMP->config_session();

# The numeric suffix of the plugin name selects the file system to monitor.
die "Couldn't understand what I'm supposed to monitor"
    unless $tail =~ /^netapp_inodeusage_(\d+)$/;
my $df_id    = $1;
my $name_oid = '1.3.6.1.4.1.789.1.5.4.1.2.' . $df_id;
|
||||
|
||||
# config: describe the graph and its three data series (used, avail, total).
# The values are inode counts, so the vertical label is "inodes" (it was
# "bytes", copied from the disk usage plugin).
if (defined $ARGV[0] and $ARGV[0] eq "config") {
    my $df_name = $session->get_single($name_oid);

    print "host_name $host\n" unless $host eq 'localhost';
    print "graph_title $host inodes usage on $df_name\n";
    print "graph_args --base 1000 --lower-limit 0\n";
    print "graph_vlabel inodes\n";
    print "graph_category disk\n";
    print "graph_info This graph shows the inodes usage for $df_name on NetApp host $host\n";
    print "graph_order used avail total\n";
    print "used.info The total number of inodes in use on the $df_name file system.\n";
    print "used.type GAUGE\n";
    print "used.draw AREA\n";
    print "used.label Used\n";
    print "used.min 0\n";
    print "used.colour $palette[1]\n";
    print "avail.info The total number of inodes that are free for use on the $df_name file system.\n";
    print "avail.type GAUGE\n";
    print "avail.draw STACK\n";
    print "avail.label Available\n";
    print "avail.min 0\n";
    print "avail.colour $palette[3]\n";
    print "total.info The total number of inodes (used plus free) on the $df_name file system.\n";
    print "total.type GAUGE\n";
    print "total.draw LINE2\n";
    print "total.label Total\n";
    print "total.min 0\n";
    print "total.colour $palette[7]\n";

    exit 0;
}
|
||||
|
||||
# fetch: read both inode counters and derive the total.
my $used  = $session->get_single($oids{dfInodesUsed}.$df_id);
my $avail = $session->get_single($oids{dfInodesFree}.$df_id);

# A counter the agent did not answer is reported as "U" (munin's "unknown")
# instead of an empty value, and the total is only computed when both parts
# are known.
my $total = (defined $used && defined $avail) ? $used + $avail : "U";
$used  = "U" unless defined $used;
$avail = "U" unless defined $avail;

print "used.value $used\n";
print "avail.value $avail\n";
print "total.value $total\n";

exit 0;
|
||||
|
||||
__END__
|
172
plugins/disk/snmp__swap
Executable file
172
plugins/disk/snmp__swap
Executable file
|
@ -0,0 +1,172 @@
|
|||
#!/usr/bin/perl -w
|
||||
#
|
||||
# Copyright (C) 2006 Lars Strand
|
||||
#
|
||||
# Munin plugin to monitor swap usage by use of SNMP.
|
||||
# Based on the snmp__df plugin
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; version 2 dated June,
|
||||
# 1991.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
#
|
||||
# $Log$
|
||||
#
|
||||
#%# family=snmpauto
|
||||
#%# capabilities=snmpconf
|
||||
|
||||
use strict;
|
||||
use Net::SNMP;
|
||||
|
||||
my $DEBUG    = 0;
my $MAXLABEL = 20;

# Connection settings from the plugin environment, with defaults.
my $host      = $ENV{host}      ? $ENV{host}      : undef;
my $port      = $ENV{port}      ? $ENV{port}      : 161;
my $community = $ENV{community} ? $ENV{community} : "public";
my $iface     = $ENV{interface} ? $ENV{interface} : undef;

# Last SNMP response; written by get_single().
my $response;

# snmpconf: tell munin-node-configure what the agent must support.
if (defined $ARGV[0] and $ARGV[0] eq "snmpconf")
{
    # HOST-RESOURCES-MIB::hrStorage
    # HOST-RESOURCES-TYPES::hrStorageVirtualMemory
    print "require 1.3.6.1.2.1.25.2. 1.3.6.1.2.1.25.2.1.3\n";
    exit 0;
}

# A plugin name of the form snmp_<host>[:<port>]_swap overrides the
# environment; otherwise env.host must have been given.
my ($name_host) = $0 =~ /^(?:|.*\/)snmp_([^_]+)_swap$/;

if (defined $name_host)
{
    $host = $name_host;
    ($host, $port) = ($1, $2) if $host =~ /^([^:]+):(\d+)$/;
}
elsif (!defined($host))
{
    print "# Debug: $0 -- $1\n" if $DEBUG;
    die "# Error: couldn't understand what I'm supposed to monitor.";
}
|
||||
|
||||
# Open the SNMP session; Net::SNMP returns an undefined session plus an
# error string on failure.
my ($session, $error) = Net::SNMP->session(
    -hostname  => $host,
    -community => $community,
    -port      => $port
);

if (!defined ($session))
{
    die "Croaking: $error";
}

# HOST-RESOURCES-MIB hrStorage subtree and the columns read from it.
my $hrStorage                = "1.3.6.1.2.1.25.2.";
my $hrStorageVirtualMemory   = "1.3.6.1.2.1.25.2.1.3";
my $hrStorageAllocationUnits = "1.3.6.1.2.1.25.2.3.1.4.";
my $hrStorageSize            = "1.3.6.1.2.1.25.2.3.1.5.";
my $hrStorageUsed            = "1.3.6.1.2.1.25.2.3.1.6.";

# Indexes of all storage entries whose type is virtual memory.
# get_by_regex() returns undef when the agent does not answer.
my $swap_d = get_by_regex($session, $hrStorage, $hrStorageVirtualMemory);
$swap_d = {} unless defined $swap_d;

my $swapsize = 0; my $swapused = 0;

foreach my $swap (keys %$swap_d)
{
    # hrStorageSize and hrStorageUsed are counts of allocation units, not
    # bytes; scale them so the graph really shows bytes.  Fall back to a
    # unit of 1 (the previous behaviour) when the agent does not report it.
    my $unit = get_single($session, $hrStorageAllocationUnits . $swap);
    $unit = 1 unless defined $unit;

    my $size = get_single($session, $hrStorageSize . $swap);
    my $used = get_single($session, $hrStorageUsed . $swap);

    $swapsize += $size * $unit if defined $size;
    $swapused += $used * $unit if defined $used;
}

if (defined $ARGV[0] and $ARGV[0] eq "config")
{
    print "host_name $host\n";
    print "graph_title Virtual memory usage\n";
    if ($swapsize > 0)
    {
        print "graph_args -l 0 --base 1000 --upper-limit $swapsize\n";
    }
    else
    {
        print "graph_args -l 0 --base 1000\n";
    }
    print "graph_vlabel Bytes\n";
    print "graph_category disk\n";
    print "graph_info This graph shows swap usage in bytes.\n";
    print "swap.label swap\n";
    # Swap usage is a level, not an ever-increasing counter: GAUGE plots
    # the value itself, whereas DERIVE would plot its rate of change.
    print "swap.type GAUGE\n";
    print "swap.min 0\n";
    exit 0;
}

print "swap.value $swapused\n";
|
||||
|
||||
# Fetch one OID through the given SNMP session handle.
#
# Arguments: ($handle, $oid)
# Returns:   the value stored under $oid in the response, or undef when the
#            response holds no defined value for it.
# The response is kept in the file-level $response variable.
sub get_single
{
    my ($handle, $oid) = @_;

    print "# Getting single $oid..." if $DEBUG;

    $response = $handle->get_request ($oid);
    my $value = $response->{$oid};

    if ($DEBUG)
    {
        print(defined($value) ? "\"$value\"\n" : "undef\n");
    }

    return $value;
}
|
||||
|
||||
# Walk the subtree below $oid and collect the entries whose value matches
# $regex.
#
# Arguments: ($handle, $oid, $regex) - SNMP session handle, OID prefix of
#            the subtree (with trailing dot), pattern applied to each value.
# Returns:   a hash reference mapping the last OID component of every
#            matching entry to its value, or undef as soon as a request
#            yields no response.
sub get_by_regex
{
    my ($handle, $oid, $regex) = @_;
    my $result = {};
    my $num    = 0;
    my $ret    = $oid . "0";
    my $response;

    print "# Starting browse of $oid...\n" if $DEBUG;

    while (1)
    {
        # First round: try the ".0" instance directly.
        if ($num == 0)
        {
            print "# Checking for $ret...\n" if $DEBUG;
            $response = $handle->get_request ($ret);
        }
        # Later rounds, or when the direct request failed: step to the
        # next OID after $ret.
        if ($num or !defined $response)
        {
            print "# Checking for sibling of $ret...\n" if $DEBUG;
            $response = $handle->get_next_request ($ret);
        }
        if (!$response)
        {
            return undef;
        }
        my @keys = keys %$response;
        $ret = $keys[0];
        print "# Analyzing $ret (compared to $oid)...\n" if $DEBUG;
        # \Q...\E: the prefix is a literal OID, so its dots must not act as
        # regex wildcards when deciding whether the walk left the subtree.
        last unless ($ret =~ /^\Q$oid\E/);
        $num++;
        next unless ($response->{$ret} =~ /$regex/);
        @keys = split (/\./, $ret);
        $result->{$keys[-1]} = $response->{$ret};
        print "# Index $num: ", $keys[-1], " (", $response->{$ret}, ")\n" if $DEBUG;
    }
    return $result;
}
|
67
plugins/disk/xfs_frag
Executable file
67
plugins/disk/xfs_frag
Executable file
|
@ -0,0 +1,67 @@
|
|||
#!/bin/bash
|
||||
|
||||
: <<=cut
|
||||
=head1 NAME
|
||||
|
||||
xfs_frag - Munin plugin to monitor the fragmentation level on your XFS filesystems
|
||||
|
||||
=head1 APPLICABLE SYSTEMS
|
||||
|
||||
Any machine with an XFS file system.
|
||||
|
||||
=head1 CONFIGURATION
|
||||
|
||||
None, generally, but you may want to run as root and set a timeout.
|
||||
|
||||
[xfs_frag]
|
||||
user root
|
||||
timeout 90
|
||||
|
||||
=head1 MAGIC MARKERS
|
||||
|
||||
#%# family=auto contrib
|
||||
#%# capabilities=
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
1
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Paul Saunders L<darac+munin@darac.org.uk>
|
||||
|
||||
=cut
|
||||
|
||||
# Makes the [[ ... == xfs ]] tests below case-insensitive.
shopt -s nocasematch

# Turn a mount point into a valid munin field name: a leading character
# other than a letter or underscore, and any other character outside
# [A-Za-z0-9_], becomes "_".
fieldname() {
    printf '%s\n' "$1" | sed 's/^[^A-Za-z_]/_/; s/[^A-Za-z0-9_]/_/g'
}

# /etc/mtab columns are: device, mount point, file system type, the rest.
# Reading them straight into variables avoids the unquoted array expansion
# used before, which glob-expanded the values.  "read" is deliberately used
# without -r so field names stay as they were for escaped mount points.

case $1 in
    config)
        cat <<'EOF'
graph_title XFS fragmentation
graph_vlabel Percent
graph_category disk
EOF
        while read DEV MNT TYPE REST
        do
            if [[ $TYPE == xfs ]]; then
                FIELDNAME=$(fieldname "$MNT")
                echo "$FIELDNAME.label $MNT"
                echo "$FIELDNAME.type GAUGE"
            fi
        done < /etc/mtab
        exit 0
        ;;
esac

while read DEV MNT TYPE REST
do
    if [[ $TYPE == xfs ]]; then
        FIELDNAME=$(fieldname "$MNT")
        # sed -n ... p prints only when the fragmentation factor is found,
        # so a failing xfs_db yields an empty string rather than an error
        # line.  stdin is redirected so xfs_db cannot consume the mtab
        # lines this loop is reading.
        FRAG=$(xfs_db -c frag -r "$DEV" < /dev/null | sed -n 's/.*fragmentation factor \(.*\)%.*/\1/p')
        # "U" is munin's marker for an unknown value.
        echo "$FIELDNAME.value ${FRAG:-U}"
    fi
done < /etc/mtab
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue