From a99c217e27ead7c2bbbf1fc2b5fef6824b38e2b9 Mon Sep 17 00:00:00 2001 From: Michael Renner Date: Tue, 25 May 2010 09:48:32 +0200 Subject: [PATCH] Initial version --- plugins/other/linux_diskstat_ | 702 ++++++++++++++++++++++++++++++++++ 1 file changed, 702 insertions(+) create mode 100755 plugins/other/linux_diskstat_ diff --git a/plugins/other/linux_diskstat_ b/plugins/other/linux_diskstat_ new file mode 100755 index 00000000..7891eebf --- /dev/null +++ b/plugins/other/linux_diskstat_ @@ -0,0 +1,702 @@ +#!/usr/bin/perl -w +# vim: sts=4 sw=4 ts=8 + +# Munin markers: +#%# family=auto +#%# capabilities=autoconf suggest + +# Author: Michael Renner + +# Version: 0.0.5, 2009-05-22 + + + +=head1 NAME + +linux_diskstat_ - Munin plugin to monitor various values provided +via C</proc/diskstats> or C</sys/block/*/stat> + +=head1 APPLICABLE SYSTEMS + +Linux 2.6 systems with extended block device statistics enabled. + + +=head1 INTERPRETATION + +Among the more self-describing or well-known values like C<throughput> +(Bytes per second) there are a few which might need further introduction. + + +=head2 Device Utilization + +Linux provides a counter which increments in a millisecond-interval for as long +as there are outstanding I/O requests. If this counter is close to 1000msec +in a given 1 second timeframe the device is nearly 100% saturated. This plugin +provides values averaged over a 5 minute time frame by default, so it can't +catch short-lived saturations, but it'll give a nice trend for semi-uniform +load patterns as they're expected in most server or multi-user environments. + + +=head2 Device IO Time + +The C<Device IO Time> takes the counter described under C<Device Utilization> +and divides it by the number of I/Os that happened in the given time frame, +resulting in an average time per I/O on the block-device level. + +This value can give you a good comparison base amongst different controllers, +storage subsystems and disks for similar workloads. 
+ + +=head2 Syscall Wait Time + +These values describe the average time it takes between an application issuing +a syscall resulting in a hit to a blockdevice to the syscall returning to the +application. + +The values are bound to be higher (at least for read requests) than the time +it takes the device itself to fulfill the requests, since calling overhead, +queuing times and probably a dozen other things are included in those times. + +These are the values to watch out for when an user complains that C. + + +=head3 What causes a block device hit? + +A non-exhaustive list: + +=over + +=item * Reads from files when the given range is not in the page cache or the O_DIRECT +flag is set. + +=item * Writes to files if O_DIRECT or O_SYNC is set or sys.vm.dirty_(background_)ratio +is exceeded. + +=item * Filesystem metadata operations (stat(2), getdents(2), file creation, +modification of any of the values returned by stat(2), etc.) + +=item * The pdflush daemon writing out dirtied pages + +=item * (f)sync + +=item * Swapping + +=item * raw device I/O (mkfs, dd, etc.) + +=back + +=head1 ACKNOWLEDGEMENTS + +The core logic of this script is based on the B tool of the B +package written and maintained by Sebastien Godard. + +=head1 SEE ALSO + +See C in your Linux source tree for further information +about the C involved in this module. + +L has a nice writeup +about the pdflush daemon. + +=head1 AUTHOR + +Michael Renner + +=head1 LICENSE + +GPLv2 + + +=cut + + +use strict; + + +use File::Basename; +use Carp; +use POSIX; + +# We load our own version of save/restore_state if Munin::Plugin is unavailable. +# Don't try this at home +eval { require Munin::Plugin; Munin::Plugin->import; }; + +if ($@) { + fake_munin_plugin(); +} + + +# Sanity check to ensure that the script is called the correct name. 
# The operating mode and the target device are derived from the name the
# script is invoked under, so refuse to run under any other name.
if ( basename($0) !~ /^linux_diskstat_/ ) {
    die qq(Please ensure that the name of the script and it's symlinks starts with "linux_diskstat_"\n);
}


############
# autoconf #
############

# Prints "yes" and exits 0 if statistics for at least one block device can be
# parsed; prints "no" and exits 1 otherwise (including when parsing croaks).
if ( defined $ARGV[0] && $ARGV[0] eq 'autoconf' ) {
    my %stats;

    # Capture any croaks on the way
    eval { %stats = parse_diskstats() };

    if ( !$@ && keys %stats ) {

        print "yes\n";
        exit 0;
    }
    else {
        print "no\n";
        exit 1;
    }
}


###########
# suggest #
###########

# Prints one "<mode>_<device>" line per graph worth creating, then exits 0.
# Devices without any completed reads or writes are not suggested, and
# partitions are dropped in favour of their already suggested parent device.
if ( defined $ARGV[0] && $ARGV[0] eq 'suggest' ) {

    my %diskstats = parse_diskstats();

    my %suggested_devices;

    DEVICE:
    for my $devname ( sort keys %diskstats ) {

        # Skip devices without traffic
        next
            if ( $diskstats{$devname}->{'rd_ios'} == 0
            && $diskstats{$devname}->{'wr_ios'} == 0 );

        for my $existing_device ( @{ $suggested_devices{'iops'} } ) {

            # Filter out partitions of devices which are already suggested,
            # e.g. sda1 -> sda, c0d0p1 -> c0d0.
            #
            # A partition is named after its parent followed by the partition
            # number; parents whose own name ends in a digit get a 'p' in
            # between.  Matching exactly that shape (with the parent name
            # quoted, since it is data and not a pattern) keeps unrelated
            # devices apart: dm-10 is not a partition of dm-1, md10 not one
            # of md1 and sdaa not one of sda.  The sorted iteration order
            # guarantees that a parent is seen before its partitions.
            my $separator = $existing_device =~ m/\d\z/ ? 'p' : '';

            next DEVICE
                if ( $devname =~ m/\A\Q$existing_device\E$separator\d+\z/ );
        }

        push @{ $suggested_devices{'iops'} },       $devname;
        push @{ $suggested_devices{'throughput'} }, $devname;

        # Only suggest latency graphs if the device supports it
        if (   $diskstats{$devname}->{'rd_ticks'} > 0
            || $diskstats{$devname}->{'wr_ticks'} > 0 )
        {
            push @{ $suggested_devices{'latency'} }, $devname;
        }
    }

    for my $mode ( keys %suggested_devices ) {
        for my $device ( sort @{ $suggested_devices{$mode} } ) {

            # '/' cannot be part of a file name, so it is replaced for output
            my $printdev = translate_device_name( $device, 'TO_FS' );
            print "${mode}_$printdev\n";
        }
    }

    exit 0;
}


# Reading the scripts invocation name and setting some parameters,
# needed from here on

my $basename = basename($0);
my ( $mode, $device ) = $basename =~ m/linux_diskstat_(\w+)_([-+\w]+)$/;

if ( not defined $device ) {
    croak "Didn't get a device name. 
Aborting\n"; +} + +$device = translate_device_name($device, 'FROM_FS'); + +########## +# config # +########## + +if ( defined $ARGV[0] && $ARGV[0] eq 'config' ) { + + my $pretty_device = $device; + + if ($device =~ /^dm-\d+$/) { + $pretty_device = translate_devicemapper_name($device); + } + + if ( $mode eq 'latency' ) { + + print <{'rd_ios'} - $prev_stats->{'rd_ios'}; + my $write_ios = $cur_stats->{'wr_ios'} - $prev_stats->{'wr_ios'}; + + my $rd_ticks = $cur_stats->{'rd_ticks'} - $prev_stats->{'rd_ticks'}; + my $wr_ticks = $cur_stats->{'wr_ticks'} - $prev_stats->{'wr_ticks'}; + + my $rd_sectors = $cur_stats->{'rd_sectors'} - $prev_stats->{'rd_sectors'}; + my $wr_sectors = $cur_stats->{'wr_sectors'} - $prev_stats->{'wr_sectors'}; + + my $tot_ticks = $cur_stats->{'tot_ticks'} - $prev_stats->{'tot_ticks'}; + + + my $read_io_per_sec = $read_ios / $interval; + my $write_io_per_sec = $write_ios / $interval; + + my $read_bytes_per_sec = $rd_sectors / $interval * $bytes_per_sector; + my $write_bytes_per_sec = $wr_sectors / $interval * $bytes_per_sector; + + + my $total_ios = $read_ios + $write_ios; + my $total_ios_per_sec = $total_ios / $interval; + + # Utilization - or "how busy is the device"? + # If the time spent for I/O was close to 1000msec for + # a given second, the device is nearly 100% saturated. + my $utilization = $tot_ticks / $interval; + + # Average time an I/O takes on the block device + my $servicetime = + $total_ios_per_sec ? $utilization / $total_ios_per_sec : 0; + + # Average wait time for an I/O from start to finish + # (includes queue times et al) + my $average_wait = $total_ios ? ( $rd_ticks + $wr_ticks ) / $total_ios : 0; + my $average_rd_wait = $read_ios ? $rd_ticks / $read_ios : 0; + my $average_wr_wait = $write_ios ? $wr_ticks / $write_ios : 0; + + my $average_rq_size_in_kb = + $total_ios + ? ( $rd_sectors + $wr_sectors ) * $bytes_per_sector / 1024 / $total_ios + : 0; + my $average_rd_rq_size_in_kb = + $read_ios ? 
$rd_sectors * $bytes_per_sector / 1024 / $read_ios : 0; + my $average_wr_rq_size_in_kb = + $write_ios ? $wr_sectors * $bytes_per_sector / 1024 / $write_ios : 0; + + my $util_print = $utilization / 10; + + + if ( $mode eq 'latency' ) { + print <) { + + # Strip trailing newline and leading whitespace + chomp $line; + $line =~ s/^\s+//; + + my @elems = split /\s+/, $line; + + # We explicitly don't support old-style diskstats + # There are situations where only _some_ lines (e.g. + # partitions on older 2.6 kernels) have fewer stats + # numbers, therefore we'll skip them silently + if ( @elems != 14 ) { + next; + } + push @lines, \@elems; + } + + close STAT or croak "Failed to close '/proc/diskstats': $!"; + return @lines; +} + +sub read_sysfs { + + my ($want_device) = @_; + + my @devices; + my @lines; + + if ( defined $want_device ) { + + # sysfs uses '!' as replacement for '/', e.g. cciss!c0d0 + $want_device =~ tr#/#!#; + @devices = $want_device; + } + else { + @devices = glob "/sys/block/*/stat"; + @devices = map { m!/sys/block/([^/]+)/stat! } @devices; + } + + + for my $cur_device (@devices) { + my $stats_file = "/sys/block/$cur_device/stat"; + + open STAT, "< $stats_file" + or croak "Failed to open '$stats_file': $!\n"; + + my $line = ; + + # Trimming whitespace + $line =~ s/^\s+//; + chomp $line; + + my @elems = split /\s+/, $line; + + croak "'$stats_file' doesn't contain exactly 11 values. 
Aborting"
          if ( @elems != 11 );

        # Translate the devicename back before storing the information
        $cur_device =~ tr#!#/#;

        # Faking missing diskstats values
        unshift @elems, ( '', '', $cur_device );

        push @lines, \@elems;

        close STAT or croak "Failed to close '$stats_file': $!\n";
    }

    return @lines;
}


# parse_diskstats([$want_device])
#
# Collects the raw per-device statistics and returns them as a flat hash
# (key/value list) mapping each device name to a hash reference with the keys
#   major minor devname
#   rd_ios rd_merges rd_sectors rd_ticks
#   wr_ios wr_merges wr_sectors wr_ticks
#   ios_in_prog tot_ticks rq_ticks
#
# If any /sys/block/*/stat file exists, the data comes from read_sysfs(),
# which is handed $want_device; 'major' and 'minor' are empty strings for
# such entries.  Otherwise read_diskstats() is used, which is called without
# arguments, so $want_device does not narrow the result in that case.
sub parse_diskstats {

    my ($want_device) = @_;

    # glob() has to be evaluated in list context here: in scalar context it
    # is an iterator which hands out one match per call and undef once the
    # matches are exhausted, so a repeated call of this sub could wrongly
    # take the /proc/diskstats branch although sysfs is available.
    my @sysfs_stat_files = glob "/sys/block/*/stat";

    my @stats;

    if (@sysfs_stat_files) {
        @stats = read_sysfs($want_device);
    }
    else {
        @stats = read_diskstats();
    }

    my %diskstats;

    for my $entry (@stats) {

        my %devstat;

        # Hash-Slicing for fun and profit
        @devstat{
            qw(major minor devname
                rd_ios rd_merges rd_sectors rd_ticks
                wr_ios wr_merges wr_sectors wr_ticks
                ios_in_prog tot_ticks rq_ticks)
            }
            = @{$entry};

        $diskstats{ $devstat{'devname'} } = \%devstat;
    }

    return %diskstats;
}

# fetch_device_counters($want_device)
#
# Returns the counters of $want_device as a flat hash (key/value list, same
# keys as the per-device hashes built by parse_diskstats()).
#
# If the device is unknown, undef is returned.  Note that a caller assigning
# the result to a hash therefore receives a single undef element in that
# case; this is kept as is because the callers are not part of this block.
sub fetch_device_counters {

    my ($want_device) = @_;

    my %diskstats = parse_diskstats($want_device);

    return %{ $diskstats{$want_device} }
        if exists $diskstats{$want_device};

    return undef;
}


# We use '+' (and formerly '-') as placeholder for '/' in device-names
# used as calling name for the script. 
# translate_device_name($device, $mode)
#
# Converts a device name between its real form and the form used in the
# script's calling name.
#
#   'FROM_FS' - calling name to device name: every '-' and '+' becomes '/',
#               unless the name contains "dm-<digits>", which is left alone.
#   'TO_FS'   - device name to calling name: every '/' becomes '+'.
#
# Returns the translated name; croaks on any other $mode.
sub translate_device_name {

    my ( $device, $mode ) = @_;

    if ( $mode eq 'FROM_FS' ) {

        # Hackaround to mitigate issues with unwisely chosen former separator
        if ( not( $device =~ m/dm-\d+/ ) ) {
            $device =~ tr#-+#//#;
        }

    }
    elsif ( $mode eq 'TO_FS' ) {

        $device =~ tr#/#+#;

    }
    else {
        croak "translate_device_name: Unknown mode\n";
    }

    return $device;
}


# fake_munin_plugin()
#
# Defines save_state() and restore_state() in the current package by
# evaluating the code below.  The state is kept with Storable in
# /tmp/munin-state-<basename of the script>.
#
# NOTE(review): the state file lives under a predictable name in /tmp and is
# read back with Storable's retrieve(); confirm that this is acceptable on
# hosts with untrusted local users.
sub fake_munin_plugin {
    my $eval_code = <<'EOF';

use Storable;
my $storable_filename = basename($0);
$storable_filename = "/tmp/munin-state-$storable_filename";

sub save_state {
    my @state = @_;

    if ( not -e $storable_filename or -f $storable_filename ) {
        store \@state, $storable_filename or croak "Failed to persist state to '$storable_filename': $!\n";
    }
    else {
        croak "$storable_filename is probably not a regular file. Please delete it.\n";
    }
}

sub restore_state {

    if (-f $storable_filename) {
        my $state = retrieve($storable_filename);
        return @{$state};
    }
    else {
        return undef;
    }
}
EOF

    eval($eval_code);

    # Report a failed setup instead of swallowing it; otherwise the only
    # symptom would be an "Undefined subroutine" error at the first use of
    # save_state()/restore_state().  Only a warning, so modes which do not
    # need the state keep working as before.
    warn "Failed to set up fallback save_state/restore_state: $@" if $@;

    return;
}

# translate_devicemapper_name($device)
#
# Looks up the /dev/mapper entry whose device number matches the given
# "dm-<minor>" name and returns a readable name for it: the result of
# translate_lvm_name() if that is defined, the /dev/mapper path otherwise.
# If no entry matches, $device is returned unchanged.
#
# Croaks if $device is not of the form "dm-<digits>" or if the device-mapper
# major number cannot be determined.
sub translate_devicemapper_name {
    my ($device) = @_;

    my ($want_minor) = $device =~ m/^dm-(\d+)$/;

    croak "Failed to extract devicemapper id" unless defined($want_minor);

    my $dm_major = find_devicemapper_major();
    croak "Failed to get device-mapper major number\n" unless defined $dm_major;

    for my $entry ( glob "/dev/mapper/\*" ) {

        # stat() yields an empty list for entries which cannot be resolved
        # (e.g. dangling symlinks); those can't be the wanted device.
        my $rdev = ( stat($entry) )[6];
        next unless defined $rdev;

        # Linux device numbers carry a 12 bit major and a 20 bit minor: the
        # low 8 bits of the minor sit in bits 0-7, the major in bits 8-19
        # and the remaining minor bits from bit 20 upwards.  Splitting at
        # 256 only works for minors below 256.
        my $major = ( $rdev >> 8 ) & 0xfff;
        my $minor = ( $rdev & 0xff ) | ( ( $rdev >> 12 ) & 0xfff00 );

        if ( $major == $dm_major && $minor == $want_minor ) {

            my $pretty_name = translate_lvm_name($entry);

            return defined $pretty_name ? $pretty_name : $entry;

        }
    }

    # Return original string if the device can't be found.
    return $device;
}



sub translate_lvm_name {

    my ($entry) = @_;

    my $device_name = basename($entry);

    # Check for single-dash-occurence to see if this could be a lvm devicemapper device.
    if ($device_name =~ m/(?) 
{ + chomp $line; + + my ($major, $name) = split /\s+/, $line, 2; + + next unless defined $name; + + if ($name eq 'device-mapper') { + $dm_major = $major; + last; + } + } + close(FH); + + return $dm_major; +}