diff --git a/plugins/disk/raid b/plugins/disk/raid index 440fddce..120bc366 100755 --- a/plugins/disk/raid +++ b/plugins/disk/raid @@ -12,11 +12,11 @@ if ($ARGV[0] and $ARGV[0] eq "autoconf") { if (-r "/proc/mdstat" and `grep md /proc/mdstat`) { - print "yes\n"; - exit 0; + print "yes\n"; + exit 0; } else { - print "no RAID devices\n"; - exit 1; + print "no RAID devices\n"; + exit 1; } } @@ -29,52 +29,105 @@ if ( $ARGV[0] and $ARGV[0] eq "config" ) { print "graph_scale no\n"; } -{ - local( $/, *MDSTAT ) ; - open (MDSTAT, "/proc/mdstat") or exit 1; - #open (MDSTAT, "/etc/munin/plugins/sample.failed") or exit 1; - my $text = ; - close MDSTAT; +open(my $mdstat, '<', "/proc/mdstat") or exit 1; +my(@text) = <$mdstat>; +# contents of /proc/mdstat may change between reads, so fetch all of it in one read +close($mdstat); - # Should look like "active raid1 sda1[0] sdc1[2] sdb1[1]" - # Interestingly, swap is presented as "active (auto-read-only)" - while ($text =~ /(md\d+)\s+:\s+active\s+(\(auto-read-only\)\s+|)(\w+)\s+(.*)\n.*\[(\d+)\/(\d+)]\s+\[(\w+)]\n(.*(check|resync)\s=\s+(\d+\.\d+)%|.*\n)/ ) { - my($dev,$dummy,$type,$members,$nmem,$nact,$status,$dummy2,$dummy3,$proc) = ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10); -# print "$text\nitem: $dev $type ($members) status=$status $proc\n"; - if ( $ARGV[0] and $ARGV[0] eq "config" ) { - print "$dev.label $dev\n"; - print "$dev.info $type $members\n"; - # 100: means less than 100 - # Because of an unfound bug, sometimes reported as 99.XX even when OS reports 100. - print "$dev.critical 98:\n"; - print $dev, "_rebuild.label $dev rebuilt\n"; - print $dev, "_rebuild.info $type\n"; - # Because of an unfound bug, sometimes reported as 99.XX even when OS reports 100. 
- print $dev, "_rebuild.critical 98:\n"; - print $dev, "_check.label $dev check/resync \n"; - print $dev, "_check.info $type\n"; - } else { - my $pct = 100 * $nact / $nmem; - my $rpct = 100; - if ( $pct < 100 ) { - my @output = `/sbin/mdadm -D /dev/$dev | grep Rebuild`; - if( $output[0] and $output[0] =~ /([0-9]+)% complete/ ) { - $rpct = $1; - } else { - $rpct = 0; - } - } - if ( $proc ) { - $cpct = $proc; - } else { +my($devinfo_re, $devstat_re, $action_re) = ( + '(md\d+)\s+:\s+active\s+(\(read-only\)\s+|\(auto-read-only\)\s+|)(\w+)\s+(.*)', + '.*\[(\d+)\/(\d+)]\s+\[(\w+)]', + '.*(reshape|check|resync|recovery)\s*=\s*(\d+\.\d+%|\w+)(.*finish=(.*min))?', +); +# Interestingly, swap is presented as "active (auto-read-only)" +# and mdadm has '--readonly' option to make the array 'active (read-only)' + +my($dev, $ro, $type, $members, $nmem, $nact, $status, $action, $proc, $minute); +while (@text) { + my $line = shift @text; + if ($line =~ /$devinfo_re/) { + # first line should look like "active raid1 sda1[0] sdc1[2] sdb1[1]" + $dev = $1; + $ro = $2 || ''; + $type = $3; + $members = $4; + + $line = shift @text; + if (defined $line and $line =~ /$devstat_re/) { + # second line should look like "123456 blocks super 1.2 [2/2] [UU]" + $nmem = $1; + $nact = $2; + $status = $3; + } + else { + # second line did not exist in /proc/mdstat + next; + } + + $line = shift @text; + if (defined $line and $line =~ /$action_re/) { + # third line should look like " [==>..................] 
check = 10.0% (12345/123456) finish=123min speed=12345/sec" + # this line will appear only when the array is in action + $action = $1; + my $percent = $2; + $minute = $4 || ''; + if ($percent =~ /(\d+\.\d+)%/) { + $proc = $1; + } + else { + # 'resync=DELAYED' or 'resync=PENDING' + $action .= " ($percent)"; + $proc = -1; + } + } + else { + # array is not in action + $action = 'idle'; + $minute = ''; + unshift(@text, $line) if defined $line; + } + } + else { + # skip until first line is found + next; + } + + if ( $ARGV[0] and $ARGV[0] eq "config" ) { + print "$dev.label $dev\n"; + print "$dev.info $type $ro$members\n"; + # 100: means less than 100 + # Because of an unfound bug, sometimes reported as 99.XX even when OS reports 100. + print "$dev.critical 98:\n"; + print $dev, "_rebuild.label $dev reshape/recovery\n"; + print $dev, "_rebuild.info $action $minute\n"; + # Because of an unfound bug, sometimes reported as 99.XX even when OS reports 100. + print $dev, "_rebuild.critical 98:\n"; + print $dev, "_check.label $dev check/resync \n"; + print $dev, "_check.info $action $minute\n"; + } else { + my $pct = 100 * $nact / $nmem; + my $rpct = 100; + my $cpct = 100; + if ($action =~ /reshape|recovery/) { + $rpct = $proc; + $cpct = 0; # check/resync is not done + } + elsif ($action =~ /check|resync/) { + if ($proc < 0) { + # array is DELAYED or PENDING, further info is unknown + $rpct = 0; $cpct = 0; } - print "$dev.value $pct\n"; - print $dev, "_rebuild.value $rpct\n"; - print $dev, "_check.value $cpct\n"; - } - $text = $'; - } + else { + # reshape/recovery was done, $rpct => 100 + $cpct = $proc; + } + } + + print "$dev.value $pct\n"; + print $dev, "_rebuild.value $rpct\n"; + print $dev, "_check.value $cpct\n"; + } } exit 0;