Patch for the Munin RAID plugin (plugins/other/raid).

What the patch does:
  * moves opening braces onto the keyword line and cuddles "else";
  * lets the mdstat regex accept "active (auto-read-only) <type>" entries;
  * lowers both critical thresholds from "100:" to "98:".

Reviewer notes on this copy of the patch:
  * Line breaks were reconstructed from a whitespace-collapsed copy; every
    hunk was re-counted against its @@ header.
  * Indentation inside the hunks is reconstructed (4 spaces per level) and
    may not match the target file byte-for-byte. Whitespace-only differences
    between removed and added lines (e.g. the "#" line in the first hunk)
    could not be recovered. Apply with "git apply --ignore-whitespace" or
    "patch -l".
  * The readline operator in "my $text = <MDSTAT>;" was missing from the
    collapsed copy and has been restored.

diff --git a/plugins/other/raid b/plugins/other/raid
index 43c020c0..814661ec 100755
--- a/plugins/other/raid
+++ b/plugins/other/raid
@@ -1,7 +1,7 @@
 #!/usr/bin/perl -w
 #
 # (c) 2007 Nathan Rutman nathan@clusterfs.com
-# 
+#
 # Plugin to monitor RAID status
 #
 # Results are % of healthy drives in a raid device
@@ -10,22 +10,17 @@
 #%# family=contrib
 #%# capabilities=autoconf
 
-if ($ARGV[0] and $ARGV[0] eq "autoconf")
-{
-    if (-r "/proc/mdstat" and `grep md /proc/mdstat`)
-    {
+if ($ARGV[0] and $ARGV[0] eq "autoconf") {
+    if (-r "/proc/mdstat" and `grep md /proc/mdstat`) {
         print "yes\n";
         exit 0;
-    }
-    else
-    {
+    } else {
         print "no RAID devices\n";
         exit 1;
     }
 }
 
-if ( $ARGV[0] and $ARGV[0] eq "config" )
-{
+if ( $ARGV[0] and $ARGV[0] eq "config" ) {
     print "graph_title RAID status\n";
     print "graph_category disk\n";
     print "graph_info This graph monitors RAID disk health. Values are percentage of healthy drives in each raid group. Degraded devices are marked Critical.\n";
@@ -41,35 +36,31 @@ if ( $ARGV[0] and $ARGV[0] eq "config" )
     my $text = <MDSTAT>;
     close MDSTAT;
 
-    while ($text =~ /(md\d)\s+:\s+active\s+(\w+)\s+(.*)\n.*\[(\d+)\/(\d+)]\s+\[(\w+)]/ )
-    {
-        my($dev,$type,$members,$nmem,$nact,$status) = ($1,$2,$3,$4,$5,$6);
-        #print "item: $dev $type ($members) status=$status \n";
-        if ( $ARGV[0] and $ARGV[0] eq "config" )
-        {
+    # Should look like "active raid1 sda1[0] sdc1[2] sdb1[1]"
+    # Interestingly, swap is presented as "active (auto-read-only)"
+    while ($text =~ /(md\d)\s+:\s+active\s+(\(auto-read-only\)\s+|)(\w+)\s+(.*)\n.*\[(\d+)\/(\d+)]\s+\[(\w+)]/ ) {
+        my($dev,$dummy,$type,$members,$nmem,$nact,$status) = ($1,$2,$3,$4,$5,$6,$7);
+#       print "$text\nitem: $dev $type ($members) status=$status \n";
+        if ( $ARGV[0] and $ARGV[0] eq "config" ) {
             print "$dev.label $dev\n";
             print "$dev.info $type $members\n";
             # 100: means less than 100
-            print "$dev.critical 100:\n";
+            # Because of an unfound bug, sometimes reported as 99.XX even when OS reports 100.
+            print "$dev.critical 98:\n";
             print $dev, "_rebuild.label $dev rebuilt\n";
             print $dev, "_rebuild.info $type\n";
-            print $dev, "_rebuild.critical 100:\n";
-        }
-        else
-        {
+            # Because of an unfound bug, sometimes reported as 99.XX even when OS reports 100.
+            print $dev, "_rebuild.critical 98:\n";
+        } else {
             my $pct = 100 * $nact / $nmem;
             my $rpct = 100;
-            if ( $pct < 100 )
-            {
-                my $output = `/sbin/mdadm -D /dev/$dev | grep Rebuild`;
-                if( $output =~ /([0-9]+)% complete/ )
-                {
-                    $rpct = $1;
-                }
-                else
-                {
-                    $rpct = 0;
-                }
+            if ( $pct < 100 ) {
+                my $output = `/sbin/mdadm -D /dev/$dev | grep Rebuild`;
+                if( $output =~ /([0-9]+)% complete/ ) {
+                    $rpct = $1;
+                } else {
+                    $rpct = 0;
+                }
             }
             print "$dev.value $pct\n";
             print $dev, "_rebuild.value $rpct\n";
@@ -79,3 +70,4 @@ if ( $ARGV[0] and $ARGV[0] eq "config" )
 }
 
 exit 0;
+