#!/usr/bin/env python3

"""
=pod

=head1 NAME

btrfs_device_stats - Script to monitor btrfs device statistics

=head1 CONFIGURATION

Simply create a symlink in your plugins directory like with any other plugin.
Must be run as root.

  [btrfs_device_stats]
  user root

You can optionally configure the warning and critical limits. By default
warning is set to 1 and critical is not set at all. You can set the limits
either for the entire plugin or per individual metric. The individual values
take precedence over the general ones. See the following example:

  [btrfs_device_stats]
  user root
  env.warning 2
  env.critical 4
  env.flags_warning 23
  env.read_errs_critical 42

=head2 DEFAULT CONFIGURATION

=head1 BUGS

=head1 AUTHOR

2019-2021, HaseHarald

=head1 MAGIC MARKERS

 #%# family=auto
 #%# capabilities=autoconf

=head1 LICENSE

LGPLv3

=cut
"""

# This file contains a munin-plugin to gather btrfs statistics per device.
#
# This is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this plugin.  If not, see https://www.gnu.org/licenses/.

import os
import sys

# The btrfs bindings are a third-party module.  The import is guarded so that
# "autoconf" can answer "no (...)" instead of dying with a traceback when the
# module is missing; every other mode still refuses to run without it.
try:
    import btrfs
except ImportError:
    btrfs = None


# One entry per statistic, in the order the fields are emitted:
# (attribute / munin field name, human readable label, emit limits?)
# nr_items is the only field that gets no warning/critical limits.
_FIELDS = (
    ("corruption_errs", "Corruption Errors", True),
    ("flush_errs", "Flush Errors", True),
    ("generation_errs", "Generation Errors", True),
    ("read_errs", "Read Errors", True),
    ("write_errs", "Write Errors", True),
    ("nr_items", "Nr. of Items", False),
    ("flags", "Nr. of Flags", True),
)


def _graph_id(fs):
    """Return the multigraph name of *fs*: the fsid with '-' mapped to '_'."""
    return "btrfs_device_stats_" + str(fs.fsid).replace('-', '_')


def _print_limits(name, default_warning, default_critical):
    """Print the ``.warning`` line and, if configured, the ``.critical`` line.

    The per-field environment variables ``<name>_warning`` and
    ``<name>_critical`` take precedence over the plugin-wide defaults passed
    in.  A critical line is only printed when the resulting limit is
    non-empty.
    """
    print("{}.warning {}".format(
        name, os.getenv(name + "_warning", default_warning)))
    critical = os.getenv(name + "_critical", default_critical)
    if critical:
        print("{}.critical {}".format(name, critical))


def munin_config(fs):
    """Print the munin ``config`` output for one filesystem.

    Emits one multigraph section with the per-filesystem totals, followed by
    one sub-graph section per device that carries the warning/critical
    limits.

    fs -- filesystem object; this function reads ``fs.fsid`` and ``fs.path``
          and calls ``fs.devices()`` and ``fs.dev_info(devid)``.

    Environment: ``warning`` (default ``1``) and ``critical`` (default: not
    set) provide plugin-wide limits; ``<field>_warning`` and
    ``<field>_critical`` override them per field.
    """
    graph = _graph_id(fs)

    print("multigraph " + graph)
    print("graph_args --base 1000 -l 0")
    print("graph_vlabel total btrfs attribute value")
    print("graph_title btrfs total device stats for " + fs.path)
    print("graph_category disk")
    print("graph_info This graph shows the total stats of devices used by btrfs")
    for name, label, _ in _FIELDS:
        print("{}_total.label {}".format(name, label))
    print("")

    default_warning = os.getenv('warning', '1')
    default_critical = os.getenv('critical')  # None when no limit is wanted

    for device in fs.devices():
        dev_name = fs.dev_info(device.devid).path.replace('/dev/', '')

        print("multigraph {}.{}".format(graph, device.devid))
        print("graph_args --base 1000 -l 0")
        print("graph_vlabel btrfs attribute value")
        print("graph_title btrfs device stats for " + dev_name)
        print("graph_category disk")
        print("graph_info This graph shows stats of devices used by btrfs")
        for name, label, has_limits in _FIELDS:
            print("{}.label {}".format(name, label))
            if has_limits:
                _print_limits(name, default_warning, default_critical)
        print("")


def munin_values(fs):
    """Print the current values for every device of *fs* and their totals.

    fs -- filesystem object; this function reads ``fs.fsid`` and calls
          ``fs.devices()`` and ``fs.dev_stats(devid, False)``.
    """
    graph = _graph_id(fs)
    totals = {name: 0 for name, _, _ in _FIELDS}

    for device in fs.devices():
        # The second argument is passed through unchanged from the original
        # code; presumably it is the "reset counters" flag -- confirm against
        # the python-btrfs documentation.
        stats = fs.dev_stats(device.devid, False)

        print("multigraph {}.{}".format(graph, device.devid))
        for name, _, _ in _FIELDS:
            value = getattr(stats, name)
            totals[name] += value
            print("{}.value {}".format(name, value))
        print("")

    print("multigraph " + graph)
    for name, _, _ in _FIELDS:
        print("{}_total.value {}".format(name, totals[name]))
    print("")


def _autoconf():
    """Answer munin's ``autoconf`` request with ``yes`` or ``no (reason)``."""
    if btrfs is None:
        print("no (python3 btrfs module not found)")
    elif not list(btrfs.utils.mounted_filesystem_paths()):
        print("no (no mounted btrfs filesystem found)")
    else:
        print("yes")


def main():
    """Dispatch on the first command line argument.

    ``autoconf`` reports whether the plugin can run here, ``config`` prints
    the graph configuration, anything else (or no argument) prints values.
    """
    mode = sys.argv[1] if len(sys.argv) > 1 else ""

    if mode == "autoconf":
        _autoconf()
        return

    if btrfs is None:
        sys.exit("btrfs_device_stats: python3 btrfs module not found")

    emit = munin_config if mode == "config" else munin_values
    for path in btrfs.utils.mounted_filesystem_paths():
        with btrfs.FileSystem(path) as fs:
            emit(fs)


if __name__ == "__main__":
    main()
    sys.exit(0)