diff --git a/plugins/other/fc-switch-ports b/plugins/other/fc-switch-ports index c6039e1a..76d8046c 100755 --- a/plugins/other/fc-switch-ports +++ b/plugins/other/fc-switch-ports @@ -1,8 +1,8 @@ #!/usr/bin/env python """ -Munin plugin which reports selected counters regarding ports on a SAN -FC-switch. Only enabled ports are considered. +Munin plugin which reports selected counters regarding ports on a +Brocade SAN FC-switch. Only enabled ports are considered. The counters shown: @@ -25,20 +25,29 @@ rx_crcs: CRC errors detected in received frames. Together with enc_out errors, CRC errors indicate a GBIC/SFP problem. -words: FC transmission words (each word comprises - four 10-bit units). Reflects how busy the - port is. +bits: Number of bits transmitted(tx)/received(rx) + by the port. Inspecting this graph will help + determine whether the port is saturated. When symlinking to the plugin, indicate hostname like this: -fc_switch_ports_HOSTNAME +brocade_san_switch_ports_HOSTNAME # Special requirements: # - the pysnmp module; on RHEL 6 with EPEL 6, you may simply yum- # install it """ +# Note: In the SNMP output from Brocade switches, the interesting +# counters are named with numbers starting with 1, while the +# ports' real names on the box and in the administration interface +# start with 0. And there doesn't seem to be a way to map between +# ifDesc and the interesting crc and enc_out counters :-( +# Therefore, this plugin is Brocade-specific, and thus some +# manipulation of port numbers is performed for the output +# of this plugin (see comments marked ARGH below). + # TODOs: -# - implement snmpconf +# - implement snmpconf? # Munin magic markers #%# family=manual @@ -81,16 +90,15 @@ fc_switch_ports_HOSTNAME # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ==================================================================== -# $Id: fc_switch_ports_ 15423 2011-03-01 13:21:14Z tra $ +# $Id: brocade_san_switch_ports_ 15443 2011-03-03 12:23:56Z tra $ import os, sys, re from pysnmp.entity.rfc3413.oneliner import cmdgen -my_canonical_name = 'fc_switch_ports_' # If called as - e.g. - - # fc_switch_ports_sansw1,then - # sansw1 will be interpreted as - # the host_name -community = 'public' +my_canonical_name = 'brocade_san_switch_ports_' # If called as - e.g. - + # brocade_san_switch_ports_sansw1,then + # sansw1 will be interpreted as + # the host_name # For reference: # SW-MIB::swFCPortLinkState = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.6 @@ -116,11 +124,15 @@ descriptions = { 'rx_crcs' : 'the number of CRC errors detected for frames received', 'enc_out' : 'encoding errors outside FC frame', 'enc_out_per_mframe': 'enc errors outside FC frame, per million frames of rx+tx trafic', - 'words' : 'transmitted/received words' + 'bits' : 'received(rx)/transmitted(tx) bits' } -# These counter types don't distinguish -combined_tx_rx_countertypes = [ 'rx_crcs', 'enc_out', 'enc_out_per_mframe' ] +rrd_types = { + 'rx_crcs' : 'GAUGE', + 'enc_out' : 'GAUGE', + 'enc_out_per_mframe': 'GAUGE', + 'bits' : 'COUNTER' +} # Some helper functions: @@ -167,68 +179,64 @@ def varBindTable2plainDict(varBindTable): def print_config(host_name,enabled_ports): print('host_name %s' % host_name) + # Per-port for counter_type in descriptions: for portnum in enabled_ports: - print('multigraph %s.port_%d' % (counter_type,portnum)) - print('graph_title Port %d %s' % (portnum,counter_type)) + print('multigraph %s.port_%d' % (counter_type,portnum-1)) # ARGH: numbering base stuff + print('graph_title Port %d %s' % (portnum-1,counter_type)) # ARGH: numbering base stuff print('graph_args --base 1000 -l 0') print('graph_category SAN') print('graph_info This graph shows the count of %s' % descriptions[counter_type]) - # for some of the graphs, there is an in/out aspect - if 
counter_type in combined_tx_rx_countertypes: + if counter_type == 'bits': + print('graph_vlabel bits rx (-) / tx (+) per ${graph_period}') + print('graph_order rx tx') + print('rx.label rx') + print('rx.graph no') + print('rx.type %s' % rrd_types[counter_type]) + print('rx.max 20000000000') # initial-spike prevention: 20Gbit/s is max FC speed + print('tx.label bps') + print('tx.negative rx') + print('tx.type %s' % rrd_types[counter_type]) + print('tx.max 20000000000') # initial-spike prevention: 20Gbit/s is max FC speed + else: print('graph_vlabel count') print('count.label count') - print('count.min 0') - - # enc_out_per_mframe is special; others will just get the implied default (GAUGE) - if counter_type == 'enc_out_per_mframe': - print('count.type COUNTER') - else: - print('graph_vlabel units in (-) / out (+) per ${graph_period}') - print('graph_order tx rx') - print('tx.label tx') - print('tx.graph no') - print('tx.min 0') - print('rx.label tx') - print('rx.negative tx') - print('rx.min 0') - print('rx.info units transmitted/received by this interface') + print('count.type %s' % rrd_types[counter_type]) + # Totals for counter_type in descriptions: print('multigraph %s' % counter_type) print('graph_title %s total %s' % (host_name,counter_type)) print('graph_args --base 1000 -l 0') print('graph_category SAN') print('graph_info This graph shows the total count of %s across all ports' % descriptions[counter_type]) - if counter_type in combined_tx_rx_countertypes: + + if counter_type == 'bits': + print('graph_vlabel bits rx (-) / tx (+) per ${graph_period}') + print('rx.label rx') + print('rx.graph no') + print('rx.type %s' % rrd_types[counter_type]) + print('rx.max 800000000000') # initial-spike prevention: Assuming a max of 40 ports with each 20Gbit/s max + print('tx.label bps') + print('tx.negative rx') + print('tx.type %s' % rrd_types[counter_type]) + print('tx.max 800000000000') # initial-spike prevention: Assuming a max of 40 ports with each 20Gbit/s max + 
else: print('graph_vlabel count') print('count.label count') - print('count.min 0') - - # enc_out_per_mframe is special; others will just get the implied default (GAUGE) - if counter_type == 'enc_out_per_mframe': - print('count.type COUNTER') - else: - print('graph_vlabel units in (-) / out (+) per ${graph_period}') - print('tx.label tx') - print('tx.graph no') - print('tx.min 0') - print('rx.label tx') - print('rx.negative tx') - print('rx.min 0') - print('rx.info units transmitted/received') + print('count.type %s' % rrd_types[counter_type]) # We don't care for disabled ports -def get_enabled_ports(host_name): - link_states = get_port_values(host_name,port_link_state_oidstr) +def get_enabled_ports(host_name,community): + link_states = get_port_values(host_name,community,port_link_state_oidstr) # status 1 means enabled return [ portnum for portnum in link_states if link_states[portnum] == 1 ] # Talk to the SNMP agent performing the equivalent of an snmpwalk from # the starting point indicated by the oid_start_tpl tuple. # Handle potential errors. -def pull_values(host_name,oid_start_tpl): +def pull_values(host_name,community,oid_start_tpl): try: errorIndication, errorStatus, errorIndex, varBindTable = cmdgen.CommandGenerator().bulkCmd( cmdgen.CommunityData('whatever', community), @@ -249,9 +257,9 @@ def pull_values(host_name,oid_start_tpl): # Combine oidstr2tupl, pull_values and varBindTable2plainDict. 
# Return dict of port-number => count -def get_port_values(host_name,oid_start_str): +def get_port_values(host_name,community,oid_start_str): return varBindTable2plainDict( - pull_values(host_name,oidstr2tuple(oid_start_str)) + pull_values(host_name,community,oidstr2tuple(oid_start_str)) ) # Initial sanity check @@ -273,7 +281,13 @@ if match: else: bailout('Missing host_name and/or counter type') -enabled_ports = get_enabled_ports(host_name) +# Determine SNMP community +try: + community = os.environ['community'] +except: + community = 'public' + +enabled_ports = get_enabled_ports(host_name,community) # See how we were called if n_args == 2: @@ -291,27 +305,27 @@ if n_args == 2: # Prepare some structures counters = {} -counters['rx_crcs' ] = get_port_values(host_name,oidstrs['rx_crcs' ]) -counters['enc_out' ] = get_port_values(host_name,oidstrs['enc_out' ]) -counters['tx_frames'] = get_port_values(host_name,oidstrs['tx_frames']) -counters['rx_frames'] = get_port_values(host_name,oidstrs['rx_frames']) -counters['tx_words' ] = get_port_values(host_name,oidstrs['tx_words' ]) -counters['rx_words' ] = get_port_values(host_name,oidstrs['rx_words' ]) +counters['rx_crcs' ] = get_port_values(host_name,community,oidstrs['rx_crcs' ]) +counters['enc_out' ] = get_port_values(host_name,community,oidstrs['enc_out' ]) +counters['rx_frames'] = get_port_values(host_name,community,oidstrs['rx_frames']) +counters['tx_frames'] = get_port_values(host_name,community,oidstrs['tx_frames']) +counters['rx_words' ] = get_port_values(host_name,community,oidstrs['rx_words' ]) +counters['tx_words' ] = get_port_values(host_name,community,oidstrs['tx_words' ]) totals = {} totals['rx_crcs'] = 0 totals['enc_out'] = 0 totals['enc_out_per_mframe'] = 0 -totals['tx_frames'] = 0 totals['rx_frames'] = 0 -totals['tx_words'] = 0 -totals['rx_words'] = 0 +totals['tx_frames'] = 0 +totals['rx_bits'] = 0 +totals['tx_bits'] = 0 # special handling of enc_out per million frames counters['enc_out_per_mframe'] = {} 
-for k in counters['tx_frames'].keys(): - if counters['tx_frames'][k] + counters['rx_frames'][k] > 0: - counters['enc_out_per_mframe'][k] = 1000000*counters['enc_out'][k] / (counters['tx_frames'][k] + counters['rx_frames'][k]) +for k in counters['rx_frames'].keys(): + if counters['rx_frames'][k] + counters['tx_frames'][k] > 0: + counters['enc_out_per_mframe'][k] = 1000000*counters['enc_out'][k] / (counters['rx_frames'][k] + counters['tx_frames'][k]) else: counters['enc_out_per_mframe'][k] = 0 @@ -323,31 +337,31 @@ for k in counters['tx_frames'].keys(): # Per-port values for portnum in enabled_ports: for counter_type in descriptions: - print('multigraph %s.port_%d' % (counter_type,portnum)) + print('multigraph %s.port_%d' % (counter_type,portnum-1)) # ARGH: numbering base stuff - # for some of the graphs, there is an in/out aspect - if counter_type in combined_tx_rx_countertypes: + # For some of the graphs, there is an in/out aspect, for others + # they are combined or not applicable + if counter_type == 'bits': + rx_value = counters['rx_words'][portnum] + tx_value = counters['tx_words'][portnum] + rx_bits = rx_value * 40 # Each word consists of four + tx_bits = tx_value * 40 # 10-bit units. 
+ print('rx.value %d' % rx_bits) + print('tx.value %d' % tx_bits) + totals['rx_bits'] += rx_bits + totals['tx_bits'] += tx_bits + else: print('count.value %d' % counters[counter_type][portnum]) totals[counter_type] += counters[counter_type][portnum] - else: - tx_key = 'tx_%s' % counter_type - rx_key = 'rx_%s' % counter_type - tx_value = counters[tx_key][portnum] - rx_value = counters[rx_key][portnum] - print('tx.value %d' % tx_value) - print('rx.value %d' % rx_value) - totals[tx_key] += tx_value - totals[rx_key] += rx_value # Totals for counter_type in descriptions: print('multigraph %s' % (counter_type)) - # for some of the graphs, there is an in/out aspect - if counter_type in combined_tx_rx_countertypes: - print('count.value %d' % totals[counter_type]) + # For some of the graphs, there is an in/out aspect, for others + # they are combined or not applicable + if counter_type == 'bits': + print('rx.value %d' % totals['rx_bits']) + print('tx.value %d' % totals['tx_bits']) else: - tx_key = 'tx_%s' % counter_type - rx_key = 'rx_%s' % counter_type - print('tx.value %d' % totals[tx_key]) - print('rx.value %d' % totals[rx_key]) + print('count.value %d' % totals[counter_type])