#!/usr/bin/python3 -tt
# -*- coding: utf-8 -*-

"""
Munin plugin to monitor Varnish 4 status.

Copyright 2016, Kim B. Heino, b@bbbs.net, Foobar Oy
License GPLv2+

This is based heavily on varnish4_ plugin from Munin contrib by
Kristian Lyngstol / Redpill Linpro AS.

#%# capabilities=autoconf
#%# family=auto
"""

# pylint: disable=invalid-name
# pylint: enable=invalid-name

import json
import subprocess
import sys

# Value reported for a field whose counter is absent from the varnishstat
# output. Munin reads "U" as "unknown", so one missing counter leaves a gap
# in one field instead of aborting the whole plugin run.
_UNKNOWN = 'U'

# Munin field name -> varnishstat counter name for the storage counters.
# Every field not listed here is read from 'MAIN.<field>'.
_STORAGE_COUNTERS = {
    'SMA_Transient_c_bytes': 'SMA.Transient.c_bytes',
    'SMA_Transient_c_fail': 'SMA.Transient.c_fail',
    'SMA_Transient_g_bytes': 'SMA.Transient.g_bytes',
    'SMA_Transient_g_space': 'SMA.Transient.g_space',
    'SMF_s0_c_fail': 'SMF.s0.c_fail',
    'SMF_s0_g_bytes': 'SMF.s0.g_bytes',
    'SMF_s0_g_space': 'SMF.s0.g_space',
}

# Complete "config" output, one entry per emitted line, in emission order.
_CONFIG_LINES = (
    # backend_traffic
    'multigraph varnish_backend_traffic',
    'graph_category webserver',
    'graph_title Backend traffic',
    'backend_busy.label Backend conn. too many',
    'backend_busy.min 0',
    'backend_busy.type DERIVE',
    'backend_conn.label Backend conn. success',
    'backend_conn.min 0',
    'backend_conn.type DERIVE',
    'backend_retry.label Backend conn. retry',
    'backend_retry.min 0',
    'backend_retry.type DERIVE',
    'backend_unhealthy.label Backend conn. not attempted',
    'backend_unhealthy.min 0',
    'backend_unhealthy.warning :1',
    'backend_unhealthy.type DERIVE',
    'backend_recycle.label Backend conn. recycles',
    'backend_recycle.min 0',
    'backend_recycle.type DERIVE',
    'backend_fail.label Backend conn. failures',
    'backend_fail.min 0',
    'backend_fail.type DERIVE',
    'backend_toolate.label Backend conn. was closed',
    'backend_toolate.min 0',
    'backend_toolate.type DERIVE',
    'backend_reuse.label Backend conn. reuses',
    'backend_reuse.min 0',
    'backend_reuse.type DERIVE',
    'backend_req.label Backend requests made',
    'backend_req.min 0',
    'backend_req.type DERIVE',

    # bad
    'multigraph varnish_bad',
    'graph_category webserver',
    'graph_title Misbehavior',
    'sess_drop.label Sessions dropped',
    'sess_drop.type DERIVE',
    'threads_failed.label Thread creation failed',
    'threads_failed.type DERIVE',
    'threads_destroyed.label Threads destroyed',
    'threads_destroyed.type DERIVE',
    'thread_queue_len.label Length of session queue',
    'thread_queue_len.type GAUGE',
    'sess_pipe_overflow.label Session pipe overflow',
    'sess_pipe_overflow.type DERIVE',
    'esi_warnings.label ESI parse warnings (unlock)',
    'esi_warnings.type DERIVE',
    'sess_fail.label Session accept failures',
    'sess_fail.type DERIVE',
    'backend_busy.label Backend conn. too many',
    'backend_busy.type DERIVE',
    'esi_errors.label ESI parse errors (unlock)',
    'esi_errors.type DERIVE',
    'SMF_s0_c_fail.label Allocator failures SMF s0',
    'SMF_s0_c_fail.type DERIVE',
    'SMA_Transient_c_fail.label Allocator failures SMA Transient',
    'SMA_Transient_c_fail.type DERIVE',
    'losthdr.label HTTP header overflows',
    'losthdr.type DERIVE',
    'backend_unhealthy.label Backend conn. not attempted',
    'backend_unhealthy.type DERIVE',
    'threads_limited.label Threads hit max',
    'threads_limited.type DERIVE',
    'fetch_failed.label Fetch failed (all causes)',
    'fetch_failed.type DERIVE',

    # expunge
    'multigraph varnish_expunge',
    'graph_category webserver',
    'graph_title Object expunging',
    'graph_order n_expired n_lru_nuked',
    'n_lru_nuked.label Number of LRU nuked objects',
    'n_lru_nuked.min 0',
    'n_lru_nuked.type DERIVE',
    'n_expired.label Number of expired objects',
    'n_expired.min 0',
    'n_expired.type DERIVE',

    # hit_rate
    'multigraph varnish_hit_rate',
    'graph_category webserver',
    'graph_title Hit rates',
    'graph_order client_req cache_hit cache_miss cache_hitpass',
    'graph_scale no',
    'graph_vlabel %',
    'graph_args -l 0 -u 100 --rigid',
    'client_req.label Good client requests received',
    'client_req.graph off',
    'client_req.min 0',
    'client_req.type DERIVE',
    'cache_miss.label Cache misses',
    'cache_miss.min 0',
    'cache_miss.draw STACK',
    'cache_miss.cdef cache_miss,client_req,/,100,*',
    'cache_miss.type DERIVE',
    'cache_hit.label Cache hits',
    'cache_hit.min 0',
    'cache_hit.draw AREA',
    'cache_hit.cdef cache_hit,client_req,/,100,*',
    'cache_hit.type DERIVE',
    'cache_hitpass.label Cache hits for pass',
    'cache_hitpass.min 0',
    'cache_hitpass.draw STACK',
    'cache_hitpass.cdef cache_hitpass,client_req,/,100,*',
    'cache_hitpass.type DERIVE',

    # memory_usage
    'multigraph varnish_memory_usage',
    'graph_category webserver',
    'graph_title Memory usage',
    'graph_vlabel bytes',
    'graph_args --base 1024',
    'SMA_Transient_g_bytes.label Bytes outstanding SMA Transient',
    'SMA_Transient_g_bytes.type GAUGE',
    'SMA_Transient_g_space.label Bytes available SMA Transient',
    'SMA_Transient_g_space.type GAUGE',
    'SMF_s0_g_bytes.label Bytes outstanding SMF s0',
    'SMF_s0_g_bytes.type GAUGE',
    'sms_nbytes.label SMS outstanding bytes',
    'sms_nbytes.type GAUGE',
    'sms_balloc.label SMS bytes allocated',
    'sms_balloc.type GAUGE',
    'SMF_s0_g_space.label Bytes available SMF s0',
    'SMF_s0_g_space.type GAUGE',
    'SMA_Transient_c_bytes.label Bytes allocated SMA Transient',
    'SMA_Transient_c_bytes.type DERIVE',

    # objects
    'multigraph varnish_objects',
    'graph_category webserver',
    'graph_title Number of objects',
    'graph_order n_object n_objectcore n_vampireobject n_objecthead',
    'n_object.label Number of objects',
    'n_object.type GAUGE',
    'n_vampireobject.label Number of unresurrected objects',
    'n_vampireobject.type GAUGE',
    'n_objectcore.label Number of object cores',
    'n_objectcore.type GAUGE',
    'n_objecthead.label Number of object heads',
    'n_objecthead.info Each object head can have one or more object '
    'attached, typically based on the Vary: header',
    'n_objecthead.type GAUGE',

    # request_rate
    'multigraph varnish_request_rate',
    'graph_category webserver',
    'graph_title Request rates',
    # NOTE(review): "client_conn" has no matching field in this graph (the
    # accepted-sessions field here is "sess_conn") -- confirm whether this
    # order entry should be renamed. Left untouched to keep output stable.
    'graph_order cache_hit cache_hitpass cache_miss backend_conn '
    'backend_unhealthy client_req client_conn',
    'client_req.label Good client requests received',
    'client_req.min 0',
    'client_req.colour 111111',
    'client_req.type DERIVE',
    's_pipe.label Total pipe sessions seen',
    's_pipe.min 0',
    's_pipe.colour 1d2bdf',
    's_pipe.type DERIVE',
    'sess_conn.label Sessions accepted',
    'sess_conn.graph ON',
    'sess_conn.min 0',
    'sess_conn.colour 444444',
    'sess_conn.type DERIVE',
    'cache_miss.label Cache misses',
    'cache_miss.min 0',
    'cache_miss.draw STACK',
    'cache_miss.colour FF0000',
    'cache_miss.type DERIVE',
    'backend_conn.label Backend conn. success',
    'backend_conn.min 0',
    'backend_conn.colour 995599',
    'backend_conn.type DERIVE',
    's_pass.label Total pass-ed requests seen',
    's_pass.min 0',
    's_pass.colour 785d0d',
    's_pass.type DERIVE',
    'backend_unhealthy.label Backend conn. not attempted',
    'backend_unhealthy.min 0',
    'backend_unhealthy.colour FF55FF',
    'backend_unhealthy.type DERIVE',
    'cache_hitpass.label Cache hits for pass',
    'cache_hitpass.min 0',
    'cache_hitpass.draw STACK',
    'cache_hitpass.colour FFFF00',
    'cache_hitpass.info Hitpass are cached passes: An entry in the '
    'cache instructing Varnish to pass. Typically achieved after a '
    'pass in vcl_fetch.',
    'cache_hitpass.type DERIVE',
    'cache_hit.label Cache hits',
    'cache_hit.min 0',
    'cache_hit.draw AREA',
    'cache_hit.colour 00FF00',
    'cache_hit.type DERIVE',

    # threads
    'multigraph varnish_threads',
    'graph_category webserver',
    'graph_title Thread status',
    'threads_created.label Threads created',
    'threads_created.min 0',
    'threads_created.type DERIVE',
    'threads_failed.label Thread creation failed',
    'threads_failed.min 0',
    'threads_failed.warning :1',
    'threads_failed.type DERIVE',
    'threads_destroyed.label Threads destroyed',
    'threads_destroyed.min 0',
    'threads_destroyed.warning :1',
    'threads_destroyed.type DERIVE',
    'threads_limited.label Threads hit max',
    'threads_limited.min 0',
    'threads_limited.type DERIVE',
    'threads.label Total number of threads',
    'threads.min 0',
    'threads.warning 1:',
    'threads.type GAUGE',

    # transfer_rates
    'multigraph varnish_transfer_rates',
    'graph_category webserver',
    'graph_title Transfer rates',
    'graph_order s_resp_bodybytes s_resp_hdrbytes',
    'graph_vlabel bit/s',
    'graph_args -l 0',
    's_resp_bodybytes.label Body traffic',
    's_resp_bodybytes.min 0',
    's_resp_bodybytes.draw AREA',
    's_resp_bodybytes.cdef s_resp_bodybytes,8,*',
    's_resp_bodybytes.type DERIVE',
    's_resp_hdrbytes.label Header traffic',
    's_resp_hdrbytes.min 0',
    's_resp_hdrbytes.draw STACK',
    's_resp_hdrbytes.cdef s_resp_hdrbytes,8,*',
    's_resp_hdrbytes.info HTTP Header traffic. TCP/IP overhead is not '
    'included.',
    's_resp_hdrbytes.type DERIVE',

    # uptime
    'multigraph varnish_uptime',
    'graph_category webserver',
    'graph_title Varnish uptime',
    'graph_scale no',
    'graph_vlabel days',
    'uptime.label Management process uptime',
    'uptime.cdef uptime,86400,/',
    'uptime.type GAUGE',
)


def run_binary(arg):
    """Run a binary and return its standard output as text.

    ``arg`` is the argument vector (program path first); it is executed
    directly, without a shell. Standard error is captured and discarded.

    Returns the decoded output, or '' if the program cannot be started.
    """
    try:
        cmd = subprocess.Popen(
            arg, shell=False, close_fds=True, bufsize=-1,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        outdata, _errdata = cmd.communicate()
    except OSError:
        return ''
    # Undecodable bytes are dropped rather than failing the whole run.
    return outdata.decode('utf-8', 'ignore')


def get_values():
    """Run varnishstat and parse its output.

    Returns the decoded JSON document, or an empty dict when varnishstat
    could not be run or did not produce valid JSON.
    """
    output = run_binary(['/usr/bin/varnishstat', '-j'])
    try:
        return json.loads(output)
    except (TypeError, ValueError):
        return {}


def autoconf():
    """Print "yes" if varnishstat returned usable data, otherwise "no"."""
    status = 'yes' if get_values() else 'no'
    print(status)


def config():
    """Print plugin config (graph and field definitions for all graphs)."""
    print('\n'.join(_CONFIG_LINES))


def _counter_value(data, counter):
    """Return data[counter]['value'], or _UNKNOWN if it is not available."""
    try:
        return data[counter]['value']
    except (KeyError, TypeError):
        return _UNKNOWN


def _hit_rate_total(data):
    """Return cache_hit + cache_miss + cache_hitpass, or _UNKNOWN."""
    parts = [
        _counter_value(data, 'MAIN.' + name)
        for name in ('cache_hit', 'cache_miss', 'cache_hitpass')
    ]
    # A partial sum would skew every percentage on the graph, so the total
    # is only reported when all three counters are present.
    if _UNKNOWN in parts:
        return _UNKNOWN
    return sum(parts)


def _print_values(data, fields):
    """Print one "<field>.value <value>" line per field, in order."""
    for field in fields:
        counter = _STORAGE_COUNTERS.get(field, 'MAIN.' + field)
        print('{0}.value {1}'.format(field, _counter_value(data, counter)))


def fetch(data=None):
    """Print values for every graph.

    ``data`` is the parsed varnishstat output, mapping counter name to a
    dict that has a 'value' entry. When omitted, varnishstat is run to
    obtain it. Nothing is printed if no data is available.

    A counter that is missing from ``data`` is reported as "U" for that
    field only; the remaining fields and graphs are still printed.
    """
    if data is None:
        data = get_values()
    if not data:
        return

    print('multigraph varnish_backend_traffic')
    _print_values(data, (
        'backend_busy',
        'backend_conn',
        'backend_retry',
        'backend_unhealthy',
        'backend_recycle',
        'backend_fail',
        'backend_toolate',
        'backend_reuse',
        'backend_req',
    ))

    print('multigraph varnish_bad')
    _print_values(data, (
        'sess_drop',
        'threads_failed',
        'threads_destroyed',
        'thread_queue_len',
        'sess_pipe_overflow',
        'esi_warnings',
        'sess_fail',
        'backend_busy',
        'esi_errors',
        'losthdr',
        'backend_unhealthy',
        'threads_limited',
        'fetch_failed',
        'SMA_Transient_c_fail',
        'SMF_s0_c_fail',
    ))

    print('multigraph varnish_expunge')
    _print_values(data, (
        'n_lru_nuked',
        'n_expired',
    ))

    print('multigraph varnish_hit_rate')
    # In this graph client_req is the sum of the three cache outcomes, not
    # MAIN.client_req, so the cdef percentages are shares of that sum.
    print('client_req.value {0}'.format(_hit_rate_total(data)))
    _print_values(data, (
        'cache_miss',
        'cache_hit',
        'cache_hitpass',
    ))

    print('multigraph varnish_memory_usage')
    _print_values(data, (
        'SMA_Transient_g_bytes',
        'SMA_Transient_g_space',
        'SMF_s0_g_bytes',
        'sms_nbytes',
        'sms_balloc',
        'SMF_s0_g_space',
        'SMA_Transient_c_bytes',
    ))

    print('multigraph varnish_objects')
    _print_values(data, (
        'n_object',
        'n_vampireobject',
        'n_objectcore',
        'n_objecthead',
    ))

    print('multigraph varnish_request_rate')
    _print_values(data, (
        'client_req',
        's_pipe',
        'sess_conn',
        'cache_miss',
        'backend_conn',
        's_pass',
        'backend_unhealthy',
        'cache_hitpass',
        'cache_hit',
    ))

    print('multigraph varnish_threads')
    _print_values(data, (
        'threads_created',
        'threads_failed',
        'threads_destroyed',
        'threads_limited',
        'threads',
    ))

    print('multigraph varnish_transfer_rates')
    _print_values(data, (
        's_resp_bodybytes',
        's_resp_hdrbytes',
    ))

    print('multigraph varnish_uptime')
    _print_values(data, ('uptime',))


if __name__ == '__main__':
    if len(sys.argv) > 1 and sys.argv[1] == 'autoconf':
        autoconf()
    elif len(sys.argv) > 1 and sys.argv[1] == 'config':
        config()
    else:
        fetch()