#!/usr/bin/python3 -tt
# -*- coding: utf-8 -*-

""" Munin plugin to monitor Varnish 4 status.

Copyright 2016, Kim B. Heino, b@bbbs.net, Foobar Oy
License GPLv2+

This is based heavily on varnish4_ plugin from Munin contrib
by Kristian Lyngstol / Redpill Linpro AS.

#%# capabilities=autoconf
#%# family=auto
"""
# pylint: disable=invalid-name
# pylint: enable=invalid-name

import json
import subprocess
import sys


def run_binary(arg):
    """Run a binary and return its standard output as text.

    arg is the argument list handed to subprocess.Popen (no shell is
    involved). Standard error is captured and discarded. Returns an empty
    string when the process cannot be started (OSError). Output bytes that
    are not valid UTF-8 are dropped while decoding.
    """
    try:
        # The context manager closes the pipes even if communicate() fails.
        with subprocess.Popen(
                arg, shell=False, close_fds=True, bufsize=-1,
                stdout=subprocess.PIPE, stderr=subprocess.PIPE) as cmd:
            outdata, _ = cmd.communicate()
    except OSError:
        return ''
    return outdata.decode('utf-8', 'ignore')


def get_values():
    """Run varnishstat and parse its JSON output.

    Returns the decoded JSON object as a dict. Returns an empty dict when
    varnishstat could not be run, when its output is not valid JSON, or
    when the JSON document is not an object.
    """
    output = run_binary(['/usr/bin/varnishstat', '-j'])
    try:
        data = json.loads(output)
    except (TypeError, ValueError):
        return {}
    # fetch() indexes the result by counter name, so anything but a dict
    # (e.g. a JSON list or number) is treated as "no data".
    return data if isinstance(data, dict) else {}


def autoconf():
    """Print "yes" if varnishstat returned usable data, otherwise "no"."""
    status = 'yes' if get_values() else 'no'
    print(status)


def config():
    """Print the Munin configuration of every multigraph of this plugin."""
    # pylint: disable=too-many-statements

    # backend_traffic
    print('multigraph varnish_backend_traffic')
    print('graph_category webserver')
    print('graph_title Backend traffic')
    print('backend_busy.label Backend conn. too many')
    print('backend_busy.min 0')
    print('backend_busy.type DERIVE')
    print('backend_conn.label Backend conn. success')
    print('backend_conn.min 0')
    print('backend_conn.type DERIVE')
    print('backend_retry.label Backend conn. retry')
    print('backend_retry.min 0')
    print('backend_retry.type DERIVE')
    print('backend_unhealthy.label Backend conn. not attempted')
    print('backend_unhealthy.min 0')
    print('backend_unhealthy.warning :1')
    print('backend_unhealthy.type DERIVE')
    print('backend_recycle.label Backend conn. recycles')
    print('backend_recycle.min 0')
    print('backend_recycle.type DERIVE')
    print('backend_fail.label Backend conn. failures')
    print('backend_fail.min 0')
    print('backend_fail.type DERIVE')
    print('backend_toolate.label Backend conn. was closed')
    print('backend_toolate.min 0')
    print('backend_toolate.type DERIVE')
    print('backend_reuse.label Backend conn. reuses')
    print('backend_reuse.min 0')
    print('backend_reuse.type DERIVE')
    print('backend_req.label Backend requests made')
    print('backend_req.min 0')
    print('backend_req.type DERIVE')

    # bad
    print('multigraph varnish_bad')
    print('graph_category webserver')
    print('graph_title Misbehavior')
    print('sess_drop.label Sessions dropped')
    print('sess_drop.type DERIVE')
    print('threads_failed.label Thread creation failed')
    print('threads_failed.type DERIVE')
    print('threads_destroyed.label Threads destroyed')
    print('threads_destroyed.type DERIVE')
    print('thread_queue_len.label Length of session queue')
    print('thread_queue_len.type GAUGE')
    print('sess_pipe_overflow.label Session pipe overflow')
    print('sess_pipe_overflow.type DERIVE')
    print('esi_warnings.label ESI parse warnings (unlock)')
    print('esi_warnings.type DERIVE')
    print('sess_fail.label Session accept failures')
    print('sess_fail.type DERIVE')
    print('backend_busy.label Backend conn. too many')
    print('backend_busy.type DERIVE')
    print('esi_errors.label ESI parse errors (unlock)')
    print('esi_errors.type DERIVE')
    print('SMF_s0_c_fail.label Allocator failures SMF s0')
    print('SMF_s0_c_fail.type DERIVE')
    print('SMA_Transient_c_fail.label Allocator failures SMA Transient')
    print('SMA_Transient_c_fail.type DERIVE')
    print('losthdr.label HTTP header overflows')
    print('losthdr.type DERIVE')
    print('backend_unhealthy.label Backend conn. not attempted')
    print('backend_unhealthy.type DERIVE')
    print('threads_limited.label Threads hit max')
    print('threads_limited.type DERIVE')
    print('fetch_failed.label Fetch failed (all causes)')
    print('fetch_failed.type DERIVE')

    # expunge
    print('multigraph varnish_expunge')
    print('graph_category webserver')
    print('graph_title Object expunging')
    print('graph_order n_expired n_lru_nuked')
    print('n_lru_nuked.label Number of LRU nuked objects')
    print('n_lru_nuked.min 0')
    print('n_lru_nuked.type DERIVE')
    print('n_expired.label Number of expired objects')
    print('n_expired.min 0')
    print('n_expired.type DERIVE')

    # hit_rate
    print('multigraph varnish_hit_rate')
    print('graph_category webserver')
    print('graph_title Hit rates')
    print('graph_order client_req cache_hit cache_miss cache_hitpass')
    print('graph_scale no')
    print('graph_vlabel %')
    print('graph_args -l 0 -u 100 --rigid')
    print('client_req.label Good client requests received')
    print('client_req.graph off')
    print('client_req.min 0')
    print('client_req.type DERIVE')
    print('cache_miss.label Cache misses')
    print('cache_miss.min 0')
    print('cache_miss.draw STACK')
    print('cache_miss.cdef cache_miss,client_req,/,100,*')
    print('cache_miss.type DERIVE')
    print('cache_hit.label Cache hits')
    print('cache_hit.min 0')
    print('cache_hit.draw AREA')
    print('cache_hit.cdef cache_hit,client_req,/,100,*')
    print('cache_hit.type DERIVE')
    print('cache_hitpass.label Cache hits for pass')
    print('cache_hitpass.min 0')
    print('cache_hitpass.draw STACK')
    print('cache_hitpass.cdef cache_hitpass,client_req,/,100,*')
    print('cache_hitpass.type DERIVE')

    # memory_usage
    print('multigraph varnish_memory_usage')
    print('graph_category webserver')
    print('graph_title Memory usage')
    print('graph_vlabel bytes')
    print('graph_args --base 1024')
    print('SMA_Transient_g_bytes.label Bytes outstanding SMA Transient')
    print('SMA_Transient_g_bytes.type GAUGE')
    print('SMA_Transient_g_space.label Bytes available SMA Transient')
    print('SMA_Transient_g_space.type GAUGE')
    print('SMF_s0_g_bytes.label Bytes outstanding SMF s0')
    print('SMF_s0_g_bytes.type GAUGE')
    print('sms_nbytes.label SMS outstanding bytes')
    print('sms_nbytes.type GAUGE')
    print('sms_balloc.label SMS bytes allocated')
    print('sms_balloc.type GAUGE')
    print('SMF_s0_g_space.label Bytes available SMF s0')
    print('SMF_s0_g_space.type GAUGE')
    print('SMA_Transient_c_bytes.label Bytes allocated SMA Transient')
    print('SMA_Transient_c_bytes.type DERIVE')

    # objects
    print('multigraph varnish_objects')
    print('graph_category webserver')
    print('graph_title Number of objects')
    print('graph_order n_object n_objectcore n_vampireobject n_objecthead')
    print('n_object.label Number of objects')
    print('n_object.type GAUGE')
    print('n_vampireobject.label Number of unresurrected objects')
    print('n_vampireobject.type GAUGE')
    print('n_objectcore.label Number of object cores')
    print('n_objectcore.type GAUGE')
    print('n_objecthead.label Number of object heads')
    print(
        'n_objecthead.info Each object head can have one or more object '
        'attached, typically based on the Vary: header')
    print('n_objecthead.type GAUGE')

    # request_rate
    print('multigraph varnish_request_rate')
    print('graph_category webserver')
    print('graph_title Request rates')
    print(
        'graph_order cache_hit cache_hitpass cache_miss backend_conn '
        'backend_unhealthy client_req client_conn')
    print('client_req.label Good client requests received')
    print('client_req.min 0')
    print('client_req.colour 111111')
    print('client_req.type DERIVE')
    print('s_pipe.label Total pipe sessions seen')
    print('s_pipe.min 0')
    print('s_pipe.colour 1d2bdf')
    print('s_pipe.type DERIVE')
    print('sess_conn.label Sessions accepted')
    print('sess_conn.graph ON')
    print('sess_conn.min 0')
    print('sess_conn.colour 444444')
    print('sess_conn.type DERIVE')
    print('cache_miss.label Cache misses')
    print('cache_miss.min 0')
    print('cache_miss.draw STACK')
    print('cache_miss.colour FF0000')
    print('cache_miss.type DERIVE')
    print('backend_conn.label Backend conn. success')
    print('backend_conn.min 0')
    print('backend_conn.colour 995599')
    print('backend_conn.type DERIVE')
    print('s_pass.label Total pass-ed requests seen')
    print('s_pass.min 0')
    print('s_pass.colour 785d0d')
    print('s_pass.type DERIVE')
    print('backend_unhealthy.label Backend conn. not attempted')
    print('backend_unhealthy.min 0')
    print('backend_unhealthy.colour FF55FF')
    print('backend_unhealthy.type DERIVE')
    print('cache_hitpass.label Cache hits for pass')
    print('cache_hitpass.min 0')
    print('cache_hitpass.draw STACK')
    print('cache_hitpass.colour FFFF00')
    print(
        'cache_hitpass.info Hitpass are cached passes: An entry in the '
        'cache instructing Varnish to pass. Typically achieved after a '
        'pass in vcl_fetch.')
    print('cache_hitpass.type DERIVE')
    print('cache_hit.label Cache hits')
    print('cache_hit.min 0')
    print('cache_hit.draw AREA')
    print('cache_hit.colour 00FF00')
    print('cache_hit.type DERIVE')

    # threads
    print('multigraph varnish_threads')
    print('graph_category webserver')
    print('graph_title Thread status')
    print('threads_created.label Threads created')
    print('threads_created.min 0')
    print('threads_created.type DERIVE')
    print('threads_failed.label Thread creation failed')
    print('threads_failed.min 0')
    print('threads_failed.warning :1')
    print('threads_failed.type DERIVE')
    print('threads_destroyed.label Threads destroyed')
    print('threads_destroyed.min 0')
    print('threads_destroyed.warning :1')
    print('threads_destroyed.type DERIVE')
    print('threads_limited.label Threads hit max')
    print('threads_limited.min 0')
    print('threads_limited.type DERIVE')
    print('threads.label Total number of threads')
    print('threads.min 0')
    print('threads.warning 1:')
    print('threads.type GAUGE')

    # transfer_rates
    print('multigraph varnish_transfer_rates')
    print('graph_category webserver')
    print('graph_title Transfer rates')
    print('graph_order s_resp_bodybytes s_resp_hdrbytes')
    print('graph_vlabel bit/s')
    print('graph_args -l 0')
    print('s_resp_bodybytes.label Body traffic')
    print('s_resp_bodybytes.min 0')
    print('s_resp_bodybytes.draw AREA')
    print('s_resp_bodybytes.cdef s_resp_bodybytes,8,*')
    print('s_resp_bodybytes.type DERIVE')
    print('s_resp_hdrbytes.label Header traffic')
    print('s_resp_hdrbytes.min 0')
    print('s_resp_hdrbytes.draw STACK')
    print('s_resp_hdrbytes.cdef s_resp_hdrbytes,8,*')
    print(
        's_resp_hdrbytes.info HTTP Header traffic. TCP/IP overhead is not '
        'included.')
    print('s_resp_hdrbytes.type DERIVE')

    # uptime
    print('multigraph varnish_uptime')
    print('graph_category webserver')
    print('graph_title Varnish uptime')
    print('graph_scale no')
    print('graph_vlabel days')
    print('uptime.label Management process uptime')
    print('uptime.cdef uptime,86400,/')
    print('uptime.type GAUGE')


def _counter_value(data, counter):
    """Return data[counter]['value'], or 'U' when it is not available.

    'U' is the value Munin plugins print for an unknown reading.
    """
    try:
        return data[counter]['value']
    except (KeyError, TypeError):
        return 'U'


def fetch(data=None):
    """Print the current value of every field of every multigraph.

    data is an optional mapping from varnishstat counter name (for example
    'MAIN.uptime') to a dict holding the reading under 'value'. When it is
    None the counters are read with get_values(). Nothing is printed when
    no counters are available at all.

    A counter that is absent from data is printed as 'U' instead of
    raising KeyError, so one missing counter no longer suppresses every
    graph that follows it.
    """
    # pylint: disable=too-many-statements
    if data is None:
        data = get_values()
    if not data:
        return

    def emit(field, counter):
        """Print one '<field>.value <reading>' line."""
        print('{0}.value {1}'.format(field, _counter_value(data, counter)))

    def emit_main(fields):
        """Print fields whose counter is 'MAIN.' + the field name."""
        for field in fields:
            emit(field, 'MAIN.' + field)

    # backend_traffic
    print('multigraph varnish_backend_traffic')
    emit_main((
        'backend_busy',
        'backend_conn',
        'backend_retry',
        'backend_unhealthy',
        'backend_recycle',
        'backend_fail',
        'backend_toolate',
        'backend_reuse',
        'backend_req',
    ))

    # bad
    print('multigraph varnish_bad')
    emit_main((
        'sess_drop',
        'threads_failed',
        'threads_destroyed',
        'thread_queue_len',
        'sess_pipe_overflow',
        'esi_warnings',
        'sess_fail',
        'backend_busy',
        'esi_errors',
        'losthdr',
        'backend_unhealthy',
        'threads_limited',
        'fetch_failed',
    ))
    # NOTE(review): the SMF.s0.* / SMA.Transient.* names are hard-coded;
    # presumably they only exist for matching storage backends - confirm
    # against the varnishstat output of the monitored instance.
    emit('SMA_Transient_c_fail', 'SMA.Transient.c_fail')
    emit('SMF_s0_c_fail', 'SMF.s0.c_fail')

    # expunge
    print('multigraph varnish_expunge')
    emit_main((
        'n_lru_nuked',
        'n_expired',
    ))

    # hit_rate
    print('multigraph varnish_hit_rate')
    # In this graph client_req is the sum of hits, misses and hitpasses;
    # the cdefs printed by config() divide each of them by this total.
    lookups = [
        _counter_value(data, 'MAIN.' + name)
        for name in ('cache_hit', 'cache_miss', 'cache_hitpass')
    ]
    total = 'U' if 'U' in lookups else sum(lookups)
    print('client_req.value {0}'.format(total))
    emit_main((
        'cache_miss',
        'cache_hit',
        'cache_hitpass',
    ))

    # memory_usage
    print('multigraph varnish_memory_usage')
    emit('SMA_Transient_g_bytes', 'SMA.Transient.g_bytes')
    emit('SMA_Transient_g_space', 'SMA.Transient.g_space')
    emit('SMF_s0_g_bytes', 'SMF.s0.g_bytes')
    emit('sms_nbytes', 'MAIN.sms_nbytes')
    emit('sms_balloc', 'MAIN.sms_balloc')
    emit('SMF_s0_g_space', 'SMF.s0.g_space')
    emit('SMA_Transient_c_bytes', 'SMA.Transient.c_bytes')

    # objects
    print('multigraph varnish_objects')
    emit_main((
        'n_object',
        'n_vampireobject',
        'n_objectcore',
        'n_objecthead',
    ))

    # request_rate
    print('multigraph varnish_request_rate')
    emit_main((
        'client_req',
        's_pipe',
        'sess_conn',
        'cache_miss',
        'backend_conn',
        's_pass',
        'backend_unhealthy',
        'cache_hitpass',
        'cache_hit',
    ))

    # threads
    print('multigraph varnish_threads')
    emit_main((
        'threads_created',
        'threads_failed',
        'threads_destroyed',
        'threads_limited',
        'threads',
    ))

    # transfer_rates
    print('multigraph varnish_transfer_rates')
    emit_main((
        's_resp_bodybytes',
        's_resp_hdrbytes',
    ))

    # uptime
    print('multigraph varnish_uptime')
    emit('uptime', 'MAIN.uptime')


if __name__ == '__main__':
    # Dispatch on the first command-line argument; anything other than
    # "autoconf" or "config" (including no argument) reports values.
    if len(sys.argv) > 1 and sys.argv[1] == 'autoconf':
        autoconf()
    elif len(sys.argv) > 1 and sys.argv[1] == 'config':
        config()
    else:
        fetch()