diff --git a/plugins/nginx/nginx_upstream_multi_ b/plugins/nginx/nginx_upstream_multi_ index 1575e507..3493d075 100755 --- a/plugins/nginx/nginx_upstream_multi_ +++ b/plugins/nginx/nginx_upstream_multi_ @@ -1,29 +1,36 @@ #!/usr/bin/env python3 # -# Munin plugin to monitor requests number, cache statuses, http status codes and average request times of -# specified nginx upstreams. +# Munin plugin to monitor requests number, cache statuses, http status codes and average request +# times of specified nginx upstreams. # # Copyright Igor Borodikhin # # License : GPLv3 # # Configuration parameters: -# env.graphs - which graphs to produce (optional, list of graphs separated by spaces, default - cache http time request) +# env.graphs - which graphs to produce (optional, list of graphs separated by spaces, default - +# cache http time request) # env.log - log file path (mandatory, ex.: /var/log/nginx/upstream.log) -# env.upstream - list of upstreams to monitor (mandatory, including port numbers separated by space, ex.: 10.0.0.1:80 10.0.0.2:8080) -# env.statuses - list of http status codes to monitor (optional, default - all statuses, ex.: 200 403 404 410 500 502) -# env.percentiles - which percentiles to draw on time graphs (optional, list of percentiles separated by spaces, default - 80) +# env.upstream - list of upstreams to monitor (mandatory, including port numbers separated by +# space, e.g.: 10.0.0.1:80 10.0.0.2:8080) +# env.statuses - list of http status codes to monitor (optional, default - all statuses, +# e.g.: 200 403 404 410 500 502) +# env.percentiles - which percentiles to draw on time graphs (optional, list of percentiles +# separated by spaces, default - 80) # # ## Installation -# Copy file to directory /usr/share/munin/pligins/ and create symbolic link(s) for each log file you wish to monitor. +# Copy file to directory /usr/share/munin/plugins/ and create symbolic link(s) for each log file +# you wish to monitor. 
# # Specify log_format at /etc/nginx/conf.d/upstream.conf: -# log_format upstream "ua=[$upstream_addr] ut=[$upstream_response_time] us=[$upstream_status] cs=[$upstream_cache_status]" +# log_format upstream "ua=[$upstream_addr] ut=[$upstream_response_time] us=[$upstream_status] \ +# cs=[$upstream_cache_status]" # # Use it in your site configuration (/etc/nginx/sites-enabled/anything.conf): # access_log /var/log/nginx/upstream.log upstream; # -# Attention! Because munin-node does not have read permission for nginx log files we need to run it as root. +# Attention! Since the default user (nobody) does not have read permission for nginx log files we +# need to run it as root. # # And specify some options in /etc/munin/plugin-conf.d/munin-node: # @@ -35,7 +42,7 @@ # env.statuses 200 403 404 410 500 502 # env.percentiles 50 80 # -#%# family=contrib +# #%# family=contrib import copy import math @@ -66,17 +73,21 @@ else: logPath = "/var/log/nginx/access.log" # Http statuses list -httpStatusString = ("100:Continue;101:Switching protocols;102:Processing;200:OK;201:Created;202:Accepted;" -"203:Non-Authoritative Information;204:No content;205:Reset content;206:Partial content;207:Multi-status;" -"226:IM used;300:Multiple choices;301:Moved permanently;302:Moved temporarily;303:See other;304:Not modified;" -"305:Use proxy;307:Temporary redirect;400:Bad request;401:Unauthorized;402:Payment required;403:Forbidden;" -"404:Not found;405:Method not allowed;406:Not acceptable;407:Proxy Authentication Required;408:Request timeout;" -"409:Conflict;410:Gone;411:Length required;412:Precondition failed;413:Request entity too large;" -"414:Request URI too large;415:Usupported media type;416:Request range not satisfiable;417:Expectation failed;" -"422:Unprocessable entity;423:Locked;424:Failed dependency;425:Unordered collection;426:Upgrade required;" -"449:Retry with;456:Unrecoverable error;500:Internal server error;501:Not implemented;502:Bad gateway;" -"503:Service unavailable;504:Gateway 
timeout;505:HTTP version not supported;506:Variant also negotiates;" -"507:Insufficient storage;508:Loop detected;509:Bandwidth limit exceeded;510:Not extended") +httpStatusString = ( + "100:Continue;101:Switching protocols;102:Processing;200:OK;201:Created;202:Accepted;" + "203:Non-Authoritative Information;204:No content;205:Reset content;206:Partial content;" + "207:Multi-status;226:IM used;300:Multiple choices;301:Moved permanently;" + "302:Moved temporarily;303:See other;304:Not modified;305:Use proxy;307:Temporary redirect;" + "400:Bad request;401:Unauthorized;402:Payment required;403:Forbidden;404:Not found;" + "405:Method not allowed;406:Not acceptable;407:Proxy Authentication Required;" + "408:Request timeout;409:Conflict;410:Gone;411:Length required;412:Precondition failed;" + "413:Request entity too large;414:Request URI too large;415:Usupported media type;" + "416:Request range not satisfiable;417:Expectation failed;422:Unprocessable entity;" + "423:Locked;424:Failed dependency;425:Unordered collection;426:Upgrade required;" + "449:Retry with;456:Unrecoverable error;500:Internal server error;501:Not implemented;" + "502:Bad gateway;503:Service unavailable;504:Gateway timeout;505:HTTP version not supported;" + "506:Variant also negotiates;507:Insufficient storage;508:Loop detected;" + "509:Bandwidth limit exceeded;510:Not extended") if "statuses" in os.environ: statuses = os.environ["statuses"].split() @@ -88,11 +99,11 @@ for statusString in httpStatusString.split(";"): [code, title] = statusString.split(":") if len(statuses) > 0 and code in statuses or len(statuses) == 0: httpStatusList[code] = { - "title" : title, - "requests" : 0 + "title": title, + "requests": 0 } -cacheStatusList = { "MISS" : 0, "BYPASS" : 0, "EXPIRED" : 0, "UPDATING" : 0, "STALE" : 0, "HIT" : 0 } +cacheStatusList = {"MISS": 0, "BYPASS": 0, "EXPIRED": 0, "UPDATING": 0, "STALE": 0, "HIT": 0} # Parse upstreams upstreams = {} @@ -101,11 +112,11 @@ if "upstream" in os.environ: 
upstreamList = upstreamString.split() for upstream in upstreamList: upstreams[upstream] = { - "requests" : 0, - "time" : 0, - "times" : [], - "cache" : copy.deepcopy(cacheStatusList), - "http" : copy.deepcopy(httpStatusList) + "requests": 0, + "time": 0, + "times": [], + "cache": copy.deepcopy(cacheStatusList), + "http": copy.deepcopy(httpStatusList) } else: raise Exception("No upstreams specified") @@ -132,6 +143,7 @@ except OSError: def sanitize(string): return string.replace(".", "_").replace(":", "_").replace("/", "_").replace("-", "_") + if len(sys.argv) == 2 and sys.argv[1] == "config": # Parent graph declaration print("multigraph nginx_upstream_multi_%s" % siteName.replace(".", "_")) @@ -145,7 +157,8 @@ if len(sys.argv) == 2 and sys.argv[1] == "config": if "request" in graphs_enabled: for upstream in upstreams.keys(): print() - print("multigraph nginx_upstream_multi_%s.%s_requests" % (sanitize(siteName), sanitize(upstream))) + print("multigraph nginx_upstream_multi_%s.%s_requests" + % (sanitize(siteName), sanitize(upstream))) print("graph_title Requests number - %s" % upstream) print("graph_vlabel rps") print("graph_category webserver") @@ -156,32 +169,37 @@ if len(sys.argv) == 2 and sys.argv[1] == "config": if "time" in graphs_enabled: for upstream in upstreams.keys(): print() - print("multigraph nginx_upstream_multi_%s.%s_times" % (sanitize(siteName), sanitize(upstream))) + print("multigraph nginx_upstream_multi_%s.%s_times" + % (sanitize(siteName), sanitize(upstream))) print("graph_title Request time - %s" % upstream) print("graph_vlabel sec.") print("graph_category webserver") print("us%s_times.label average" % (sanitize(upstream))) for percentile in percentiles: - print("us%s_times_percentile_%s.label %s-percentile" % (sanitize(upstream), percentile, percentile)) + print("us%s_times_percentile_%s.label %s-percentile" + % (sanitize(upstream), percentile, percentile)) print() # HTTP Status codes graph declaration if "http" in graphs_enabled: for upstream 
in upstreams.keys(): print() - print("multigraph nginx_upstream_multi_%s.%s_statuses" % (sanitize(siteName), sanitize(upstream))) + print("multigraph nginx_upstream_multi_%s.%s_statuses" + % (sanitize(siteName), sanitize(upstream))) print("graph_title HTTP - %s" % upstream) print("graph_vlabel rps") print("graph_category webserver") for status in sorted(httpStatusList.keys()): - print("http%s_%s_status.label %s - %s" % (status, sanitize(upstream), status, httpStatusList[status]["title"])) + print("http%s_%s_status.label %s - %s" + % (status, sanitize(upstream), status, httpStatusList[status]["title"])) print() # Cache status graph declaration if "cache" in graphs_enabled: for upstream in upstreams.keys(): print() - print("multigraph nginx_upstream_multi_%s.%s_cache" % (sanitize(siteName), sanitize(upstream))) + print("multigraph nginx_upstream_multi_%s.%s_cache" + % (sanitize(siteName), sanitize(upstream))) print("graph_title Cache - %s" % upstream) print("graph_vlabel rps") print("graph_category webserver") @@ -199,7 +217,7 @@ else: except Exception: lastByte = 0 - if lastByteHandle != None: + if lastByteHandle is not None: lastByteHandle.close() try: @@ -224,59 +242,59 @@ else: if (match): # Extract data address = match.group(1) - time = match.group(2) - status = match.group(3) - cache = match.group(4) + time = match.group(2) + status = match.group(3) + cache = match.group(4) # Replace separators by space address = address.replace(",", " ") address = address.replace(" : ", " ") - address = re.sub("\s+", " ", address) + address = re.sub(r"\s+", " ", address) - time = time.replace(",", " ") - time = time.replace(" : ", " ") - time = re.sub("\s+", " ", time) + time = time.replace(",", " ") + time = time.replace(" : ", " ") + time = re.sub(r"\s+", " ", time) - status = status.replace(",", " ") - status = status.replace(" : ", " ") - status = re.sub("\s+", " ", status) + status = status.replace(",", " ") + status = status.replace(" : ", " ") + status = re.sub(r"\s+", 
" ", status) - cache = cache.replace(",", " ") - cache = cache.replace(" : ", " ") - cache = re.sub("\s+", " ", cache) + cache = cache.replace(",", " ") + cache = cache.replace(" : ", " ") + cache = re.sub(r"\s+", " ", cache) addresses = address.split() - times = time.split() - statuses = status.split() - caches = cache.split() + times = time.split() + statuses = status.split() + caches = cache.split() index = 0 for uAddress in addresses: if uAddress in upstreams.keys(): try: - uTime = float(times[index]) + uTime = float(times[index]) except ValueError: - uTime = 0 + uTime = 0 if index < len(statuses): - uStatus = statuses[index] + uStatus = statuses[index] else: uStatus = "-" if index < len(caches): - uCache = caches[index] + uCache = caches[index] else: uCache = "-" if uAddress != "-": - upstreams[uAddress]["requests"] += 1 + upstreams[uAddress]["requests"] += 1 if uTime != "-": - upstreams[uAddress]["time"] += uTime + upstreams[uAddress]["time"] += uTime upstreams[uAddress]["times"].append(uTime) if uStatus != "-" and uStatus in upstreams[uAddress]["http"].keys(): upstreams[uAddress]["http"][uStatus]["requests"] += 1 if uCache != "-": - upstreams[uAddress]["cache"][uCache] += 1 + upstreams[uAddress]["cache"][uCache] += 1 index += 1 try: @@ -301,12 +319,11 @@ else: if "request" in graphs_enabled: for upstream in upstreams.keys(): print() - print("multigraph nginx_upstream_multi_%s.%s_requests" % (sanitize(siteName), sanitize(upstream))) - + print("multigraph nginx_upstream_multi_%s.%s_requests" + % (sanitize(siteName), sanitize(upstream))) value = 0 if timeElapsed > 0: value = upstreams[upstream]["requests"] / timeElapsed - print("us%s_requests.value %s" % (sanitize(upstream), value)) print() @@ -318,27 +335,31 @@ else: uTime = upstreams[upstream]["time"] / upstreams[upstream]["requests"] upstreams[upstream]["times"].sort() print() - print("multigraph nginx_upstream_multi_%s.%s_times" % (sanitize(siteName), sanitize(upstream))) + print("multigraph 
nginx_upstream_multi_%s.%s_times" + % (sanitize(siteName), sanitize(upstream))) print("us%s_times.value %s" % (sanitize(upstream), uTime)) for percentile in percentiles: percentileValue = 0 if upstreams[upstream]["requests"] > 0: uTime = upstreams[upstream]["time"] / upstreams[upstream]["requests"] percentileKey = int(percentile) * len(upstreams[upstream]["times"]) / 100 - if len(upstreams[upstream]["times"])%2 > 0: + if len(upstreams[upstream]["times"]) % 2 > 0: low = int(math.floor(percentileKey)) high = int(math.ceil(percentileKey)) - percentileValue = (upstreams[upstream]["times"][low] + upstreams[upstream]["times"][high]) / 2 + percentileValue = (upstreams[upstream]["times"][low] + + upstreams[upstream]["times"][high]) / 2 else: percentileValue = upstreams[upstream]["times"][int(percentileKey)] - print("us%s_times_percentile_%s.value %s" % (sanitize(upstream), percentile, percentileValue)) + print("us%s_times_percentile_%s.value %s" + % (sanitize(upstream), percentile, percentileValue)) print() # HTTP Status codes graph data if "http" in graphs_enabled: for upstream in upstreams.keys(): print() - print("multigraph nginx_upstream_multi_%s.%s_statuses" % (sanitize(siteName), sanitize(upstream))) + print("multigraph nginx_upstream_multi_%s.%s_statuses" + % (sanitize(siteName), sanitize(upstream))) for status in sorted(httpStatusList.keys()): value = 0 if timeElapsed > 0: @@ -351,7 +372,8 @@ else: if "cache" in graphs_enabled: for upstream in upstreams.keys(): print() - print("multigraph nginx_upstream_multi_%s.%s_cache" % (sanitize(siteName), sanitize(upstream))) + print("multigraph nginx_upstream_multi_%s.%s_cache" + % (sanitize(siteName), sanitize(upstream))) for status in cacheStatusList: value = 0 if timeElapsed > 0: