From 13d5b23442f00ba7f83d24c0737a945a9e4cd420 Mon Sep 17 00:00:00 2001 From: Olivier Mehani Date: Wed, 26 Aug 2020 00:17:18 +1000 Subject: [PATCH] [plugin/docker_] Improvements to docker_ plugin (#1094) * Add support for docker_network * Actually show non running containers counts * Show intermediate and dangling images counts * Add warnings on dangling images and dead containers * Fully support autoconf and suggest * Add graph_total to the status (thereby making the containers graph redundant), CPU and Memory graphs * Show most graphs as AREASTACK, this allows to get a clearer view of both individual and total use * Use LINESTACK1 to work around munin-contrib/munin#1343 * Include all containers in CPU/Memory/Network config, so data from non-running containers is still displayed * Sprinkle some info and extinfo * Add ClientWrapper around docker module, providing caching and sorting * Reverse-engineer author list from git log --- plugins/docker/docker_ | 323 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 284 insertions(+), 39 deletions(-) diff --git a/plugins/docker/docker_ b/plugins/docker/docker_ index c337da79..f2d951b5 100755 --- a/plugins/docker/docker_ +++ b/plugins/docker/docker_ @@ -5,7 +5,7 @@ docker_ - Docker wildcard-plugin to monitor a L<Docker|https://www.docker.com> host. This wildcard plugin provides at the moment only the suffixes C<containers>, C<images>, C<status>, -C<volumes>, C<cpu> and C<memory>. +C<volumes>, C<cpu>, C<memory> and C<network>. =head1 INSTALLATION @@ -18,6 +18,7 @@ C<volumes>, C<cpu> and C<memory>. ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_cpu ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_images ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_memory + ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_network ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_status ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_volumes @@ -56,45 +57,181 @@ Would exclude all containers with the word "runner" in the name. 
env.EXCLUDE_CONTAINER_NAME regexp =back + +=head1 AUTHORS + +This section has been reverse-engineered from git logs + +* Codimp : original rewrite +* Rowan Wookey : performance improvement +* Olivier Mehani : Network support, ClientWrapper, general + cleanup + +=head1 MAGIC MARKERS + +#%# family=auto +#%# capabilities=autoconf suggest + """ import os import sys -import docker import re +from functools import cached_property from multiprocessing import Process, Queue +def sorted_by_creation_date(func): + def sorted_func(*args, **kwargs): + return sorted( + func(*args, **kwargs), + key=( + lambda x: x.attrs['CreatedAt'] + if 'CreatedAt' in x.attrs + else x.attrs['Created'] + ) + ) + return sorted_func + + +class ClientWrapper: + """ + A small wrapper for the docker client, to centralise some parsing logic, + and support caching. + + In addition, when the exclude_re parameter is not None, + any container whose name is matched by the RE will be excluded from reports. + """ + client = None + exclude = None + + def __init__(self, client, exclude_re=None): + self.client = client + if exclude_re: + self.exclude = re.compile(exclude_re) + + @cached_property + @sorted_by_creation_date + def containers(self): + return self.client.containers.list() + + @cached_property + @sorted_by_creation_date + def all_containers(self): + return [c for c in self.client.containers.list(all=True) + if not self.exclude + or not self.exclude.search(c.name)] + + @cached_property + @sorted_by_creation_date + def intermediate_images(self): + return list( + set(self.all_images) + .difference( + set(self.images) + .difference( + set(self.dangling_images) + ) + ) + ) + + @cached_property + @sorted_by_creation_date + def all_images(self): + return self.client.images.list(all=True) + + @cached_property + @sorted_by_creation_date + def images(self): + images = self.client.images.list() + return list( + set(images) + .difference( + set(self.dangling_images)) + ) + + @cached_property + 
@sorted_by_creation_date + def dangling_images(self): + return self.client.images.list(filters={'dangling': True}) + + @cached_property + @sorted_by_creation_date + def volumes(self): + return self.client.volumes.list() + + +def container_summary(container): + summary = container.name + attributes = container_attributes(container) + if attributes: + summary += f' ({attributes})' + return summary + + +def container_attributes(container): + attributes = container.image.tags + attributes.append(container.attrs['Created']) + return ', '.join(attributes) + + def print_containers_status(client): - running = 0 - paused = 0 - created = 0 - restarting = 0 - removing = 0 - exited = 0 - dead = 0 - for container in client.containers.list(): + running = [] + paused = [] + created = [] + restarting = [] + removing = [] + exited = [] + dead = [] + for container in client.all_containers: if container.status == 'running': - running += 1 + running.append(container) elif container.status == 'paused': - paused += 1 + paused.append(container) elif container.status == 'created': - created += 1 + created.append(container) elif container.status == 'restarting': - restarting += 1 + restarting.append(container) elif container.status == 'removing': - removing += 1 + removing.append(container) elif container.status == 'exited': - exited += 1 + exited.append(container) elif container.status == 'dead': - dead += 1 - print('running.value', running) - print('paused.value', paused) - print('created.value', created) - print('restarting.value', restarting) - print('removing.value', removing) - print('exited.value', exited) - print('dead.value', dead) + dead.append(container) + print('running.value', len(running)) + print('running.extinfo', ', '.join(container_summary(c) for c in running)) + print('paused.value', len(paused)) + print('paused.extinfo', ', '.join(container_summary(c) for c in paused)) + print('created.value', len(created)) + print('created.extinfo', ', '.join(container_summary(c) for c 
in created)) + print('restarting.value', len(restarting)) + print('restarting.extinfo', ', '.join(container_summary(c) for c in restarting)) + print('removing.value', len(removing)) + print('removing.extinfo', ', '.join(container_summary(c) for c in removing)) + print('exited.value', len(exited)) + print('exited.extinfo', ', '.join(container_summary(c) for c in exited)) + print('dead.value', len(dead)) + print('dead.extinfo', ', '.join(container_summary(c) for c in dead)) + + +def image_summary(image): + attributes = image.tags + attributes.append(image.attrs['Created']) + attributes.append(f"{round(image.attrs['Size']/1024**2, 2)} MiB") + return f"{image.short_id} ({', '.join(attributes)})" + + +def print_images_count(client): + images = client.images + intermediate = client.intermediate_images + dangling = client.dangling_images + + print('intermediate_quantity.value', len(intermediate)) + print('intermediate_quantity.extinfo', ', '.join(image_summary(i) for i in intermediate)) + print('images_quantity.value', len(images)) + print('images_quantity.extinfo', ', '.join(image_summary(i) for i in images)) + print('dangling_quantity.value', len(dangling)) + print('dangling_quantity.extinfo', ', '.join(image_summary(i) for i in dangling)) def get_container_stats(container, q): @@ -104,10 +241,7 @@ def get_container_stats(container, q): def parallel_container_stats(client): proc_list = [] stats = {} - exclude = os.getenv('EXCLUDE_CONTAINER_NAME') - for container in client.containers.list(): - if exclude and re.search(exclude, container.name): - break + for container in client.containers: q = Queue() p = Process(target=get_container_stats, args=(container, q)) proc_list.append({'proc': p, 'queue': q, 'container': container}) @@ -122,18 +256,39 @@ def print_containers_cpu(client): for container, stats in parallel_container_stats(client): cpu_count = len(stats["cpu_stats"]["cpu_usage"]["percpu_usage"]) cpu_percent = 0.0 - cpu_delta = 
float(stats["cpu_stats"]["cpu_usage"]["total_usage"]) \ - - float(stats["precpu_stats"]["cpu_usage"]["total_usage"]) - system_delta = float(stats["cpu_stats"]["system_cpu_usage"]) \ - - float(stats["precpu_stats"]["system_cpu_usage"]) + cpu_delta = (float(stats["cpu_stats"]["cpu_usage"]["total_usage"]) + - float(stats["precpu_stats"]["cpu_usage"]["total_usage"])) + system_delta = (float(stats["cpu_stats"]["system_cpu_usage"]) + - float(stats["precpu_stats"]["system_cpu_usage"])) if system_delta > 0.0: cpu_percent = cpu_delta / system_delta * 100.0 * cpu_count print(container.name + '.value', cpu_percent) + print(container.name + '.extinfo', container_attributes(container)) def print_containers_memory(client): for container, stats in parallel_container_stats(client): print(container.name + '.value', stats['memory_stats']['stats']['total_rss']) + print(container.name + '.extinfo', container_attributes(container)) + + +def print_containers_network(client): + for container, stats in parallel_container_stats(client): + tx_bytes = 0 + rx_bytes = 0 + for data in stats.get('networks', {}).values(): + tx_bytes += data['tx_bytes'] + rx_bytes += data['rx_bytes'] + print(container.name + '_up.value', tx_bytes) + print(container.name + '_down.value', rx_bytes) + print(container.name + '_up.extinfo', container_attributes(container)) + + +def volume_summary(volume): + summary = f"{volume.short_id}" + if volume.attrs['Labels']: + summary += f" ({', '.join(volume.attrs['Labels'])})" + return summary def main(): @@ -143,28 +298,70 @@ def main(): mode = "" wildcard = sys.argv[0].split("docker_")[1].split("_")[0] + try: + import docker + client = docker.from_env() + if mode == "autoconf": + client.ping() + print('yes') + sys.exit(0) + except Exception as e: + print(f'no ({e})') + if mode == "autoconf": + sys.exit(0) + sys.exit(1) + if mode == "suggest": - print("containers") print("cpu") print("images") print("memory") + print("network") print("status") print("volumes") + sys.exit(0) - client = 
docker.from_env() + client = ClientWrapper(client, + exclude_re=os.getenv('EXCLUDE_CONTAINER_NAME')) if wildcard == "status": if mode == "config": print("graph_title Docker status") print("graph_vlabel containers") print("graph_category virtualization") + print("graph_total All containers") print("running.label RUNNING") + print("running.draw AREASTACK") + print("running.info Running containers can be manipulated with " + "`docker container [attach|kill|logs|pause|restart|stop] ` or " + "commands run in them with `docker container exec " + "[--detach|--interactive,--privileged,--tty] `" + ) print("paused.label PAUSED") + print("paused.draw AREASTACK") + print("paused.info Paused containers can be resumed with " + "`docker container unpause `") print("created.label CREATED") + print("created.draw AREASTACK") + print("created.info New containers can be created with " + "`docker container create --name ` or " + "`docker container run --name `") print("restarting.label RESTARTING") + print("restarting.draw AREASTACK") + print("restarting.info Containers can be restarted with " + "`docker container restart `") print("removing.label REMOVING") + print("removing.draw AREASTACK") + print("removing.info Containers can be removed with " + "`docker container rm `") print("exited.label EXITED") + print("exited.draw AREASTACK") + print("exited.info Exited containers can be started with " + "`docker container start [--attach] `") print("dead.label DEAD") + print("dead.draw AREASTACK") + print("dead.warning 1") + print("dead.info Dead containers can be started with " + "`docker container start `") else: print_containers_status(client) elif wildcard == "containers": @@ -174,23 +371,42 @@ def main(): print("graph_category virtualization") print("containers_quantity.label Containers") else: - print('containers_quantity.value', len(client.containers.list())) + print('containers_quantity.value', len(client.containers)) elif wildcard == "images": if mode == "config": print("graph_title 
Docker images") print("graph_vlabel images") print("graph_category virtualization") + print("graph_total All images") + print("intermediate_quantity.label Intermediate images") + print("intermediate_quantity.draw AREASTACK") + print("intermediate_quantity.info All unused images can be deleted with " + "`docker image prune --all`") print("images_quantity.label Images") + print("images_quantity.draw AREASTACK") + print("images_quantity.info Images can be used in containers with " + "`docker container create --name ` or " + "`docker container run --name `") + print("dangling_quantity.label Dangling images") + print("dangling_quantity.draw AREASTACK") + print("dangling_quantity.info Dangling images can be deleted with " + "`docker image prune` " + "or tagged with `docker image tag `") + print("dangling_quantity.warning 10") else: - print('images_quantity.value', len(client.images.list())) + print_images_count(client) elif wildcard == "volumes": if mode == "config": print("graph_title Docker volumes") print("graph_vlabel volumes") print("graph_category virtualization") print("volumes_quantity.label Volumes") + print("volumes_quantity.draw AREASTACK") + print("volumes_quantity.info Unused volumes can be deleted with " + "`docker volume prune`") else: - print('volumes_quantity.value', len(client.volumes.list())) + print('volumes_quantity.value', len(client.volumes)) + print('volumes_quantity.extinfo', ', '.join(volume_summary(v) for v in client.volumes)) elif wildcard == "cpu": if mode == "config": graphlimit = str(os.cpu_count() * 100) @@ -201,8 +417,11 @@ def main(): print("graph_vlabel CPU usage (%)") print("graph_category virtualization") print("graph_info This graph shows docker container CPU usage.") - for container in client.containers.list(): + print("graph_total Total CPU usage") + for container in client.all_containers: print("{}.label {}".format(container.name, container.name)) + print("{}.draw AREASTACK".format(container.name)) + print("{}.info 
{}".format(container.name, container_attributes(container))) else: print_containers_cpu(client) elif wildcard == "memory": @@ -212,10 +431,36 @@ def main(): print("graph_vlabel Bytes") print("graph_category virtualization") print("graph_info This graph shows docker container memory usage.") - for container in client.containers.list(): + print("graph_total Total memory usage") + for container in client.all_containers: print("{}.label {}".format(container.name, container.name)) + print("{}.draw AREASTACK".format(container.name)) + print("{}.info {}".format(container.name, container_attributes(container))) else: print_containers_memory(client) + elif wildcard == "network": + if mode == "config": + print("graph_title Docker containers network usage") + print("graph_args --base 1024 -l 0") + print("graph_vlabel bits in (-) / out (+) per ${graph_period}") + print("graph_category virtualization") + print("graph_info This graph shows docker container network usage.") + print("graph_total Total network usage") + for container in client.all_containers: + print("{}_down.label {}_received".format(container.name, container.name)) + print("{}_down.type DERIVE".format(container.name)) + print("{}_down.min 0".format(container.name)) + print("{}_down.graph no".format(container.name)) + print("{}_down.cdef {}_down,8,*".format(container.name, container.name)) + print("{}_up.label {}".format(container.name, container.name)) + print("{}_up.draw LINESTACK1".format(container.name)) + print("{}_up.type DERIVE".format(container.name)) + print("{}_up.min 0".format(container.name)) + print("{}_up.negative {}_down".format(container.name, container.name)) + print("{}_up.cdef {}_up,8,*".format(container.name, container.name)) + print("{}_up.info {}".format(container.name, container_attributes(container))) + else: + print_containers_network(client) if __name__ == '__main__':