#!/usr/bin/env python3
""" Munin plugin to monitor the size and number of objects of a bucket in an S3-compatible storage

=head1 NAME

s3_____multi

This plugin should be linked with a name like this:

s3_<endpoint>_<region>_<bucket>_<folder>_multi

Where:

- endpoint is the S3 endpoint. Ex: s3.eu-west-3.amazonaws.com
- region is the S3 region. Ex: eu-west-3
- bucket is the name of your bucket
- folder is optional.
  If you specify a folder, you will monitor the size of the folders inside that folder instead of the size of the folders at the root of the bucket.
  folder can only be the name of a folder at the root of the bucket.

Ex: ln -s /path/to/s3_____multi /etc/munin/plugins/s3_s3.eu-west-3.amazonaws.com_eu-west-3_bucket1__multi

=head1 CONFIGURATION

The following configuration is required:

[s3_<endpoint>_<region>_<bucket>_*]
env.access_key_id ACCESS_KEY
env.secret_access_key SECRET_ACCESS_KEY

The following configuration is optional:

user munin
env.s3hostname 1

Running as munin is optional, but if your default user is nobody, you may get a write permission error when running the plugin with the update_cache parameter.

Setting env.s3hostname to any value makes the plugin advertise itself as running on <endpoint>, creating a dedicated entry in the munin host list.
If you do so, you MUST add the following entry to munin.conf on the munin master:

[<endpoint>]
address <hostname of the munin-node server running the script>
use_node_name no

Ex:

[s3.eu-west-3.amazonaws.com]
address myserver.mydomain.tld
use_node_name no

Getting the size of a bucket can take a (very) long time, depending on the bucket size.
The script therefore does not perform the actual check every time munin fetches data (every 5 minutes); at fetch time, it reads the data from a local cache.

You MUST run the script yourself to update this cache; a cron entry is a good way to do so.
You MUST run the script through munin-run so that it runs as the right user and gets all the environment variables (including MUNIN_PLUGSTATE and MUNIN_CAP_MULTIGRAPH).

A typical command run by cron would be:

sudo -u munin /usr/sbin/munin-run -d s3_s3.eu-west-3.amazonaws.com_eu-west-3_bucket1__multi update_cache
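
For example, a matching /etc/cron.d entry could look like this (the daily 03:00 schedule is only a suggestion; adapt the interval to how long a full listing of your bucket takes):

0 3 * * * root sudo -u munin /usr/sbin/munin-run s3_s3.eu-west-3.amazonaws.com_eu-west-3_bucket1__multi update_cache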

IMPORTANT: You will not get any graph until you have run the script with the update_cache parameter.

=head1 REQUIREMENTS

Python 3
boto3 module (pip3 install boto3)

=head1 TODO

Support invocation without a bucket name (s3_<endpoint>_<region>___multi) to get a graph with the size/object count of all buckets.

=head1 AUTHOR

Jean-Edouard Babin
https://github.com/jebabin/munin_s3_bucket_size

=head1 LICENSE

GPLv2

=head1 MAGIC MARKERS

#%# capabilities=multigraph

=cut
"""

import json
import logging
import os
import re
import subprocess  # only needed if the legacy s3cmd variant below is re-enabled
import sys

import boto3

# boto3.set_stream_logger('')


""" This is from a preliminary version which used the s3cmd tool instead of the boto3 library

def get_folder_list_s3cmd():
    process = subprocess.run(['s3cmd', 'ls', 's3://' + bucket + rootdir + '/'], stdout=subprocess.PIPE)
    return process.stdout.decode('utf-8')


def get_folder_info_s3cmd(folder):
    process = subprocess.run(['s3cmd', 'du', 's3://' + bucket + rootdir + '/' + folder + '/'], stdout=subprocess.PIPE)
    return process.stdout.decode('utf-8')


def update_cache_s3cmd(cache_path):
    folders = get_folder_list_s3cmd()

    folder_dict = {}
    for line in folders.split('\n'):
        if not line.strip():
            continue
        match = re.search(r"^\s+DIR\s+.*?\/([^\/]+)\/$", line)
        if match is not None:
            folder = match.group(1)

            folder_info = get_folder_info_s3cmd(folder).split('\n')[0]
            # Create the dict entry even if the command later fails, to ensure "config" lists everything
            folder_dict[folder] = {}
            match = re.search(r"^\s*(\d+)\s+(\d+)", folder_info)
            if match is not None:
                size = match.group(1)
                object = match.group(2)
                folder_dict[folder]['size'] = size
                folder_dict[folder]['object'] = object

    with open(cache_path, 'w') as cache_file:
        cache_file.write(json.dumps(folder_dict))
"""


def update_cache(cache_path):
    s3r = boto3.resource('s3', region_name=region, endpoint_url="https://" + host,
                         aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key)
    s3_bucket = s3r.Bucket(bucket)

    # An empty rootdir means "list from the root of the bucket"; a bare "/" prefix would match nothing
    prefix = rootdir + "/" if rootdir != "" else ""

    folder_dict = {}
    for obj in s3_bucket.objects.filter(Prefix=prefix):
        # Aggregate size and object count per first-level folder under the prefix
        obj_path = obj.key[len(prefix):]
        folder = obj_path.split('/')[0]
        if folder == "":
            continue
        if folder in folder_dict:
            folder_dict[folder]['size'] += obj.size
            folder_dict[folder]['object'] += 1
        else:
            folder_dict[folder] = {}
            folder_dict[folder]['size'] = obj.size
            folder_dict[folder]['object'] = 1

    with open(cache_path, 'w') as cache_file:
        cache_file.write(json.dumps(folder_dict))
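
# The cache file is a small JSON document keyed by folder name; the values
# below are purely illustrative:
#   {"photos": {"size": 123456789, "object": 4212},
#    "backups": {"size": 987654321, "object": 87}}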


def read_cache(cache_path):
    # Return the folder statistics written by update_cache(), or None if the cache does not exist yet
    if os.path.isfile(cache_path):
        with open(cache_path) as json_file:
            data = json.load(json_file)
            return data
    else:
        return None
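

# Munin field names must start with [A-Za-z_] and may contain only [A-Za-z0-9_];
# e.g. normalize_name("2021-photos") -> "_021_photos"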
def normalize_name(name):
    normal_first = re.sub(r'^[^A-Za-z_]', r'_', name)
    return re.sub(r'[^A-Za-z0-9_]', r'_', normal_first)


# Exit if multigraph not supported
is_multigraph_capable = os.getenv('MUNIN_CAP_MULTIGRAPH')
if is_multigraph_capable is None:
    sys.exit(1)

# init vars
use_s3hostname = None
host = None
region = None
bucket = None
access_key_id = None
secret_access_key = None
rootdir = ""

# deduce vars from the file name
try:
    # s3_<endpoint>_<region>_<bucket>_<folder>_multi
    match = re.search(r"^(?:|.*\/)s3_([^_]+)_([^_]+)_([^_]+)_([^_]*)_multi$", sys.argv[0])
    if match is not None:
        host = match.group(1)
        region = match.group(2)
        bucket = match.group(3)
        rootdir = match.group(4)
    else:
        print("File name doesn't have the expected format: s3_<endpoint>_<region>_<bucket>_<folder>_multi")
        sys.exit(2)
except Exception as ex:
    logging.error("Caught exception: %s" % ex)
    sys.exit(2)
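
# With the symlink name from the documentation above, the regex yields, e.g.:
#   /etc/munin/plugins/s3_s3.eu-west-3.amazonaws.com_eu-west-3_bucket1__multi
#   -> host='s3.eu-west-3.amazonaws.com', region='eu-west-3', bucket='bucket1', rootdir=''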

# set s3 creds
access_key_id = os.getenv('access_key_id')
secret_access_key = os.getenv('secret_access_key')

if access_key_id is None:
    print('access_key_id environment variable is not defined.')
    sys.exit(3)
if secret_access_key is None:
    print('secret_access_key environment variable is not defined.')
    sys.exit(4)

# use the server hostname or the s3 hostname?
use_s3hostname = os.getenv('s3hostname')

# cache file lives in Munin's plugin state directory (MUNIN_PLUGSTATE is set by munin-run)
tmpfile = os.getenv('MUNIN_PLUGSTATE') + "/s3_" + host + "_" + region + "_" + bucket + "_" + rootdir + ".cache"
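

# Graph layout: one root multigraph per metric (size, object count) stacking all
# first-level folders plus a "total" line, and one sub-multigraph per folder
# (parent.child naming) so each folder also gets its own detailed graph.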
if len(sys.argv) == 2:
    if sys.argv[1] == "config":
        if use_s3hostname is not None:
            print('host_name %s' % host)
        data = read_cache(tmpfile)
        if data is None:
            sys.exit(0)

        # Size
        print('multigraph %s_size' % normalize_name("s3_" + host + "_" + region + "_" + bucket + "_" + rootdir))
        print('graph_category Disk')
        if rootdir == "":
            print('graph_title Size of bucket %s' % bucket)
        else:
            print('graph_title Size of folder %s in bucket %s' % (rootdir, bucket))
        print('graph_vlabel bytes')
        i = 0
        for folder in data:
            print('%s.label %s' % (normalize_name(folder), folder[0:45]))
            if i == 0:
                print('%s.draw AREA' % normalize_name(folder))
                i = 1
            else:
                print('%s.draw STACK' % normalize_name(folder))
        print('total.label Total')
        print('total.draw LINE1')

        # Size per folder
        for folder in data:
            print('multigraph %s_size.%s' % (normalize_name("s3_" + host + "_" + region + "_" + bucket + "_" + rootdir), normalize_name(folder)))
            print('data.label %s' % folder[0:45])
            print('graph_category Disk')
            if rootdir == "":
                print('graph_title Folder size inside bucket %s' % bucket)
            else:
                print('graph_title Folder size inside folder %s of bucket %s' % (rootdir, bucket))
            print('graph_vlabel bytes')
            print('data.draw LINE1')

        # Object
        print('multigraph %s_object' % normalize_name("s3_" + host + "_" + region + "_" + bucket + "_" + rootdir))
        print('graph_category Disk')
        if rootdir == "":
            print('graph_title Objects in bucket %s' % bucket)
        else:
            print('graph_title Objects in folder %s of bucket %s' % (rootdir, bucket))
        print('graph_vlabel # of objects')
        i = 0
        for folder in data:
            print('%s.label %s' % (normalize_name(folder), folder[0:45]))
            if i == 0:
                print('%s.draw AREA' % normalize_name(folder))
                i = 1
            else:
                print('%s.draw STACK' % normalize_name(folder))
        print('total.label Total')
        print('total.draw LINE1')

        # Object per folder
        for folder in data:
            print('multigraph %s_object.%s' % (normalize_name("s3_" + host + "_" + region + "_" + bucket + "_" + rootdir), normalize_name(folder)))
            print('data.label %s' % folder[0:45])
            print('graph_category Disk')
            if rootdir == "":
                print('graph_title Folder objects inside bucket %s' % bucket)
            else:
                print('graph_title Folder objects inside folder %s of bucket %s' % (rootdir, bucket))
            print('graph_vlabel # of objects')
            print('data.draw LINE1')

    if sys.argv[1] == "update_cache":
        update_cache(tmpfile)

else:
    data = read_cache(tmpfile)
    if data is None:
        sys.exit(1)
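
    # At fetch time the plugin prints values in this shape (folder names and
    # numbers are purely illustrative):
    #   multigraph s3_..._size
    #   photos.value 123456789
    #   total.value 200000000
    #   multigraph s3_..._size.photos
    #   data.value 123456789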
    size_total = 0
    object_total = 0
    for folder in data:
        size_total = size_total + int(data[folder]['size'])
        object_total = object_total + int(data[folder]['object'])

    print('multigraph %s_size' % normalize_name("s3_" + host + "_" + region + "_" + bucket + "_" + rootdir))
    for folder in data:
        print('%s.value %s' % (normalize_name(folder), data[folder]['size']))
    print('total.value %s' % size_total)
    for folder in data:
        print('multigraph %s_size.%s' % (normalize_name("s3_" + host + "_" + region + "_" + bucket + "_" + rootdir), normalize_name(folder)))
        print('data.value %s' % data[folder]['size'])

    print('multigraph %s_object' % normalize_name("s3_" + host + "_" + region + "_" + bucket + "_" + rootdir))
    for folder in data:
        print('%s.value %s' % (normalize_name(folder), data[folder]['object']))
    print('total.value %s' % object_total)
    for folder in data:
        print('multigraph %s_object.%s' % (normalize_name("s3_" + host + "_" + region + "_" + bucket + "_" + rootdir), normalize_name(folder)))
        print('data.value %s' % data[folder]['object'])