mirror of https://github.com/munin-monitoring/contrib.git

initial commit (parent e3f08308ab, commit 554b757bb9)
1 changed file with 318 additions and 0 deletions

plugins/s3_bucket_size/s3_____multi (new executable file)
@@ -0,0 +1,318 @@
#!/usr/bin/env python3

""" Munin plugin to monitor the size and number of objects of a bucket in an S3-compatible storage

=head1 NAME

s3_____multi

This plugin should be linked with a name of the form:

    s3_<endpoint>_<region>_<bucket>_<folder>_multi

Where:
- endpoint is the S3 endpoint. Ex: s3.eu-west-3.amazonaws.com
- region is the S3 region. Ex: eu-west-3
- bucket is the name of your bucket
- folder is optional.
  If you specify a folder, you will monitor the size of the folders inside that folder
  instead of the size of the folders at the root of the bucket.
  folder can only be the name of a folder at the root of the bucket.

Ex: ln -s /path/to/s3_____multi /etc/munin/plugins/s3_s3.eu-west-3.amazonaws.com_eu-west-3_bucket1__multi

=head1 CONFIGURATION

The following configuration is required:

    [s3_<endpoint>_<region>_<bucket>_*]
    env.access_key_id ACCESS_KEY
    env.secret_access_key SECRET_ACCESS_KEY

The following configuration is optional:

    user munin
    env.s3hostname 1

Running as munin is optional, but if your default user is nobody, you may end up with a
write permission error when running the plugin with the update_cache parameter.

Setting env.s3hostname to any value makes the plugin advertise itself as running on
<endpoint>, creating a dedicated entry in the munin host list.
If you do so, you MUST add the following entry to munin.conf on the munin master:

    [<endpoint>]
    address <hostname of the munin-node server running the script>
    use_node_name no

Ex:

    [s3.eu-west-3.amazonaws.com]
    address myserver.mydomain.tld
    use_node_name no
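
Putting it together, a complete plugin configuration for the example bucket above could
look like this (the file name /etc/munin/plugin-conf.d/s3_bucket_size and the key values
are only illustrations):

    [s3_s3.eu-west-3.amazonaws.com_eu-west-3_bucket1_*]
    user munin
    env.access_key_id AKIAEXAMPLEKEYID
    env.secret_access_key wJalrExampleSecretKey
    env.s3hostname 1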

Getting the size of a bucket can take a (very) long time, depending on the bucket size.
The script therefore does not perform the actual check every time munin fetches data
(every 5 minutes); at fetch time, it reads the values from a local cache.

You MUST run the script yourself to update this cache. To do so, you may want to use a
cron entry.
You MUST run the script with munin-run so that it runs as the right user and gets all
the environment variables (including MUNIN_PLUGSTATE and MUNIN_CAP_MULTIGRAPH).

A typical command run by cron would be:

    sudo -u munin /usr/sbin/munin-run -d s3_s3.eu-west-3.amazonaws.com_eu-west-3_bucket1__multi update_cache
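
For example, an /etc/cron.d entry refreshing the cache every hour could look like this
(schedule and paths are illustrative, adjust them to your setup):

    0 * * * * root sudo -u munin /usr/sbin/munin-run s3_s3.eu-west-3.amazonaws.com_eu-west-3_bucket1__multi update_cache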

IMPORTANT: You will not get any graph until you have run the script with the
update_cache parameter.
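
To check the output by hand (illustrative invocation, after the cache has been populated):

    sudo -u munin /usr/sbin/munin-run s3_s3.eu-west-3.amazonaws.com_eu-west-3_bucket1__multi config
    sudo -u munin /usr/sbin/munin-run s3_s3.eu-west-3.amazonaws.com_eu-west-3_bucket1__multi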

=head1 REQUIREMENTS

Python 3
boto3 module (pip3 install boto3)

=head1 TODO

Support invocation without a bucket name (s3_<endpoint>_<region>___multi) and produce a
graph with the size/object count of all buckets.

=head1 AUTHOR

Jean-Edouard Babin
https://github.com/jebabin/munin_s3_bucket_size

=head1 LICENSE

GPLv2

=head1 MAGIC MARKERS

#%# capabilities=multigraph

=cut
"""

import json
import logging
import os
import re
import sys

import boto3

# boto3.set_stream_logger('')  # uncomment for verbose boto3 debugging

""" This is from a preliminary version which used the s3cmd tool instead of the boto3 lib
(kept for reference; re-enabling it also requires "import subprocess"):


def get_folder_list_s3cmd():
    process = subprocess.run(['s3cmd', 'ls', 's3://' + bucket + rootdir + '/'], stdout=subprocess.PIPE)
    return process.stdout.decode('utf-8')


def get_folder_info_s3cmd(folder):
    process = subprocess.run(['s3cmd', 'du', 's3://' + bucket + rootdir + '/' + folder + '/'], stdout=subprocess.PIPE)
    return process.stdout.decode('utf-8')


def update_cache_s3cmd(cache_path):
    folders = get_folder_list_s3cmd()

    folder_dict = {}
    for line in folders.split('\n'):
        if not line.strip():
            continue
        match = re.search(r"^\s+DIR\s+.*?\/([^\/]+)\/$", line)
        if match is not None:
            folder = match.group(1)

            folder_info = get_folder_info_s3cmd(folder).split('\n')[0]
            # Create the dict entry even if the command later fails, to ensure "config" lists all folders
            folder_dict[folder] = {}
            match = re.search(r"^\s*(\d+)\s+(\d+)", folder_info)
            if match is not None:
                size = match.group(1)
                object = match.group(2)
                folder_dict[folder]['size'] = size
                folder_dict[folder]['object'] = object

    with open(cache_path, 'w') as cache_file:
        cache_file.write(json.dumps(folder_dict))
"""


def update_cache(cache_path):
    s3r = boto3.resource('s3', region_name=region, endpoint_url="https://" + host,
                         aws_access_key_id=access_key_id, aws_secret_access_key=secret_access_key)
    s3_bucket = s3r.Bucket(bucket)

    # Only prefix the listing with the root folder when one was given; an empty
    # rootdir would otherwise yield the prefix "/" and match nothing.
    prefix = rootdir + "/" if rootdir else ""

    # objects.filter() paginates through the bucket listing transparently, so this
    # loop visits every key under the prefix; on large buckets this is the slow part.
    folder_dict = {}
    for obj in s3_bucket.objects.filter(Prefix=prefix):
        obj_path = re.sub('^' + re.escape(rootdir) + '/', '', obj.key)
        folder = obj_path.split('/')[0]
        if folder == "":
            continue
        if folder in folder_dict:
            folder_dict[folder]['size'] += obj.size
            folder_dict[folder]['object'] += 1
        else:
            folder_dict[folder] = {'size': obj.size, 'object': 1}

    with open(cache_path, 'w') as cache_file:
        cache_file.write(json.dumps(folder_dict))
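
# The cache produced above is a flat JSON object keyed by folder name, read back by
# read_cache() below. Illustrative content (values are made up):
#   {"backups": {"size": 1048576, "object": 42}, "logs": {"size": 2048, "object": 3}}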


def read_cache(cache_path):
    if os.path.isfile(cache_path):
        with open(cache_path) as json_file:
            data = json.load(json_file)
        return data
    else:
        return None


def normalize_name(name):
    normal_first = re.sub(r'^[^A-Za-z_]', r'_', name)
    return re.sub(r'[^A-Za-z0-9_]', r'_', normal_first)
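
# Munin data source names must match ^[A-Za-z_][A-Za-z0-9_]*$, hence the mangling
# done by normalize_name(). Illustrative example:
#   normalize_name("2023.backup/logs") -> "_023_backup_logs"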


# Exit if multigraph is not supported
is_multigraph_capable = os.getenv('MUNIN_CAP_MULTIGRAPH')
if is_multigraph_capable is None:
    sys.exit(1)

# init vars
use_s3hostname = None
host = None
region = None
bucket = None
access_key_id = None
secret_access_key = None
rootdir = ""

# deduce vars from the file name
try:
    # s3_<endpoint>_<region>_<bucket>_<folder>_multi, e.g.
    # "s3_s3.eu-west-3.amazonaws.com_eu-west-3_bucket1__multi" gives
    # host="s3.eu-west-3.amazonaws.com", region="eu-west-3", bucket="bucket1", rootdir=""
    match = re.search(r"^(?:|.*\/)s3_([^_]+)_([^_]+)_([^_]+)_([^_]*)_multi$", sys.argv[0])
    if match is not None:
        host = match.group(1)
        region = match.group(2)
        bucket = match.group(3)
        rootdir = match.group(4)
    else:
        print("File name doesn't have the expected format: s3_<endpoint>_<region>_<bucket>_<folder>_multi")
        sys.exit(2)
except Exception as ex:
    logging.error("Caught exception: %s" % ex)

# set s3 creds
access_key_id = os.getenv('access_key_id')
secret_access_key = os.getenv('secret_access_key')

if access_key_id is None:
    print('access_key_id environment variable is not defined.')
    sys.exit(3)
if secret_access_key is None:
    print('secret_access_key environment variable is not defined.')
    sys.exit(4)

# use the server hostname or the s3 hostname?
use_s3hostname = os.getenv('s3hostname')

# munin-node sets MUNIN_PLUGSTATE to the plugin state directory
tmpfile = os.getenv('MUNIN_PLUGSTATE') + "/s3_" + host + "_" + region + "_" + bucket + "_" + rootdir + ".cache"

# common prefix of all multigraph names emitted below
graph_base = normalize_name("s3_" + host + "_" + region + "_" + bucket + "_" + rootdir)


if len(sys.argv) == 2:
    if sys.argv[1] == "config":
        if use_s3hostname is not None:
            print('host_name %s' % host)
        data = read_cache(tmpfile)
        if data is None:
            sys.exit(0)

        # Size
        print('multigraph %s_size' % graph_base)
        print('graph_category Disk')
        if rootdir == "":
            print('graph_title Size of bucket %s' % bucket)
        else:
            print('graph_title Size of folder %s in bucket %s' % (rootdir, bucket))
        print('graph_vlabel bytes')
        i = 0
        for folder in data:
            print('%s.label %s' % (normalize_name(folder), folder[0:45]))
            if i == 0:
                print('%s.draw AREA' % normalize_name(folder))
                i = 1
            else:
                print('%s.draw STACK' % normalize_name(folder))
        print('total.label Total')
        print('total.draw LINE1')

        # Size per folder
        for folder in data:
            print('multigraph %s_size.%s' % (graph_base, normalize_name(folder)))
            print('data.label %s' % folder[0:45])
            print('graph_category Disk')
            if rootdir == "":
                print('graph_title Folder size inside bucket %s' % bucket)
            else:
                print('graph_title Folder size inside folder %s of bucket %s' % (rootdir, bucket))
            print('graph_vlabel bytes')
            print('data.draw LINE1')

        # Objects
        print('multigraph %s_object' % graph_base)
        print('graph_category Disk')
        if rootdir == "":
            print('graph_title Objects in bucket %s' % bucket)
        else:
            print('graph_title Objects in folder %s of bucket %s' % (rootdir, bucket))
        print('graph_vlabel # of objects')
        i = 0
        for folder in data:
            print('%s.label %s' % (normalize_name(folder), folder[0:45]))
            if i == 0:
                print('%s.draw AREA' % normalize_name(folder))
                i = 1
            else:
                print('%s.draw STACK' % normalize_name(folder))
        print('total.label Total')
        print('total.draw LINE1')

        # Objects per folder
        for folder in data:
            print('multigraph %s_object.%s' % (graph_base, normalize_name(folder)))
            print('data.label %s' % folder[0:45])
            print('graph_category Disk')
            if rootdir == "":
                print('graph_title Folder objects inside bucket %s' % bucket)
            else:
                print('graph_title Folder objects inside folder %s of bucket %s' % (rootdir, bucket))
            print('graph_vlabel # of objects')
            print('data.draw LINE1')
    elif sys.argv[1] == "update_cache":
        update_cache(tmpfile)
else:
    # fetch: munin calls the plugin without arguments and we answer from the cache
    data = read_cache(tmpfile)
    if data is None:
        sys.exit(1)
    size_total = 0
    object_total = 0
    for folder in data:
        size_total = size_total + int(data[folder]['size'])
        object_total = object_total + int(data[folder]['object'])

    print('multigraph %s_size' % graph_base)
    for folder in data:
        print('%s.value %s' % (normalize_name(folder), data[folder]['size']))
    print('total.value %s' % size_total)
    for folder in data:
        print('multigraph %s_size.%s' % (graph_base, normalize_name(folder)))
        print('data.value %s' % data[folder]['size'])

    print('multigraph %s_object' % graph_base)
    for folder in data:
        print('%s.value %s' % (normalize_name(folder), data[folder]['object']))
    print('total.value %s' % object_total)
    for folder in data:
        print('multigraph %s_object.%s' % (graph_base, normalize_name(folder)))
        print('data.value %s' % data[folder]['object'])