mirror of
https://github.com/munin-monitoring/contrib.git
synced 2025-07-21 18:41:03 +00:00
Merge pull request #959 from kael-shipman/logevents
Added extinfo to list affected logs and improved documentation
This commit is contained in:
commit
2119206e39
6 changed files with 100 additions and 44 deletions
BIN
plugins/logs/example-graphs/loggrepx_-day.png
Normal file
BIN
plugins/logs/example-graphs/loggrepx_-day.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 21 KiB |
BIN
plugins/logs/example-graphs/loggrepx_-week.png
Normal file
BIN
plugins/logs/example-graphs/loggrepx_-week.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 23 KiB |
BIN
plugins/logs/example-graphs/service_events-day.png
Normal file
BIN
plugins/logs/example-graphs/service_events-day.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 26 KiB |
BIN
plugins/logs/example-graphs/service_events-week.png
Normal file
BIN
plugins/logs/example-graphs/service_events-week.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 27 KiB |
|
@ -11,8 +11,9 @@ loggrepx - Counts the number of matching log lines by log file
|
|||
=head1 DESCRIPTION
|
||||
|
||||
This plugin is somewhat of a bash port of the original loggrep plugin,
|
||||
except that it adds a breakdown of matches per file, rather than aggregating
|
||||
matches across all files.
|
||||
except that it displays a breakdown of matches per file, rather than
|
||||
aggregating matches across all files. It is intended to answer the
|
||||
question, "Which of my logs are reporting concerning events right now?"
|
||||
|
||||
=head1 CONFIGURATION
|
||||
|
||||
|
@ -40,12 +41,13 @@ Available config options include the following:
|
|||
env.[field]_critical - Critical level for specific logfile
|
||||
|
||||
NOTE: for any variable with [field] in it, [field] is derived from the
|
||||
full logfile path by simply replacing all non-alphanumerics with
|
||||
underscores. For example, the "warning" field for the logfile
|
||||
\`/var/log/nginx/errors.log\` would be \`var_log_nginx_errors_log_warning\`
|
||||
full logfile path by simply removing the preceding slash and replacing
|
||||
all non-alphanumerics with underscores. For example, the "warning" field
|
||||
for the logfile F</var/log/nginx/errors.log> would be
|
||||
F<var_log_nginx_errors_log_warning>.
|
||||
|
||||
One good way to get these names is to run \`munin-run [plugin-name]\`
|
||||
after you've configured the required variables and then just copy/pasting
|
||||
One good way to get these names is to run C<munin-run [plugin-name]>
|
||||
after you've configured the required variables and then just copy/paste
|
||||
the names from the output.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
|
|
@ -8,7 +8,7 @@ set -e
|
|||
|
||||
service_events - Tracks the number of significant event occurrences per service
|
||||
|
||||
This plugin is a riff on the loggrep family (\`loggrep\` and my own \`loggrepx_\`).
|
||||
This plugin is a riff on the loggrep family (C<loggrep> and my own C<loggrepx_>).
|
||||
However, rather than focusing on single log files, it focuses on providing
|
||||
insight into all "significant events" happening for a given service, which
|
||||
may be found across several log files.
|
||||
|
@ -16,13 +16,18 @@ may be found across several log files.
|
|||
The idea is that any given service may produce events in various areas of
|
||||
operation. For example, while a typical web app might log runtime errors
|
||||
to its app.log file, a filesystem change may prevent the whole app from
|
||||
event being bootstrapped, which may be logged in an apache log or in syslog.
|
||||
even being bootstrapped, and this crucial error may be logged in an apache
|
||||
log or in syslog.
|
||||
|
||||
This plugin attempts to give visibility into all such "important events"
|
||||
that may affect the proper functioning of a given service. It attempts to
|
||||
answer the question, "Is my service running normally?".
|
||||
|
||||
This plugin attempts to answer the question, "how is my service doing?".
|
||||
Unfortunately, it won't help you trace down exactly where the events are
|
||||
coming from if you happen to be watching a number of different logs, but
|
||||
it will at least let you know that something is wrong and that action
|
||||
should be taken.
|
||||
should be taken. To try to help with this, the plugin uses the extinfo
|
||||
field to list which logs currently have important events in them.
|
||||
|
||||
The plugin can be included multiple times to create graphs for various
|
||||
differing kinds of services. For example, you may have both webservices
|
||||
|
@ -30,7 +35,10 @@ and system cleanup services, and you want to keep an eye on them in
|
|||
different ways.
|
||||
|
||||
You can accomplish this by linking the plugin twice with different names
|
||||
and providing different configuration for each instance.
|
||||
and providing different configuration for each instance. In general, you
|
||||
should think of a single instance of this plugin as representing a single
|
||||
class of services.
|
||||
|
||||
|
||||
=head1 CONFIGURATION
|
||||
|
||||
|
@ -70,15 +78,17 @@ Available config options include the following:
|
|||
|
||||
For plugin-specific options, the following rules apply:
|
||||
|
||||
* <type> is any arbitrary string. It just has to match between <type>_logfiles
|
||||
and <type>_regex. Common values are "apache", "nginx", "apt", "syslog", etc.
|
||||
* C<< <type> >> is any arbitrary string. It just has to match between
|
||||
C<< <type>_logfiles >> and C<< <type>_regex >>. Common values are "apache",
|
||||
"nginx", "apt", "syslog", etc.
|
||||
* <service> is a string derived by passing the service name through a filter
|
||||
that removes non-alphabetic characters from the beginning and replaces all non-
|
||||
alpha-numeric characters with underscore (\`_\`).
|
||||
* logfiles are bound to services by matching <service>_logbinding on the full
|
||||
logfile path. For example, specifying my_site_logbinding=my-site would bind
|
||||
both /var/log/my-site/errors.log and /srv/www/my-site/logs/app.log to the
|
||||
defined my-site service.
|
||||
alpha-numeric characters with underscore (C<_>).
|
||||
* logfiles are bound to services by matching C<< <service>_logbinding >> on the
|
||||
full logfile path. For example, specifying C<my_site_logbinding=my-site> would
|
||||
bind both F</var/log/my-site/errors.log> and F</srv/www/my-site/logs/app.log>
|
||||
to the defined C<my-site> service.
|
||||
|
||||
|
||||
=head2 SERVICE AUTOCONF
|
||||
|
||||
|
@ -91,9 +101,12 @@ will be something like /srv/*/*, which would match all children in /srv/www/ and
|
|||
/srv/local/.
|
||||
|
||||
If you choose not to use the autoconf feature, you MUST specify services as a
|
||||
space-separated list of service names in the \`services\` variable.
|
||||
space-separated list of service names in the C<services> variable.
|
||||
|
||||
=head2 EXAMPLE CONFIG
|
||||
|
||||
=head2 EXAMPLE CONFIGS
|
||||
|
||||
This example uses services autoconf:
|
||||
|
||||
[service_events]
|
||||
user root
|
||||
|
@ -106,14 +119,43 @@ space-separated list of service names in the \`services\` variable.
|
|||
env.apache_regex error|alert|crit|emerg
|
||||
env.warning 1
|
||||
env.critical 5
|
||||
env.my_special_service_warning 100
|
||||
env.my_special_service_warning 100
|
||||
env.my_special_service_critical 300
|
||||
|
||||
This example DOESN'T use services autoconf:
|
||||
|
||||
[service_events]
|
||||
user root
|
||||
env.services auth.example.com admin.example.com www.example.com
|
||||
env.auth_example_com_logbinding my-custom-binding[0-9]+
|
||||
env.cfxsvc_logfiles /srv/*/*/logs/app.log
|
||||
env.cfxsvc_regex error|alert|crit|emerg
|
||||
env.phpfpm_logfiles /srv/*/*/logs/php-fpm*.log
|
||||
env.phpfpm_regex Fatal error
|
||||
env.apache_logfiles /srv/*/*/logs/errors.log
|
||||
env.apache_regex error|alert|crit|emerg
|
||||
env.warning 1
|
||||
env.critical 5
|
||||
env.auth_example_com_warning 100
|
||||
env.auth_example_com_critical 300
|
||||
env.www_example_com_warning 50
|
||||
env.www_example_com_critical 100
|
||||
|
||||
This graph will ONLY ever show values for the three listed services, even
|
||||
if other services are installed whose logfiles match the logfiles search.
|
||||
|
||||
Also notice that in this example, we've only listed a log binding for the
|
||||
auth service. The plugin will use the service name by default for any
|
||||
services that don't specify a log binding, so in this case, auth has a
|
||||
custom log binding, while all other services have log bindings equal to
|
||||
their names.
|
||||
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Kael Shipman <kael.shipman@gmail.com>
|
||||
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
MIT LICENSE
|
||||
|
@ -138,6 +180,7 @@ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
|
||||
=head1 MAGIC MARKERS
|
||||
|
||||
#%# family=manual
|
||||
|
@ -155,9 +198,9 @@ while read -u 3 -r v; do
|
|||
n=0
|
||||
while [ $n -lt "${#reqvars[@]}" ]; do
|
||||
if echo "$v" | grep -Eq "${reqvars[$n]}$"; then
|
||||
!((setvars|=$(( 2 ** $n )) ))
|
||||
setvars=$((setvars | $(( 2 ** $n )) ))
|
||||
fi
|
||||
!((n++))
|
||||
n=$((n+1))
|
||||
done
|
||||
done 3< <(echo "$vars")
|
||||
|
||||
|
@ -166,8 +209,8 @@ done 3< <(echo "$vars")
|
|||
n=0
|
||||
allvars=0
|
||||
while [ $n -lt "${#reqvars[@]}" ]; do
|
||||
!((allvars+=$(( 2 ** $n ))))
|
||||
!((n++))
|
||||
allvars=$(( allvars + $(( 2 ** $n )) ))
|
||||
n=$((n+1))
|
||||
done
|
||||
|
||||
# And scream if something's not set
|
||||
|
@ -181,7 +224,7 @@ if ! [ "$setvars" -eq "$allvars" ]; then
|
|||
>&2 echo " *${reqvars[$n]}"
|
||||
fi
|
||||
i=$((i<<1))
|
||||
!((n++))
|
||||
n=$((n+1))
|
||||
done
|
||||
>&2 echo
|
||||
>&2 echo "Please read the docs."
|
||||
|
@ -245,7 +288,7 @@ function config() {
|
|||
echo "graph_args --base 1000 -l 0"
|
||||
echo "graph_vlabel ${vlabel}"
|
||||
echo "graph_category other"
|
||||
echo "graph_info Lists number of matching lines found in various logfiles associated with each service"
|
||||
echo "graph_info Lists number of matching lines found in various logfiles associated with each service. Extinfo displays currently affected logs."
|
||||
|
||||
local var_prefix
|
||||
while read -u 3 -r svc; do
|
||||
|
@ -266,7 +309,7 @@ function fetch() {
|
|||
local curstate="$(cat "$MUNIN_STATEFILE")"
|
||||
local nextstate=()
|
||||
|
||||
local n svcnm varnm service svc svc_counter logbinding logfile lognm logmatch prvlines curlines matches
|
||||
local n svcnm varnm service svc svc_counter_var logbinding logfile lognm logmatch prvlines curlines matches extinfo_var
|
||||
|
||||
# Set service counters to 0 and set any logbindings that aren't yet set
|
||||
while read -u 3 -r svc; do
|
||||
|
@ -286,6 +329,13 @@ function fetch() {
|
|||
continue
|
||||
fi
|
||||
|
||||
# Make sure the logfile exists
|
||||
if [ ! -e "$logfile" ]; then
|
||||
>&2 echo "Logfile '$logfile' doesn't exist. Skipping."
|
||||
n=$((n+1))
|
||||
continue
|
||||
fi
|
||||
|
||||
# Find which service this logfile is associated with
|
||||
service=
|
||||
while read -u 4 -r svc; do
|
||||
|
@ -306,17 +356,12 @@ function fetch() {
|
|||
svcnm="$(echo "$service" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')"
|
||||
lognm="$(echo "$logfile" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')"
|
||||
|
||||
# Get previous line count to determine whether or not the file may have been rotated
|
||||
# Get previous line count to determine whether or not the file may have been rotated (defaulting to 0)
|
||||
prvlines="$(echo "$curstate" | grep "^${lognm}_lines=" | cut -f 2 -d "=")"
|
||||
if [ -z "$prvlines" ]; then
|
||||
prvlines=0
|
||||
fi
|
||||
prvlines="${prvlines:-0}"
|
||||
|
||||
# Get the current number of lines in the file
|
||||
# Get the current number of lines in the file (defaulting to 0 on error)
|
||||
curlines="$(wc -l < "$logfile")"
|
||||
if ! [ "$curlines" -eq "$curlines" ] &>/dev/null; then
|
||||
curlines=0
|
||||
fi
|
||||
|
||||
# If the current line count is less than the previous line count, we've probably rotated.
|
||||
# Reset to 0.
|
||||
|
@ -330,15 +375,22 @@ function fetch() {
|
|||
logmatch="${LOGFILEMAP[$n]}_regex"
|
||||
matches="$(tail -n +"$prvlines" "$logfile" | grep -Ec "${!logmatch}" || true)"
|
||||
|
||||
# Aggregate and add to the correct service counter
|
||||
svc_counter="${svcnm}_total"
|
||||
!((matches+=${!svc_counter}))
|
||||
typeset "$svc_counter=$matches"
|
||||
# If there were matches, aggregate them and add this log to the extinfo for the service
|
||||
if [ "$matches" -gt 0 ]; then
|
||||
# Aggregate and add to the correct service counter
|
||||
svc_counter_var="${svcnm}_total"
|
||||
matches=$(($matches + ${!svc_counter_var}))
|
||||
typeset "$svc_counter_var=$matches"
|
||||
|
||||
# Add this log to extinfo for service
|
||||
extinfo_var="${svcnm}_extinfo"
|
||||
typeset "$extinfo_var=${!extinfo_var}$logfile, "
|
||||
fi
|
||||
|
||||
# Push onto next state
|
||||
nextstate+=("${lognm}_lines=$curlines")
|
||||
|
||||
!((n++))
|
||||
n=$((n+1))
|
||||
done 3< <(echo "$LOGFILES")
|
||||
|
||||
# Write state to munin statefile
|
||||
|
@ -347,8 +399,10 @@ function fetch() {
|
|||
# Now echo values
|
||||
while read -u 3 -r svc; do
|
||||
svcnm="$(echo "$svc" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')"
|
||||
svc_counter="${svcnm}_total"
|
||||
echo "${svcnm}.value ${!svc_counter}"
|
||||
svc_counter_var="${svcnm}_total"
|
||||
extinfo_var="${svcnm}_extinfo"
|
||||
echo "${svcnm}.value ${!svc_counter_var}"
|
||||
echo "${svcnm}.extinfo ${!extinfo_var}"
|
||||
done 3< <(IFS=$'\n'; echo "${services[*]}")
|
||||
|
||||
return 0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue