mirror of
https://github.com/munin-monitoring/contrib.git
synced 2025-07-22 02:51:03 +00:00
Merge pull request #959 from kael-shipman/logevents
Added extinfo to list affected logs and improved documentation
This commit is contained in:
commit
2119206e39
6 changed files with 100 additions and 44 deletions
BIN
plugins/logs/example-graphs/loggrepx_-day.png
Normal file
BIN
plugins/logs/example-graphs/loggrepx_-day.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 21 KiB |
BIN
plugins/logs/example-graphs/loggrepx_-week.png
Normal file
BIN
plugins/logs/example-graphs/loggrepx_-week.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 23 KiB |
BIN
plugins/logs/example-graphs/service_events-day.png
Normal file
BIN
plugins/logs/example-graphs/service_events-day.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 26 KiB |
BIN
plugins/logs/example-graphs/service_events-week.png
Normal file
BIN
plugins/logs/example-graphs/service_events-week.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 27 KiB |
|
@ -11,8 +11,9 @@ loggrepx - Counts the number of matching log lines by log file
|
||||||
=head1 DESCRIPTION
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
This plugin is somewhat of a bash port of the original loggrep plugin,
|
This plugin is somewhat of a bash port of the original loggrep plugin,
|
||||||
except that it adds a breakdown of matches per file, rather than aggregating
|
except that it displays a breakdown of matches per file, rather than
|
||||||
matches across all files.
|
aggregating matches across all files. It is intended to answer the
|
||||||
|
question, "Which of my logs are reporting concerning events right now?"
|
||||||
|
|
||||||
=head1 CONFIGURATION
|
=head1 CONFIGURATION
|
||||||
|
|
||||||
|
@ -40,12 +41,13 @@ Available config options include the following:
|
||||||
env.[field]_critical - Critical level for specific logfile
|
env.[field]_critical - Critical level for specific logfile
|
||||||
|
|
||||||
NOTE: for any variable with [field] in it, [field] is derived from the
|
NOTE: for any variable with [field] in it, [field] is derived from the
|
||||||
full logfile path by simply replacing all non-alphanumerics with
|
full logfile path by simply removing the leading slash and replacing
|
||||||
underscores. For example, the "warning" field for the logfile
|
all non-alphanumerics with underscores. For example, the "warning" field
|
||||||
\`/var/log/nginx/errors.log\` would be \`var_log_nginx_errors_log_warning\`
|
for the logfile F</var/log/nginx/errors.log> would be
|
||||||
|
F<var_log_nginx_errors_log_warning>.
|
||||||
|
|
||||||
One good way to get these names is to run \`munin-run [plugin-name]\`
|
One good way to get these names is to run C<munin-run [plugin-name]>
|
||||||
after you've configured the required variables and then just copy/pasting
|
after you've configured the required variables and then just copy/paste
|
||||||
the names from the output.
|
the names from the output.
|
||||||
|
|
||||||
=head1 AUTHOR
|
=head1 AUTHOR
|
||||||
|
|
|
@ -8,7 +8,7 @@ set -e
|
||||||
|
|
||||||
service_events - Tracks the number of significant event occurrences per service
|
service_events - Tracks the number of significant event occurrences per service
|
||||||
|
|
||||||
This plugin is a riff on the loggrep family (\`loggrep\` and my own \`loggrepx_\`).
|
This plugin is a riff on the loggrep family (C<loggrep> and my own C<loggrepx_>).
|
||||||
However, rather than focusing on single log files, it focuses on providing
|
However, rather than focusing on single log files, it focuses on providing
|
||||||
insight into all "significant events" happening for a given service, which
|
insight into all "significant events" happening for a given service, which
|
||||||
may be found across several log files.
|
may be found across several log files.
|
||||||
|
@ -16,13 +16,18 @@ may be found across several log files.
|
||||||
The idea is that any given service may produce events in various areas of
|
The idea is that any given service may produce events in various areas of
|
||||||
operation. For example, while a typical web app might log runtime errors
|
operation. For example, while a typical web app might log runtime errors
|
||||||
to its app.log file, a filesystem change may prevent the whole app from
|
to its app.log file, a filesystem change may prevent the whole app from
|
||||||
event being bootstrapped, which may be logged in an apache log or in syslog.
|
even being bootstrapped, and this crucial error may be logged in an apache
|
||||||
|
log or in syslog.
|
||||||
|
|
||||||
|
This plugin attempts to give visibility into all such "important events"
|
||||||
|
that may affect the proper functioning of a given service. It attempts to
|
||||||
|
answer the question, "Is my service running normally?"
|
||||||
|
|
||||||
This plugin attempts to answer the question, "how is my service doing?".
|
|
||||||
Unfortunately, it won't help you trace down exactly where the events are
|
Unfortunately, it won't help you trace down exactly where the events are
|
||||||
coming from if you happen to be watching a number of different logs, but
|
coming from if you happen to be watching a number of different logs, but
|
||||||
it will at least let you know that something is wrong and that action
|
it will at least let you know that something is wrong and that action
|
||||||
should be taken.
|
should be taken. To try to help with this, the plugin uses the extinfo
|
||||||
|
field to list which logs currently have important events in them.
|
||||||
|
|
||||||
The plugin can be included multiple times to create graphs for various
|
The plugin can be included multiple times to create graphs for various
|
||||||
differing kinds of services. For example, you may have both webservices
|
differing kinds of services. For example, you may have both webservices
|
||||||
|
@ -30,7 +35,10 @@ and system cleanup services, and you want to keep an eye on them in
|
||||||
different ways.
|
different ways.
|
||||||
|
|
||||||
You can accomplish this by linking the plugin twice with different names
|
You can accomplish this by linking the plugin twice with different names
|
||||||
and providing different configuration for each instance.
|
and providing different configuration for each instance. In general, you
|
||||||
|
should think of a single instance of this plugin as representing a single
|
||||||
|
class of services.
|
||||||
|
|
||||||
|
|
||||||
=head1 CONFIGURATION
|
=head1 CONFIGURATION
|
||||||
|
|
||||||
|
@ -70,15 +78,17 @@ Available config options include the following:
|
||||||
|
|
||||||
For plugin-specific options, the following rules apply:
|
For plugin-specific options, the following rules apply:
|
||||||
|
|
||||||
* <type> is any arbitrary string. It just has to match between <type>_logfiles
|
* C<< <type> >> is any arbitrary string. It just has to match between
|
||||||
and <type>_regex. Common values are "apache", "nginx", "apt", "syslog", etc.
|
C<< <type>_logfiles >> and C<< <type>_regex >>. Common values are "apache",
|
||||||
|
"nginx", "apt", "syslog", etc.
|
||||||
* <service> is a string derived by passing the service name through a filter
|
* <service> is a string derived by passing the service name through a filter
|
||||||
that removes non-alphabetic characters from the beginning and replaces all non-
|
that removes non-alphabetic characters from the beginning and replaces all non-
|
||||||
alpha-numeric characters with underscore (\`_\`).
|
alpha-numeric characters with underscore (C<_>).
|
||||||
* logfiles are bound to services by matching <service>_logbinding on the full
|
* logfiles are bound to services by matching C<< <service>_logbinding >> on the
|
||||||
logfile path. For example, specifying my_site_logbinding=my-site would bind
|
full logfile path. For example, specifying C<my_site_logbinding=my-site> would
|
||||||
both /var/log/my-site/errors.log and /srv/www/my-site/logs/app.log to the
|
bind both F</var/log/my-site/errors.log> and F</srv/www/my-site/logs/app.log>
|
||||||
defined my-site service.
|
to the defined C<my-site> service.
|
||||||
|
|
||||||
|
|
||||||
=head2 SERVICE AUTOCONF
|
=head2 SERVICE AUTOCONF
|
||||||
|
|
||||||
|
@ -91,9 +101,12 @@ will be something like /srv/*/*, which would match all children in /srv/www/ and
|
||||||
/srv/local/.
|
/srv/local/.
|
||||||
|
|
||||||
If you choose not to use the autoconf feature, you MUST specify services as a
|
If you choose not to use the autoconf feature, you MUST specify services as a
|
||||||
space-separated list of service names in the \`services\` variable.
|
space-separated list of service names in the C<services> variable.
|
||||||
|
|
||||||
=head2 EXAMPLE CONFIG
|
|
||||||
|
=head2 EXAMPLE CONFIGS
|
||||||
|
|
||||||
|
This example uses services autoconf:
|
||||||
|
|
||||||
[service_events]
|
[service_events]
|
||||||
user root
|
user root
|
||||||
|
@ -106,14 +119,43 @@ space-separated list of service names in the \`services\` variable.
|
||||||
env.apache_regex error|alert|crit|emerg
|
env.apache_regex error|alert|crit|emerg
|
||||||
env.warning 1
|
env.warning 1
|
||||||
env.critical 5
|
env.critical 5
|
||||||
env.my_special_service_warning 100
|
env.my_special_service_warning 100
|
||||||
env.my_special_service_critical 300
|
env.my_special_service_critical 300
|
||||||
|
|
||||||
|
This example DOESN'T use services autoconf:
|
||||||
|
|
||||||
|
[service_events]
|
||||||
|
user root
|
||||||
|
env.services auth.example.com admin.example.com www.example.com
|
||||||
|
env.auth_example_com_logbinding my-custom-binding[0-9]+
|
||||||
|
env.cfxsvc_logfiles /srv/*/*/logs/app.log
|
||||||
|
env.cfxsvc_regex error|alert|crit|emerg
|
||||||
|
env.phpfpm_logfiles /srv/*/*/logs/php-fpm*.log
|
||||||
|
env.phpfpm_regex Fatal error
|
||||||
|
env.apache_logfiles /srv/*/*/logs/errors.log
|
||||||
|
env.apache_regex error|alert|crit|emerg
|
||||||
|
env.warning 1
|
||||||
|
env.critical 5
|
||||||
|
env.auth_example_com_warning 100
|
||||||
|
env.auth_example_com_critical 300
|
||||||
|
env.www_example_com_warning 50
|
||||||
|
env.www_example_com_critical 100
|
||||||
|
|
||||||
|
This graph will ONLY ever show values for the three listed services, even
|
||||||
|
if other services are installed whose logfiles match the logfiles search.
|
||||||
|
|
||||||
|
Also notice that in this example, we've only listed a log binding for the
|
||||||
|
auth service. The plugin will use the service name by default for any
|
||||||
|
services that don't specify a log binding, so in this case, auth has a
|
||||||
|
custom log binding, while all other services have log bindings equal to
|
||||||
|
their names.
|
||||||
|
|
||||||
|
|
||||||
=head1 AUTHOR
|
=head1 AUTHOR
|
||||||
|
|
||||||
Kael Shipman <kael.shipman@gmail.com>
|
Kael Shipman <kael.shipman@gmail.com>
|
||||||
|
|
||||||
|
|
||||||
=head1 LICENSE
|
=head1 LICENSE
|
||||||
|
|
||||||
MIT LICENSE
|
MIT LICENSE
|
||||||
|
@ -138,6 +180,7 @@ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
OTHER DEALINGS IN THE SOFTWARE.
|
OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
|
||||||
=head1 MAGIC MARKERS
|
=head1 MAGIC MARKERS
|
||||||
|
|
||||||
#%# family=manual
|
#%# family=manual
|
||||||
|
@ -155,9 +198,9 @@ while read -u 3 -r v; do
|
||||||
n=0
|
n=0
|
||||||
while [ $n -lt "${#reqvars[@]}" ]; do
|
while [ $n -lt "${#reqvars[@]}" ]; do
|
||||||
if echo "$v" | grep -Eq "${reqvars[$n]}$"; then
|
if echo "$v" | grep -Eq "${reqvars[$n]}$"; then
|
||||||
!((setvars|=$(( 2 ** $n )) ))
|
setvars=$((setvars | $(( 2 ** $n )) ))
|
||||||
fi
|
fi
|
||||||
!((n++))
|
n=$((n+1))
|
||||||
done
|
done
|
||||||
done 3< <(echo "$vars")
|
done 3< <(echo "$vars")
|
||||||
|
|
||||||
|
@ -166,8 +209,8 @@ done 3< <(echo "$vars")
|
||||||
n=0
|
n=0
|
||||||
allvars=0
|
allvars=0
|
||||||
while [ $n -lt "${#reqvars[@]}" ]; do
|
while [ $n -lt "${#reqvars[@]}" ]; do
|
||||||
!((allvars+=$(( 2 ** $n ))))
|
allvars=$(( allvars + $(( 2 ** $n )) ))
|
||||||
!((n++))
|
n=$((n+1))
|
||||||
done
|
done
|
||||||
|
|
||||||
# And scream if something's not set
|
# And scream if something's not set
|
||||||
|
@ -181,7 +224,7 @@ if ! [ "$setvars" -eq "$allvars" ]; then
|
||||||
>&2 echo " *${reqvars[$n]}"
|
>&2 echo " *${reqvars[$n]}"
|
||||||
fi
|
fi
|
||||||
i=$((i<<1))
|
i=$((i<<1))
|
||||||
!((n++))
|
n=$((n+1))
|
||||||
done
|
done
|
||||||
>&2 echo
|
>&2 echo
|
||||||
>&2 echo "Please read the docs."
|
>&2 echo "Please read the docs."
|
||||||
|
@ -245,7 +288,7 @@ function config() {
|
||||||
echo "graph_args --base 1000 -l 0"
|
echo "graph_args --base 1000 -l 0"
|
||||||
echo "graph_vlabel ${vlabel}"
|
echo "graph_vlabel ${vlabel}"
|
||||||
echo "graph_category other"
|
echo "graph_category other"
|
||||||
echo "graph_info Lists number of matching lines found in various logfiles associated with each service"
|
echo "graph_info Lists number of matching lines found in various logfiles associated with each service. Extinfo displays currently affected logs."
|
||||||
|
|
||||||
local var_prefix
|
local var_prefix
|
||||||
while read -u 3 -r svc; do
|
while read -u 3 -r svc; do
|
||||||
|
@ -266,7 +309,7 @@ function fetch() {
|
||||||
local curstate="$(cat "$MUNIN_STATEFILE")"
|
local curstate="$(cat "$MUNIN_STATEFILE")"
|
||||||
local nextstate=()
|
local nextstate=()
|
||||||
|
|
||||||
local n svcnm varnm service svc svc_counter logbinding logfile lognm logmatch prvlines curlines matches
|
local n svcnm varnm service svc svc_counter_var logbinding logfile lognm logmatch prvlines curlines matches extinfo_var
|
||||||
|
|
||||||
# Set service counters to 0 and set any logbindings that aren't yet set
|
# Set service counters to 0 and set any logbindings that aren't yet set
|
||||||
while read -u 3 -r svc; do
|
while read -u 3 -r svc; do
|
||||||
|
@ -286,6 +329,13 @@ function fetch() {
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Make sure the logfile exists
|
||||||
|
if [ ! -e "$logfile" ]; then
|
||||||
|
>&2 echo "Logfile '$logfile' doesn't exist. Skipping."
|
||||||
|
n=$((n+1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
# Find which service this logfile is associated with
|
# Find which service this logfile is associated with
|
||||||
service=
|
service=
|
||||||
while read -u 4 -r svc; do
|
while read -u 4 -r svc; do
|
||||||
|
@ -306,17 +356,12 @@ function fetch() {
|
||||||
svcnm="$(echo "$service" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')"
|
svcnm="$(echo "$service" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')"
|
||||||
lognm="$(echo "$logfile" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')"
|
lognm="$(echo "$logfile" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')"
|
||||||
|
|
||||||
# Get previous line count to determine whether or not the file may have been rotated
|
# Get previous line count to determine whether or not the file may have been rotated (defaulting to 0)
|
||||||
prvlines="$(echo "$curstate" | grep "^${lognm}_lines=" | cut -f 2 -d "=")"
|
prvlines="$(echo "$curstate" | grep "^${lognm}_lines=" | cut -f 2 -d "=")"
|
||||||
if [ -z "$prvlines" ]; then
|
prvlines="${prvlines:-0}"
|
||||||
prvlines=0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Get the current number of lines in the file
|
# Get the current number of lines in the file (defaulting to 0 on error)
|
||||||
curlines="$(wc -l < "$logfile")"
|
curlines="$(wc -l < "$logfile")"
|
||||||
if ! [ "$curlines" -eq "$curlines" ] &>/dev/null; then
|
|
||||||
curlines=0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# If the current line count is less than the previous line count, we've probably rotated.
|
# If the current line count is less than the previous line count, we've probably rotated.
|
||||||
# Reset to 0.
|
# Reset to 0.
|
||||||
|
@ -330,15 +375,22 @@ function fetch() {
|
||||||
logmatch="${LOGFILEMAP[$n]}_regex"
|
logmatch="${LOGFILEMAP[$n]}_regex"
|
||||||
matches="$(tail -n +"$prvlines" "$logfile" | grep -Ec "${!logmatch}" || true)"
|
matches="$(tail -n +"$prvlines" "$logfile" | grep -Ec "${!logmatch}" || true)"
|
||||||
|
|
||||||
# Aggregate and add to the correct service counter
|
# If there were matches, aggregate them and add this log to the extinfo for the service
|
||||||
svc_counter="${svcnm}_total"
|
if [ "$matches" -gt 0 ]; then
|
||||||
!((matches+=${!svc_counter}))
|
# Aggregate and add to the correct service counter
|
||||||
typeset "$svc_counter=$matches"
|
svc_counter_var="${svcnm}_total"
|
||||||
|
matches=$(($matches + ${!svc_counter_var}))
|
||||||
|
typeset "$svc_counter_var=$matches"
|
||||||
|
|
||||||
|
# Add this log to extinfo for service
|
||||||
|
extinfo_var="${svcnm}_extinfo"
|
||||||
|
typeset "$extinfo_var=${!extinfo_var}$logfile, "
|
||||||
|
fi
|
||||||
|
|
||||||
# Push onto next state
|
# Push onto next state
|
||||||
nextstate+=("${lognm}_lines=$curlines")
|
nextstate+=("${lognm}_lines=$curlines")
|
||||||
|
|
||||||
!((n++))
|
n=$((n+1))
|
||||||
done 3< <(echo "$LOGFILES")
|
done 3< <(echo "$LOGFILES")
|
||||||
|
|
||||||
# Write state to munin statefile
|
# Write state to munin statefile
|
||||||
|
@ -347,8 +399,10 @@ function fetch() {
|
||||||
# Now echo values
|
# Now echo values
|
||||||
while read -u 3 -r svc; do
|
while read -u 3 -r svc; do
|
||||||
svcnm="$(echo "$svc" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')"
|
svcnm="$(echo "$svc" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')"
|
||||||
svc_counter="${svcnm}_total"
|
svc_counter_var="${svcnm}_total"
|
||||||
echo "${svcnm}.value ${!svc_counter}"
|
extinfo_var="${svcnm}_extinfo"
|
||||||
|
echo "${svcnm}.value ${!svc_counter_var}"
|
||||||
|
echo "${svcnm}.extinfo ${!extinfo_var}"
|
||||||
done 3< <(IFS=$'\n'; echo "${services[*]}")
|
done 3< <(IFS=$'\n'; echo "${services[*]}")
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue