diff --git a/plugins/logs/example-graphs/loggrepx_-day.png b/plugins/logs/example-graphs/loggrepx_-day.png new file mode 100644 index 00000000..20f1228f Binary files /dev/null and b/plugins/logs/example-graphs/loggrepx_-day.png differ diff --git a/plugins/logs/example-graphs/loggrepx_-week.png b/plugins/logs/example-graphs/loggrepx_-week.png new file mode 100644 index 00000000..a7b21db8 Binary files /dev/null and b/plugins/logs/example-graphs/loggrepx_-week.png differ diff --git a/plugins/logs/example-graphs/service_events-day.png b/plugins/logs/example-graphs/service_events-day.png new file mode 100644 index 00000000..f0ee9066 Binary files /dev/null and b/plugins/logs/example-graphs/service_events-day.png differ diff --git a/plugins/logs/example-graphs/service_events-week.png b/plugins/logs/example-graphs/service_events-week.png new file mode 100644 index 00000000..49386258 Binary files /dev/null and b/plugins/logs/example-graphs/service_events-week.png differ diff --git a/plugins/logs/loggrepx_ b/plugins/logs/loggrepx_ index c738ea4d..69c6ab4d 100755 --- a/plugins/logs/loggrepx_ +++ b/plugins/logs/loggrepx_ @@ -11,8 +11,9 @@ loggrepx - Counts the number of matching log lines by log file =head1 DESCRIPTION This plugin is somewhat of a bash port of the original loggrep plugin, -except that it adds a breakdown of matches per file, rather than aggregating -matches across all files. +except that it displays a breakdown of matches per file, rather than +aggregating matches across all files. It is intended to answer the +question, "Which of my logs are reporting concerning events right now?" =head1 CONFIGURATION @@ -40,12 +41,13 @@ Available config options include the following: env.[field]_critical - Critical level for specific logfile NOTE: for any variable with [field] in it, [field] is derived from the -full logfile path by simply replacing all non-alphanumerics with -underscores. 
For example, the "warning" field for the logfile -\`/var/log/nginx/errors.log\` would be \`var_log_nginx_errors_log_warning\` +full logfile path by simply removing the preceding slash and replacing +all non-alphanumerics with underscores. For example, the "warning" field +for the logfile F</var/log/nginx/errors.log> would be +F<var_log_nginx_errors_log_warning>. -One good way to get these names is to run \`munin-run [plugin-name]\` -after you've configured the required variables and then just copy/pasting +One good way to get these names is to run C<munin-run [plugin-name]> +after you've configured the required variables and then just copy/paste the names from the output. =head1 AUTHOR diff --git a/plugins/logs/service_events b/plugins/logs/service_events index 0c945bf6..3f08b88a 100755 --- a/plugins/logs/service_events +++ b/plugins/logs/service_events @@ -8,7 +8,7 @@ set -e service_events - Tracks the number of significant event occurrences per service -This plugin is a riff on the loggrep family (\`loggrep\` and my own \`loggrepx_\`). +This plugin is a riff on the loggrep family (C<loggrep> and my own C<loggrepx_>). However, rather than focusing on single log files, it focuses on providing insight into all "significant events" happening for a given service, which may be found across several log files. @@ -16,13 +16,18 @@ may be found across several log files. The idea is that any given service may produce events in various areas of operation. For example, while a typical web app might log runtime errors to it's app.log file, a filesystem change may prevent the whole app from -event being bootstrapped, which may be logged in an apache log or in syslog. +even being bootstrapped, and this crucial error may be logged in an apache +log or in syslog. + +This plugin attempts to give visibility into all such "important events" +that may affect the proper functioning of a given service. It attempts to +answer the question, "Is my service running normally?". -This plugin attempts to answer the question, "how is my service doing?". 
Unfortunately, it won't help you trace down exactly where the events are coming from if you happen to be watching a number of different logs, but it will at least let you know that something is wrong and that action -should be taken. +should be taken. To try to help with this, the plugin uses the extinfo +field to list which logs currently have important events in them. The plugin can be included multiple times to create graphs for various differing kinds of services. For example, you may have both webservices @@ -30,7 +35,10 @@ and system cleanup services, and you want to keep an eye on them in different ways. You can accomplish this by linking the plugin twice with different names -and providing different configuration for each instance. +and providing different configuration for each instance. In general, you +should think of a single instance of this plugin as representing a single +class of services. + =head1 CONFIGURATION @@ -70,15 +78,17 @@ Available config options include the following: For plugin-specific options, the following rules apply: -* <type> is any arbitrary string. It just has to match between <type>_logfiles - and <type>_regex. Common values are "apache", "nginx", "apt", "syslog", etc. +* C<< <type> >> is any arbitrary string. It just has to match between + C<< <type>_logfiles >> and C<< <type>_regex >>. Common values are "apache", + "nginx", "apt", "syslog", etc. * <service> is a string derived by passing the service name through a filter that removes non-alphabet characters from the beginning and replaces all non- - alpha-numeric characters with underscore (\`_\`). -* logfiles are bound to services by matching <service>_logbinding on the full - logfile path. For example, specifying my_site_logbinding=my-site would bind - both /var/log/my-site/errors.log and /srv/www/my-site/logs/app.log to the - defined my-site service. + alpha-numeric characters with underscore (C<_>). +* logfiles are bound to services by matching C<< <service>_logbinding >> on the + full logfile path. 
For example, specifying C<my_site_logbinding=my-site> would + bind both F</var/log/my-site/errors.log> and F</srv/www/my-site/logs/app.log> + to the defined C<my-site> service. + =head2 SERVICE AUTOCONF @@ -91,9 +101,12 @@ will be something like /srv/*/*, which would match all children in /srv/www/ and /srv/local/. If you choose not to use the autoconf feature, you MUST specify services as a -space-separated list of service names in the \`services\` variable. +space-separated list of service names in the C<services> variable. -=head2 EXAMPLE CONFIG + +=head2 EXAMPLE CONFIGS + +This example uses services autoconf: [service_events] user root @@ -106,14 +119,43 @@ space-separated list of service names in the \`services\` variable. env.apache_regex error|alert|crit|emerg env.warning 1 env.critical 5 - env.my_special_service_warning 100 + env.my_special_service_warning 100 env.my_special_service_critical 300 +This example DOESN'T use services autoconf: + + [service_events] + user root + env.services auth.example.com admin.example.com www.example.com + env.auth_example_com_logbinding my-custom-binding[0-9]+ + env.cfxsvc_logfiles /srv/*/*/logs/app.log + env.cfxsvc_regex error|alert|crit|emerg + env.phpfpm_logfiles /srv/*/*/logs/php-fpm*.log + env.phpfpm_regex Fatal error + env.apache_logfiles /srv/*/*/logs/errors.log + env.apache_regex error|alert|crit|emerg + env.warning 1 + env.critical 5 + env.auth_example_com_warning 100 + env.auth_example_com_critical 300 + env.www_example_com_warning 50 + env.www_example_com_critical 100 + +This graph will ONLY ever show values for the three listed services, even +if other services are installed whose logfiles match the logfiles search. + +Also notice that in this example, we've only listed a log binding for the +auth service. The plugin will use the service name by default for any +services that don't specify a log binding, so in this case, auth has a +custom log binding, while all other services have log bindings equal to +their names. 
+ =head1 AUTHOR Kael Shipman + =head1 LICENSE MIT LICENSE @@ -138,6 +180,7 @@ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + =head1 MAGIC MARKERS #%# family=manual @@ -155,9 +198,9 @@ while read -u 3 -r v; do n=0 while [ $n -lt "${#reqvars[@]}" ]; do if echo "$v" | grep -Eq "${reqvars[$n]}$"; then - !((setvars|=$(( 2 ** $n )) )) + setvars=$((setvars | $(( 2 ** $n )) )) fi - !((n++)) + n=$((n+1)) done done 3< <(echo "$vars") @@ -166,8 +209,8 @@ done 3< <(echo "$vars") n=0 allvars=0 while [ $n -lt "${#reqvars[@]}" ]; do - !((allvars+=$(( 2 ** $n )))) - !((n++)) + allvars=$(( allvars + $(( 2 ** $n )) )) + n=$((n+1)) done # And scream if something's not set @@ -181,7 +224,7 @@ if ! [ "$setvars" -eq "$allvars" ]; then >&2 echo " *${reqvars[$n]}" fi i=$((i<<1)) - !((n++)) + n=$((n+1)) done >&2 echo >&2 echo "Please read the docs." @@ -245,7 +288,7 @@ function config() { echo "graph_args --base 1000 -l 0" echo "graph_vlabel ${vlabel}" echo "graph_category other" - echo "graph_info Lists number of matching lines found in various logfiles associated with each service" + echo "graph_info Lists number of matching lines found in various logfiles associated with each service. Extinfo displays currently affected logs." local var_prefix while read -u 3 -r svc; do @@ -266,7 +309,7 @@ function fetch() { local curstate="$(cat "$MUNIN_STATEFILE")" local nextstate=() - local n svcnm varnm service svc svc_counter logbinding logfile lognm logmatch prvlines curlines matches + local n svcnm varnm service svc svc_counter_var logbinding logfile lognm logmatch prvlines curlines matches extinfo_var # Set service counters to 0 and set any logbindings that aren't yet set while read -u 3 -r svc; do @@ -286,6 +329,13 @@ function fetch() { continue fi + # Make sure the logfile exists + if [ ! -e "$logfile" ]; then + >&2 echo "Logfile '$logfile' doesn't exist. Skipping." 
+ n=$((n+1)) + continue + fi + # Find which service this logfile is associated with service= while read -u 4 -r svc; do @@ -306,17 +356,12 @@ function fetch() { svcnm="$(echo "$service" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')" lognm="$(echo "$logfile" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')" - # Get previous line count to determine whether or not the file may have been rotated + # Get previous line count to determine whether or not the file may have been rotated (defaulting to 0) prvlines="$(echo "$curstate" | grep "^${lognm}_lines=" | cut -f 2 -d "=")" - if [ -z "$prvlines" ]; then - prvlines=0 - fi + prvlines="${prvlines:-0}" - # Get the current number of lines in the file + # Get the current number of lines in the file (the logfile was already verified to exist) curlines="$(wc -l < "$logfile")" - if ! [ "$curlines" -eq "$curlines" ] &>/dev/null; then - curlines=0 - fi # If the current line count is less than the previous line count, we've probably rotated. # Reset to 0. 
@@ -330,15 +375,22 @@ function fetch() { logmatch="${LOGFILEMAP[$n]}_regex" matches="$(tail -n +"$prvlines" "$logfile" | grep -Ec "${!logmatch}" || true)" - # Aggregate and add to the correct service counter - svc_counter="${svcnm}_total" - !((matches+=${!svc_counter})) - typeset "$svc_counter=$matches" + # If there were matches, aggregate them and add this log to the extinfo for the service + if [ "$matches" -gt 0 ]; then + # Aggregate and add to the correct service counter + svc_counter_var="${svcnm}_total" + matches=$(($matches + ${!svc_counter_var})) + typeset "$svc_counter_var=$matches" + + # Add this log to extinfo for service + extinfo_var="${svcnm}_extinfo" + typeset "$extinfo_var=${!extinfo_var}$logfile, " + fi # Push onto next state nextstate+=("${lognm}_lines=$curlines") - !((n++)) + n=$((n+1)) done 3< <(echo "$LOGFILES") # Write state to munin statefile @@ -347,8 +399,10 @@ function fetch() { # Now echo values while read -u 3 -r svc; do svcnm="$(echo "$svc" | sed -r 's/^[^a-zA-Z]+//g' | sed -r 's/[^a-zA-Z0-9]+/_/g')" - svc_counter="${svcnm}_total" - echo "${svcnm}.value ${!svc_counter}" + svc_counter_var="${svcnm}_total" + extinfo_var="${svcnm}_extinfo" + echo "${svcnm}.value ${!svc_counter_var}" + echo "${svcnm}.extinfo ${!extinfo_var}" done 3< <(IFS=$'\n'; echo "${services[*]}") return 0