From df3939be76661e69fe0a686cfeabdca10b82e58b Mon Sep 17 00:00:00 2001 From: ian dobson Date: Sun, 1 Feb 2009 16:17:44 +0100 Subject: [PATCH] Initial version --- plugins/other/google-rank | 159 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100755 plugins/other/google-rank diff --git a/plugins/other/google-rank b/plugins/other/google-rank new file mode 100755 index 00000000..4db99f6d --- /dev/null +++ b/plugins/other/google-rank @@ -0,0 +1,159 @@ +#!/bin/bash +# Simple munin plugin to find the google rank for a URL/WORD combination +# +# THIS SCRIPT BREAKS THE TOS OF GOOGLE SO USE WITH CARE AND DON'T BLAME ME IF THINGS GO WRONG +# +# (c) 2009 i.dobson@planet-ian.com +# +# For each url/words that you want to watch you need to create a variable/word pair in your +# munin-node configuration file for example +# +#[google_rank] +#user root +#timeout 60 +#env.URL1 http://www.plant-ian.com +#env.WORD1 avr webcam +#env.URL2 http://www.plant-ian.com +#env.WORD2 bascom +# +# Version 0.5 24.1.2009 +# Added loop to check the first 500 pages. Note the script sleeps 5 seconds beween each page grab so +# If the word/url your looking for is in the higher positions then you need to increase the timeout +# +# Version 0.5 21.1.2009 +# Dump each page grabbed from google into seperate files (helps with debugging) +# +# Version 0.4 19.1.2009 +# Fixed corrupt then empty cache file bug +# +# Version 0.3 19.1.2009 +# The script now grabs the google page based on the LASTHIT counter. +# The script grabs the google page for URL1, then the next time it's called URL2 etc. If the url/word pair doesn't exist for LASTHIT then the script just dumps the cached data +# +# Version 0.2 18.01.2009 +# Cache added, the script only grabs the pages from google every 10 calls +# The script still only checks to first 100 pages returned by google +# +# Version 0.1 17.01.2009 Initial release +# The script only checks to first 100 pages returned by google +# + +# Auto Configure, Check it word 1 is defined +if [ "$1" = "autoconf" ]; then + if [ "$URL1" != "" ]; then + if [ "$WORD1" != "" ]; then + echo yes + exit 0 + fi + fi + echo no + exit 1 +fi + +#Configure, loop through each variable defined WORDx URLx dumping it to munin +if [ "$1" = "config" ]; then + iLoop=1 + echo 'graph_title Google page rank' + echo 'graph_args --upper-limit 100 -l 0' + echo 'graph_category other' + echo 'graph_scale no' + echo 'graph_info Google page rank for URLs & Words' + + URL="xxx" + until [ "$URL" = "" ]; do + TMPURL=URL$iLoop + URL="${!TMPURL}" + TMPWORD=WORD$iLoop + WORD="${!TMPWORD}" + if [ "$URL" = "" ]; then + exit 0 + fi + if [ "$WORD" = "" ]; then + exit 0 + fi + VAR=`echo $URL.$WORD | sed -e "s/http:\/\///g"| sed -e "s/ /_/g"| sed -e "s/\./_/g"| sed -e "s/\-/_/g"` + URL=`echo $URL| sed -e "s/http:\/\///g"` + echo $VAR.label Pagerank $URL - $WORD + let iLoop="$iLoop +1" + done + exit 0 +fi + +#Meat of the program, grabs data from google for one word/url pair using LASTHIT as the pointer to which url/word pair to read + +#Read update & save counter +LASTHIT=0 +if [ -f /tmp/google_rank.status ]; then + LASTHIT=`cat /tmp/google_rank.status | awk '{print $1}'` +fi + +let LASTHIT="$LASTHIT + 1" +echo $LASTHIT > /tmp/google_rank.status + +#Find URL/WORD PAIR for loop counter +TMPURL=URL$LASTHIT +URL="${!TMPURL}" +TMPWORD=WORD$LASTHIT +WORD="${!TMPWORD}" + +if [ "$URL" != "" ]; then + +#Setup defaults + base=0 + num=1 + start=0 + FOUND=0 +#Clean up URL/WORD pair, removing http:// replacing " " with "_", "." with "_", "-" with "-" + VAR=`echo $URL.$WORD | sed -e "s/http:\/\///g"| sed -e "s/ /_/g"| sed -e "s/\./_/g"| sed -e "s/\-/_/g"` + SEARCHWORD=`echo $WORD| sed -e "s/ /%20/g"` + +until [ "$FOUND" -ne "0" ]; do +#Grab page from google for the WORD/PAGE combination.Pipe it into awk to pull out the url's only, one per line. Then dump only the lines containing the URL defined + wget -q --user-agent=Firefox -O - http://www.google.com/search?q=$SEARCHWORD\&num=100\&hl=en\&safe=off\&pwst=1\&start=$start\&sa=N > /tmp/google_rank.$LASTHIT.data + VALUE=`cat /tmp/google_rank.$LASTHIT.data|sed 's//\n\1\n/g'|awk -v num=$num -v base=$base '{ if ( $1 ~ /^http/ ) print base,num++,$NF }'|awk '{ print $2 " " $3}'|grep -i $URL| awk '{ print $1}'` + VALUE=`echo $VALUE| awk '{ print $1}'` + if [ "$VALUE" = "" ]; then + VALUE=-1 + let start="start + 100" + sleep 5 + else + FOUND=1 + let VALUE="$VALUE + $start" + fi +### echo Start=$start Value=$VALUE Found=$FOUND + if [ "$start" -gt 500 ];then + FOUND=-1 + VALUE=-1 + fi +done + +#Read through cache file saving to array + iLoop=1 + while read line ;do + Data[$iLoop]=$line + let iLoop="$iLoop +1" + done < /tmp/google_rank.cache + +#replace one line with the new value grabbed from google + Data[$LASTHIT]="$VAR.value $VALUE" + +#write data back + rm /tmp/google_rank.cache + for iLoop in `seq 1 10`; do + echo ${Data[$iLoop]} >> /tmp/google_rank.cache + done +fi + +#Reset counter to start + if [ "$LASTHIT" -gt 30 ]; then + echo 0 > /tmp/google_rank.status + fi + +#Dump data to munin + while read line ;do + if [ "$line" != "" ]; then + echo $line + fi + done < /tmp/google_rank.cache +exit 0 +