#!/bin/sh
# Estimate how many subscribers each feed reader (user agent) accounts for,
# based on requests for the feed URL found in a web server access log.
#
# A "subscriber" is one distinct (user agent, first-16-bits-of-IP) pair.
# Online aggregators that advertise "N subscribers" in their user agent
# string are reported with that N instead.
#
# Output: one "count agent" line per reader, count right-aligned in a
# 7-character column, sorted by ascending count, truncated to 80 chars.
#
# Needs, relative to the current directory:
#   ./get_last_days   helper invoked as "./get_last_days 7 access_log"
#                     (presumably emits the last 7 days of the log - confirm)
#   agents_to_ignore  file of extended regexes, one per line (grep -E -f)
#
# NOTE(review): each log line is assumed to start with an IPv4 address and
# end with the double-quoted user agent - confirm against the server's
# log format.

# LC_ALL overrides LANG, so set both to really get the fast C locale.
export LANG=C LC_ALL=C

feed="/feed/rss2.xml"

# Tokens that identify an interactive browser rather than a feed reader.
browsers='(rv:|MSIE|AppleWebKit/|Konqueror|Opera)'

# Literal TAB: "\t" inside sed expressions is a GNU extension, and
# "echo -e" is not available in every /bin/sh.
tab=$(printf '\t')

# Reduce each log line to "<a.b><TAB><user agent>" (first 16 bits of the IP).
net_and_agent() {
  sed 's/\([0-9]*\.[0-9]*\)\.[0-9]*\.[0-9]* .*"\([^"]*\)"$/\1'"$tab"'\2/'
}

# Drop "uniq -c" lines whose count is exactly 1 and whose agent is a browser.
# Anchored on the leading count so " 1 " elsewhere in the line cannot match.
drop_single_browser_hits() {
  grep -vE "^ *1 .*${browsers}"
}

# Collapse the many version strings of well-known readers to a single name.
# Rules are applied in order, so e.g. "MSIE 7.0" wins over "Opera".
merge_reader_variants() {
  sed "
    s/\([^${tab}]\)${tab}.*Firefox.*/\1${tab}Firefox/
    s/\([^${tab}]\)${tab}.*MSIE 7.0.*/\1${tab}IE7/
    s/\([^${tab}]\)${tab}.*Opera.*/\1${tab}Opera/
    s/\([^${tab}]\)${tab}.*Akregator.*/\1${tab}Akregator/
    s/\([^${tab}]\)${tab}.*Thunderbird.*/\1${tab}Thunderbird/
    s/\([^${tab}]\)${tab}.*Liferea.*/\1${tab}Liferea/
    s/\([^${tab}]\)${tab}.*Google Desktop.*/\1${tab}Google Desktop/
  "
}

# For agents of the form "... (http://site; N subscribers)", replace the
# whole line with " N http://site" so the self-reported N becomes the count.
hoist_subscriber_counts() {
  sed 's/ *[0-9]* .*\(http[^;]*\).* \([0-9]*\) subscriber.*/ \2 \1/'
}

# Right-align the leading count in a 7-character column. Six spaces are
# prepended first so that even a 1-digit count preceded by a single space
# is wide enough for the 7-character match.
right_align_counts() {
  sed 's/^/      /; s/ *\([ 0-9]\{7,\}\) \([^ ].*\)/\1 \2/'
}

#assume all subscribers check once a week
./get_last_days 7 access_log |
  #filter on those accessing feed URL
  grep -F "GET $feed" |
  #exclude browsers that refer to (click) feed from site
  #NOTE(review): this pattern requires a space right after the browser
  #token; kept exactly as found - confirm that is intended
  grep -vE "pixelbeat.org.*${browsers} .* " |
  #extract first 16 bits of ip & user_agent
  net_and_agent |
  #sort by agent, then by ip net
  sort -k2 -k1,1 |
  #merge and count all requests from same user agent at a particular net
  uniq -c |
  #ignore single requests from browsers
  drop_single_browser_hits |
  #ignore bots
  grep -vE -f agents_to_ignore |
  #merge reader variants
  merge_reader_variants |
  #select just agent strings (TAB is cut's default delimiter)
  cut -f2 |
  #group agent strings
  sort |
  #count number of subscribers using each agent
  uniq -c |
  #move subscriber counts of online readers to first column
  hoist_subscriber_counts |
  #merge agents again, in case there were increasing subscribers during day
  uniq -f1 |
  #sort by subscriber numbers
  sort -k1,1n |
  #right align numbers
  right_align_counts |
  #truncate lines to 80 chars (fixed width: $COLUMNS is not exported)
  cut -c1-80