#!/bin/sh

# Estimate how many subscribers read the feed with each feed reader.
#
# Usage: run with no arguments from the directory containing ./get_last_days.
# Output: one line per agent, "<count> <agent>", with the count right aligned
# in a 7 character column, sorted by ascending count and cut to 80 characters.

#for speed. LC_ALL is used rather than LANG, as LANG is overridden
#by any LC_* variable already present in the environment.
export LC_ALL=C

feed="/feed/rss2.xml"

#A literal tab. Neither `echo -e` nor \t in a sed replacement
#is portable, so generate the character once with printf.
tab=$(printf '\t')

# Summarise access log lines read from stdin, and write the report to stdout.
# Reads the globals $feed (URL path of the feed) and $tab.
# Each line is expected to start with the client ip followed by a space,
# and to end with the user agent in double quotes.
count_subscribers() {
  #filter on those accessing feed URL
  grep -F "GET $feed" |
  #extract ip & user_agent, separated by a tab
  sed 's/\([^ ]*\) .*"\([^"]*\)"$/\1'"$tab"'\2/' |
  #sort by agent, then by ip
  sort -t "$tab" -k2 -k1,1 |
  #merge and count all requests from same user agent at a particular ip
  uniq -c |
  #ignore requests from browsers (including firefox live bookmark users?)
  #Note thunderbird has rv: in agent string also so can't filter on that.
  grep -vE '(Firefox|MSIE|AppleWebKit/|Konqueror|Opera)' |
  #ignore bots
  grep -vEi '(bot|crawl|slurp|excite|curl)' |
  #select just agent strings
  cut -d "$tab" -f2 |
  #count number of subscribers (distinct ips) using each agent
  uniq -c |
  #uniquely identify different feeds read by google,
  #by moving the feed id to just after the feedfetcher.html URL
  sed 's/\(.*\)\(feedfetcher.html\)\(.*\)id=\([0-9]*\).*/\1\2.\4\3/' |
  #move subscribers counts of online readers to first column,
  #replacing the count of ips and leaving just the reader's URL
  sed 's/ *[0-9]* .*\(http[^;]*\).* \([0-9]*\) subscriber.*/     \2 \1/' |
  #merge agents again, in case there were increasing subscribers during day.
  #Only adjacent lines are merged, and the first of each group is kept.
  uniq -f1 |
  #sort by subscriber numbers
  sort -k1,1n |
  #right align numbers: pad generously, then trim to a 7 char wide column
  sed 's/^/      /; s/ *\([ 0-9]\{7,\}\) \([^ ].*\)/\1 \2/' |
  #truncate lines to 80 chars
  sed 's/\(.\{80\}\).*/\1/' #note $COLUMNS not exported
}

#assume all subscribers check once a day
#(presumably get_last_days outputs the last day of entries from last.t)
./get_last_days 1 last.t | count_subscribers
