#!/bin/sh

# output the PageViews per day from an access_log passed to stdin

# Print one "<date> <count>" line per day for the access log read from stdin.
#
# Inputs:  access log on stdin; field 4 (space separated) must be the
#          "[dd/Mon/yyyy:hh:mm:ss" timestamp, as in common/combined log format.
#          The pattern files agents_to_ignore and files_to_ignore (one
#          extended regex per line) are read from the current directory;
#          log lines matching any of those patterns are not counted.
# Outputs: "dd/Mon/yyyy N" lines on stdout, in input order.
# Notes:   only adjacent lines are merged, so the log must be in
#          chronological order. uniq -w is a GNU extension.
pageviews_per_day() {
  # LC_ALL outranks LANG and every other LC_* variable, so it is the only
  # setting that guarantees byte-wise matching. Besides being faster, this
  # keeps grep from treating lines with invalid multibyte bytes as binary.
  LC_ALL=C
  export LC_ALL

  # The request method directly follows the opening double quote of the
  # request field, so accept a quote as well as a space in front of HEAD.
  # The timestamp prefix "[dd/Mon/yyyy" is 12 characters long.
  grep -Ev -f agents_to_ignore -f files_to_ignore |
    grep -Ev '([ "]HEAD | 30[17] | 206 | 40[34] )' | # ignore HEAD and certain HTTP statuses
    cut -d ' ' -f4 |                                 # extract timestamp
    uniq -c -w12 |                                   # merge and count hits per day
    sed 's/ *\([0-9]*\) \[\([^:]*\).*/\2 \1/'        # extract date and count for each day
}

pageviews_per_day
