#!/bin/sh
# bashfeed - Generate an RSS 2.0 feed directly from your existing web site
# Author - Pádraig Brady
# Licence - GPL V2
# Releases -
# 1.0 - Jun 19 2006 - Initial release
# 1.1 - Jun 26 2006 - Exclude files with "Exclude from bashfeed"
# HTML comment within the first 10 lines.
# 1.2 - May 01 2007 - Add author elements (from html if present)
# Just run this script from the root directory of your web site and
# it will generate feed items for the newest files. Generally I do this
# just before I sync my local web site copy to my public server.
# Which files are selected and excluded can be configured below.
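# Note for html files it will extract the following elements if present:
#
#   <title>Item title</title>
#   <meta name="keywords" content="Item tags">
#   <meta name="description" content="Item description">
#   <meta name="author" content="Item author">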
# Note this script will keep the same item guid for an updated file.
# Just updating the pubDate will not cause liferea 1.0.11 at least
# to mark the item as updated (or update the timestamp even).
# One must change the description or title also, and so
# I set the (hidden) description to the file timestamp.
#
# Testing with thunderbird 1.0.8 shows that it indexes on link
# and so won't ever show updates to other fields. Therefore I append #seconds
# to the link to force it to create a new entry for an updated item.
#
# Note you may find the http://www.pixelbeat.org/scripts/fix script
# useful for doing edits to files that you don't want to show up
# as updated content in the feed, or generally edit a file without
# changing the modification date.
# ---- User configuration ----
# Maximum number of files (feed items) to list.
num_files=10
# Host name used to build every URL in the feed.
site="www.pixelbeat.org"
# Feed-level description and author.
description="latest from $site"
author="P@draigBrady.com (Pádraig Brady)"
suggested_update_freq=1440 #mins
#files starting with . | files without a . | files ending in .c .cpp ...
include_re='(^|/)[.].+|(^|/)[^.]+$|[.](c|cpp|py|sh|rc|tips|fortune|html)$' #only show these files
exclude_re='(priv/|tmp/|.htaccess|xvpics|timeline\.html|modified\.html|head\.html|header\.html|footer\.html|adds\.html|last\.html|fslint/(NEWS\.html|md5sum))' #don't show these paths
# Directory index file names; a path ending in one of these is reduced to
# its directory part by the sed script built below.
default_files="index.html index.shtml index.php"
############# No user serviceable parts below ###################
# Build a sed script that strips a trailing default file name from a path,
# e.g. "docs/index.html" -> "docs/". One "s/\(.*\)NAME$/\1/;t" command is
# appended per name; "t" jumps to the end of the script once a substitution
# has been made so later commands are skipped.
# Start from an empty value so a same-named variable inherited from the
# environment cannot leak into the sed script.
replace_default_files=""
for file in $default_files; do
# Escape regex metacharacters (and the "/" delimiter) so the name is matched
# literally; unescaped, the "." in "index.html" would match any character.
file_re=$(printf '%s\n' "$file" | sed 's|[][\\.*^$/]|\\&|g')
replace_default_files="$replace_default_files; s/\(.*\)$file_re\$/\1/;t"
done
# Print the opening of the feed. As written this string holds just a line
# break, so only blank lines are produced.
# NOTE(review): the file header says an RSS 2.0 feed is generated, yet no
# markup is present here; the XML prolog/opening tag looks lost. Confirm
# against a pristine copy.
echo '
'
# Current time formatted per RFC 2822 (date's --rfc-2822 option).
time=`date --rfc-2822`
# Print the site-wide values: site name, update frequency, site URL, the
# bashfeed URL, description, author, the time computed above, and the part
# of $LANG before the underscore (the sed keeps the two characters that
# precede "_" and drops the rest).
# NOTE(review): the values are run together with no enclosing elements;
# presumably the surrounding tags are missing. Confirm.
echo "
$site$suggested_update_freq
http://$site/
http://www.pixelbeat.org/scripts/bashfeed$description$author$time`echo $LANG | sed 's/\(..\)_.*/\1/'`
"
# Select the newest files and print one feed entry per file.
# Pipeline stages, in order:
#   find  - list regular files under $1 as "relative-path<TAB>mtime"
#           (%P is the path without the starting point, %T@ the modification
#           time in seconds since the epoch). $1 is unquoted, so when no
#           argument is given find receives no starting point at all.
#   sort  - order numerically on the second field, descending (newest first).
#   cut   - keep only the path field.
#   grep  - keep paths matching $include_re, then drop those matching
#           $exclude_re (both are extended regular expressions).
#   while - first loop: filter on file content and cap the count.
#   while - second loop: print the entry for each surviving path.
find $1 -type f -printf "%P\t%T@\n" |
sort -k2,2nr |
cut -f1 |
grep -E "$include_re" |
grep -Ev "$exclude_re" |
while read file; do
# Pass the path on only if nothing in the output of head (by default the
# first 10 lines of the file) matches the fixed-string, case-insensitive
# pattern.
# NOTE(review): the pattern here is empty, and an empty pattern matches
# every line, so as written only files with no lines get through. The
# release notes mention an "Exclude from bashfeed" HTML comment; the marker
# text looks lost. Confirm against a pristine copy.
if ! head "$file" | grep -Fiq ''; then
echo "$file"
# Count the paths passed on and stop once $num_files have been emitted.
i=$((i+1))
[ $i -eq $num_files ] && break
fi
done |
while read file; do
# Both values are taken from the file's modification time: pubDate as
# RFC 2822 text, force_update as seconds since the epoch.
pubDate=`date --reference="$file" --rfc-2822`
force_update=`date --reference="$file" "+%s"`
# Reset the per-item fields so values from the previous file are not reused.
title=""; keywords=""; description=""
# Only paths ending in .html, .shtml or .php are scanned for a title and
# keywords.
# NOTE(review): the lines from here down to the matching "fi" look damaged:
# the keywords assignment has an unbalanced quote and a "done" appears with
# no visible loop opening it. The code that sets $page_author and $tags is
# not visible either. Restore from a pristine copy before relying on this.
#
# Remainder of the loop body, in order:
#  - if $page_author is non-empty: clear it when it equals the site-wide
#    $author, otherwise append a line break and a space to it using
#    "echo -ne" (NOTE(review): -ne is not guaranteed to be understood by
#    every /bin/sh echo; confirm which shell runs this script).
#  - print the item: title, link built from $site and the path, pubDate,
#    the same link with "#$force_update" appended, then $page_author and
#    $tags.
#    NOTE(review): as written "$titlehttp" is read by the shell as a single
#    variable named titlehttp; the markup that separated the title from the
#    link looks lost, as does most of the element text around "]]>".
# After the loop a final echo prints an empty line (presumably the closing
# tags of the feed are missing there too; confirm).
if echo "$file" | grep -Eq '\.(html|shtml|php)$'; then
title=`sed -n 's/.*\(.*\)<\/title>.*/\1/ip;T;q' < "$file"`
keywords=`sed -n 's/.*$keyword\n "`
done
fi
if [ "$page_author" ]; then
if [ "$page_author" = "$author" ]; then
page_author=""
else
page_author=`echo -ne "$page_author\n "`
fi
fi
echo "
$titlehttp://$site/$file$pubDate
http://$site/$file#$force_update
$page_author$tags]]>
"
done
echo ''