#!/usr/bin/env python
# ex:ts=2
# (c) 2004-2006 Tilman Linneweh
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the above copyright notice is
# retained.
#
# Getting started
# ###############
#
# * Type rssfetch.py -h to read the help message
# * Define a class rssitems in your stylesheet
#
# $Id: rssfetch.py,v 1.11 2006/03/08 18:44:21 arved Exp $
import feedparser
import re
from getopt import *
import sys
import cgi
version = "0.3"

# Title customization table.
# Each entry is [text to look for, text to put in its place], for example
# ['foo', 'bar'].  RSSFetch.fixuptitle walks this list.
mytitles = [
    ['Wibble', 'lauri - Wibble'],
    ['Autoblogiographie', 'Hendrik Scholz - Autoblogiographie'],
]
## RSSFetch class
class RSSFetch:
def htmlEncode(self,s):
    """Return the HTML encoded version of the given string.

    Replaces '<', '>' and '"' with the entities &lt;, &gt; and &quot; so
    that a plain text string can be displayed on a web page.

    s -- the string to encode.

    Returns a new string; the argument itself is not modified.
    """
    # NOTE(review): '&' is left alone, matching the original table where
    # that entry was commented out -- presumably so entities already present
    # in the input are not double-encoded; confirm before adding it.
    # The newline -> line-break entry was likewise disabled and stays out.
    codes = [
        ['<', '&lt;'],
        ['>', '&gt;'],
        ['"', '&quot;'],
    ]
    for code in codes:
        s = s.replace(code[0], code[1])
    return s
def fixuptitle(self,s):
    """Return the title string s cleaned up for display.

    First removes everything from the first "(mailto:" up to the end of
    that line, then applies each [old, new] pair of the module-level
    mytitles list as a plain substring replacement.

    s -- the title string to clean up.

    Returns the resulting string.
    """
    s = re.sub(r"\(mailto:.*", "", s)
    for title in mytitles:
        # str.replace returns a new string (strings are immutable), so the
        # result has to be assigned back or the customization is lost.
        s = s.replace(title[0], title[1])
    return s
def processFeed(self,url,numberofitems,fw,adddate,debug):
tags = re.compile('<.*?>', re.M)
try:
d = feedparser.parse(url)
except:
print "Could not parse feed: "+ url
return -1
if debug:
print "Encoding: " + d.encoding
print d.feed
if d.feed.has_key('title'):
if d.feed.has_key('author') and d.feed.author != "":
htmltitle = cgi.escape((d.feed.author + " - " + d.feed.title).encode(d.encoding))
else:
htmltitle = cgi.escape(d.feed.title.encode(d.encoding))
htmltitle = self.fixuptitle(htmltitle)
htmltitle = tags.sub('', htmltitle)
else:
if debug: print "Title not found"
return -1
if debug:
print "Link: " + cgi.escape(d.feed.link)
print "Title: " + htmltitle
print >> fw, ""
print >> fw, htmltitle
print >> fw, ""
feeditems = len(d['entries'])
if feeditems <= numberofitems:
numberofitems = feeditems
if debug: print "Number of items: "+str(numberofitems)
print >> fw, "