implemented readability-support

This commit is contained in:
2010-10-30 00:21:24 +02:00
parent 3141f82df3
commit e4fb328d48
2 changed files with 240 additions and 0 deletions

View File

@ -8,6 +8,7 @@ import feedparser
import re
import sys
import urllib
import hn
Base = declarative_base()
@ -91,6 +92,7 @@ class Entry(Base):
enclosures = Column(Text)
fullpage = Column(Text)
readability = Column(Text)
lastfetched = Column(DateTime)
sent = Column(DateTime)
@ -125,6 +127,11 @@ session = Session()
#session.add(Feed('http://www.heise.de/newsticker/heise-atom.xml', 1, 0, 0, 1))
#session.add(Feed('http://blog.schatenseite.de/feed/', 1, 0, 0, 1))
def fetch_readability(link):
    """Return the result of ``hn.upgradeLink(link)`` decoded as UTF-8.

    ``link`` is handed unchanged to ``hn.upgradeLink``; whatever that
    call returns is decoded with the ``utf8`` codec and returned.
    NOTE(review): presumably ``hn.upgradeLink`` yields the
    readability-extracted page as a UTF-8 byte string -- confirm
    against the ``hn`` module.
    """
    return hn.upgradeLink(link).decode('utf8')
def fetch_full_page(link):
opener = urllib.FancyURLopener({})
response = opener.open(link)
@ -140,6 +147,8 @@ def process_feed_entry(feed, entry):
thisentry = Entry(entry)
if feed.fullpage == 1:
thisentry.fullpage = fetch_full_page(entry.link)
if feed.readability == 1:
thisentry.readability = fetch_readability(entry.link)
feed.entry.append(thisentry)
return "+"