diff --git a/atomstrom.py b/atomstrom.py index dcc0916..4f58d15 100755 --- a/atomstrom.py +++ b/atomstrom.py @@ -14,6 +14,7 @@ import codecs import urllib2 #import hn import html2text +import HTMLParser import ConfigParser from argparse import ArgumentParser from email.header import Header @@ -165,6 +166,15 @@ def process_feed_entry(session, feed, entry): thisentry.fullpage = h2t.handle(thisentry.fullpage) elif feed.contentcolumn == 'readability': thisentry.readability = h2t.handle(thisentry.readability) + hp = HTMLParser.HTMLParser() + if thisentry.summary: + thisentry.summary = hp.unescape(thisentry.summary) + if thisentry.content: + thisentry.content = hp.unescape(thisentry.content) + if thisentry.fullpage: + thisentry.fullpage = hp.unescape(thisentry.fullpage) + if thisentry.readability: + thisentry.readability = hp.unescape(thisentry.readability) feed.entry.append(thisentry) session.commit() return 1