diff --git a/atomstrom.py b/atomstrom.py index 512cd7c..c752e2d 100755 --- a/atomstrom.py +++ b/atomstrom.py @@ -21,13 +21,15 @@ class Feed(Base): daily = Column(Boolean) readability = Column(Boolean) fullpage = Column(Boolean) + html2textsummary = Column(Boolean) enabled = Column(Boolean) - def __init__(self, url, daily, readability, fullpage, enabled): + def __init__(self, url, daily, readability, fullpage, enabled, html2textsummary): self.url = url self.daily = daily self.readability = readability self.fullpage = fullpage + self.html2textsummary = html2textsummary self.enabled = enabled def __repr__(self): @@ -190,7 +192,10 @@ def fetch_full_page(link): return text.encode('latin-1', 'replace') def process_feed_entry(feed, entry): - query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace')) + #query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace')) + title = entry.title.encode('latin-1', 'replace') + link = entry.link.encode('latin-1', 'replace') + query = session.query(Entry).filter(Entry.feed_id==feed.id).filter(Entry.title==title).filter(Entry.link==link) try: thisentry = query.one() thisentry.update(entry) @@ -205,6 +210,11 @@ def process_feed_entry(feed, entry): if feed.readability: print ' fetching readability <%s>' % entry.link thisentry.readability = fetch_readability(entry.link) + if feed.html2textsummary: + print ' converting summary' + summary = thisentry.summary.decode('latin-1') + summary = html2text.html2text(summary) + thisentry.summary = summary.encode('latin-1', 'replace') feed.entry.append(thisentry) return 1 @@ -227,7 +237,7 @@ def fetch_single_feed(feed): entries_total = entries_total + 1 entries_new = entries_new + process_feed_entry(feed, entry) session.commit() - print 'fetched %d from %d entries' % (entries_total, entries_new) + print 'updated %d of %d entries' % (entries_new, entries_total) def fetch_all_feeds(): print 'fetching all feeds...'