refactored processing of entries, enabled processing of enclosures
This commit is contained in:
58
atomstrom.py
58
atomstrom.py
@@ -10,11 +10,6 @@ from ddate import ddate
|
||||
import feedparser
|
||||
import sys
|
||||
import codecs
|
||||
#import urllib
|
||||
import urllib2
|
||||
#import hn
|
||||
import html2text
|
||||
import HTMLParser
|
||||
import ConfigParser
|
||||
from argparse import ArgumentParser
|
||||
from email.header import Header
|
||||
@@ -112,19 +107,6 @@ def mail_single_entries(session, sender, receiver, prefix):
|
||||
else:
|
||||
print 'no unmailed single entries found... not sending mail.'
|
||||
|
||||
def fetch_readability(link):
    """Return the result of ``hn.upgradeLink`` for *link*, decoded as UTF-8.

    NOTE(review): the only ``import hn`` line visible in this file is
    commented out -- confirm ``hn`` is in scope before calling this.
    """
    upgraded = hn.upgradeLink(link)
    return upgraded.decode('utf8')
|
||||
|
||||
def fetch_full_page(link):
    """Download *link* and return the page converted to text by html2text.

    The response body is decoded as UTF-8 (a body that is not valid UTF-8
    raises UnicodeDecodeError), converted with ``html2text.html2text`` and
    returned encoded as latin-1, with unencodable characters replaced.
    """
    # The module-level ``import urllib`` is commented out, so import it here;
    # otherwise the FancyURLopener lookup below fails with a NameError.
    import urllib

    opener = urllib.FancyURLopener({})
    response = opener.open(link)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    text = html2text.html2text(html.decode('utf8'))
    return text.encode('latin-1', 'replace')
|
||||
|
||||
def process_feed_entry(session, feed, entry):
|
||||
thisentry = session.query(Entry).\
|
||||
filter(Entry.title == entry.title).\
|
||||
@@ -137,45 +119,7 @@ def process_feed_entry(session, feed, entry):
|
||||
return 0
|
||||
else:
|
||||
print ' new entry <%s>' % entry.title
|
||||
thisentry = Entry(entry)
|
||||
if feed.resolveredirects:
|
||||
print ' fetching final link <%s>' % entry.link
|
||||
request = urllib2.Request(entry.link)
|
||||
opener = urllib2.build_opener()
|
||||
result = opener.open(request)
|
||||
thisentry.resolvedlink = result.url
|
||||
print ' final link: <%s>' % result.url
|
||||
if feed.fullpage:
|
||||
print ' fetching full page <%s>' % entry.link
|
||||
thisentry.fullpage = fetch_full_page(entry.link)
|
||||
if feed.readability:
|
||||
print ' fetching readability <%s>' % entry.link
|
||||
thisentry.readability = fetch_readability(entry.link)
|
||||
if feed.html2textcontent:
|
||||
print ' converting summary'
|
||||
h2t = html2text.HTML2Text()
|
||||
h2t.body_width = 0
|
||||
h2t.inline_links = False
|
||||
if feed.html2textignoreimages:
|
||||
h2t.ignore_images = True
|
||||
if feed.contentcolumn == 'summary':
|
||||
thisentry.summary = h2t.handle(thisentry.summary)
|
||||
elif feed.contentcolumn == 'content':
|
||||
thisentry.content = h2t.handle(thisentry.content)
|
||||
elif feed.contentcolumn == 'fullpage':
|
||||
thisentry.fullpage = h2t.handle(thisentry.fullpage)
|
||||
elif feed.contentcolumn == 'readability':
|
||||
thisentry.readability = h2t.handle(thisentry.readability)
|
||||
hp = HTMLParser.HTMLParser()
|
||||
if thisentry.summary:
|
||||
thisentry.summary = hp.unescape(thisentry.summary)
|
||||
if thisentry.content:
|
||||
thisentry.content = hp.unescape(thisentry.content)
|
||||
if thisentry.fullpage:
|
||||
thisentry.fullpage = hp.unescape(thisentry.fullpage)
|
||||
if thisentry.readability:
|
||||
thisentry.readability = hp.unescape(thisentry.readability)
|
||||
feed.entries.append(thisentry)
|
||||
feed.entries.append(Entry(entry, feed))
|
||||
session.commit()
|
||||
return 1
|
||||
|
||||
|
Reference in New Issue
Block a user