improved debug output, included html2text
atomstrom.py (35 changed lines)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python

-from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey
+from sqlalchemy import create_engine, Table, Column, Integer, Text, Boolean, DateTime, MetaData, ForeignKey, desc
 from sqlalchemy.orm import sessionmaker, relation, backref
 from sqlalchemy.ext.declarative import declarative_base
 import datetime
@@ -9,6 +9,7 @@ import re
 import sys
 import urllib
 import hn
+import html2text

 Base = declarative_base()

@@ -150,7 +151,7 @@ def mail_daily_digest():
     sender = 'atomstrom'
     body = ''
     count = 0
-    for feed, feedinfo, entry in session.query(Feed, Feedinfo, Entry).filter(Feed.id==Feedinfo.feed_id).filter(Feed.id==Entry.feed_id).filter(Feed.enabled==1).filter(Feed.daily==1).order_by(Entry.firstfetched).all():
+    for feed, feedinfo, entry in session.query(Feed, Feedinfo, Entry).filter(Feed.id==Feedinfo.feed_id).filter(Feed.id==Entry.feed_id).filter(Feed.enabled==1).filter(Feed.daily==1).order_by(desc(Entry.firstfetched)).all():
         count = count + 1
         body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
         body = body + ' %s\n' % entry.title
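The functional change in mail_daily_digest() is the sort direction: wrapping the column in desc() makes SQLAlchemy emit ORDER BY ... DESC, so the newest entries lead the digest instead of trailing it. A minimal sketch of the same pattern, assuming SQLAlchemy 1.4+ and an illustrative Item table that is not part of atomstrom:

from sqlalchemy import create_engine, Column, Integer, DateTime, desc
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Item(Base):
    # illustrative table, not the atomstrom schema
    __tablename__ = 'items'
    id = Column(Integer, primary_key=True)
    firstfetched = Column(DateTime)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

oldest_first = session.query(Item).order_by(Item.firstfetched).all()        # old behaviour
newest_first = session.query(Item).order_by(desc(Item.firstfetched)).all()  # new behaviour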
@@ -183,22 +184,29 @@ def fetch_readability(link):
 def fetch_full_page(link):
     opener = urllib.FancyURLopener({})
     response = opener.open(link)
-    return response.read()
+    html = response.read()
+    html = html.decode('utf8')
+    text = html2text.html2text(html)
+    return text.encode('latin-1', 'replace')

 def process_feed_entry(feed, entry):
     query = session.query(Entry).filter_by(feed_id=feed.id, title=entry.title.encode('latin-1', 'replace'))
     try:
         thisentry = query.one()
         thisentry.update(entry)
-        return '-'
+        print ' entry already known <%s>' % entry.title
+        return 0
     except Exception, e:
+        print ' new entry <%s>' % entry.title
         thisentry = Entry(entry)
         if feed.fullpage:
+            print ' fetching full page <%s>' % entry.link
             thisentry.fullpage = fetch_full_page(entry.link)
         if feed.readability:
+            print ' fetching readability <%s>' % entry.link
             thisentry.readability = fetch_readability(entry.link)
         feed.entry.append(thisentry)
-        return '+'
+        return 1

 def fetch_single_feed(feed):
     print 'fetching %s' % feed.url
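fetch_full_page() no longer returns raw HTML; it decodes the response as UTF-8, converts it to plain text with html2text, and re-encodes the result as latin-1 to match how the rest of the script stores strings. A small sketch of that decode/convert/encode flow, using a hard-coded page body instead of a network fetch:

import html2text

# stand-in for the bytes fetch_full_page() would download
html = b'<html><body><h1>Heading</h1><p>Some <b>bold</b> text.</p></body></html>'

text = html2text.html2text(html.decode('utf8'))   # markdown-ish plain text
stored = text.encode('latin-1', 'replace')        # unmappable characters become '?'
print(text)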
@@ -209,18 +217,17 @@ def fetch_single_feed(feed):
         feed.feedinfo = query.one()
         feed.feedinfo.update(parser)
     except Exception, e:
         print 'this feed seems to be new'
         feed.feedinfo = Feedinfo(parser)

-    print 'processing feed entries: ',
+    print 'processing feed entries:'
     entries_new = 0
     entries_total = 0
     for entry in parser.entries:
         entries_total = entries_total + 1
-        ret = process_feed_entry(feed, entry)
-        if ret == '+':
-            entries_new = entries_new + 1
-        sys.stdout.write(ret)
-    print ' (%d/%d new)' % (entries_new, entries_total)
+        entries_new = entries_new + process_feed_entry(feed, entry)
     session.commit()
     print 'fetched %d from %d entries' % (entries_total, entries_new)

 def fetch_all_feeds():
     print 'fetching all feeds...'
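Because process_feed_entry() now returns 1 for a new entry and 0 for a known one, fetch_single_feed() can total new entries by adding up the return values rather than decoding the old '+'/'-' progress characters. The same idea in isolation, with a hypothetical helper that is not repository code:

def process_item(item, known):
    # hypothetical stand-in for process_feed_entry(): returns 1 if new, 0 if known
    if item in known:
        print('  entry already known <%s>' % item)
        return 0
    print('  new entry <%s>' % item)
    known.add(item)
    return 1

known = set()
items = ['a', 'b', 'a']
entries_new = sum(process_item(item, known) for item in items)
print('%d/%d new' % (entries_new, len(items)))   # prints: 2/3 new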
@@ -229,8 +236,8 @@ def fetch_all_feeds():
         print

 if __name__ == '__main__':
-    #fetch_all_feeds()
-    #mail_single_entries()
-    mail_daily_digest()
+    fetch_all_feeds()
+    mail_single_entries()
+    #mail_daily_digest()

     session.commit()