Implemented resolving of URL redirects
This commit is contained in:
		
							
								
								
									
										20
									
								
								atomstrom.py
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								atomstrom.py
									
									
									
									
									
								
							@@ -9,6 +9,7 @@ import feedparser
 | 
			
		||||
import re
 | 
			
		||||
import sys
 | 
			
		||||
import urllib
 | 
			
		||||
import urllib2
 | 
			
		||||
import hn
 | 
			
		||||
import html2text
 | 
			
		||||
import ConfigParser
 | 
			
		||||
@@ -26,6 +27,7 @@ class Feed(Base):
 | 
			
		||||
    url = Column(Text)
 | 
			
		||||
    frequency = Column(Integer)
 | 
			
		||||
    daily = Column(Boolean)
 | 
			
		||||
    resolveredirects = Column(Boolean)
 | 
			
		||||
    readability = Column(Boolean)
 | 
			
		||||
    fullpage = Column(Boolean)
 | 
			
		||||
    html2textsummary = Column(Boolean)
 | 
			
		||||
@@ -101,6 +103,7 @@ class Entry(Base):
 | 
			
		||||
    author = Column(Text)
 | 
			
		||||
    enclosures = Column(Text)
 | 
			
		||||
 | 
			
		||||
    resolvedlink = Column(Text)
 | 
			
		||||
    fullpage = Column(Text)
 | 
			
		||||
    readability = Column(Text)
 | 
			
		||||
    updated = Column(DateTime)
 | 
			
		||||
@@ -173,10 +176,13 @@ def mail_daily_digest(session, sender, receiver, prefix):
 | 
			
		||||
    count = 0
 | 
			
		||||
    for feed, feedinfo, entry in entries:
 | 
			
		||||
        count = count + 1
 | 
			
		||||
        link = entry.link
 | 
			
		||||
        if entry.resolvedlink:
 | 
			
		||||
            link = entry.resolvedlink
 | 
			
		||||
        body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
 | 
			
		||||
        body = body + '   %s\n' % entry.title
 | 
			
		||||
        body = body + '%s\n' % get_entry_text(entry)[0:100]
 | 
			
		||||
        body = body + '%s\n\n' % entry.link
 | 
			
		||||
        body = body + '%s\n\n' % link
 | 
			
		||||
    if count > 0:
 | 
			
		||||
        today = datetime.now()
 | 
			
		||||
        subject = '%s (%s) - %d entries' % (today.strftime('%y%m%d'), today.strftime('%A'), count)
 | 
			
		||||
@@ -192,9 +198,12 @@ def mail_single_entry(feed, feedinfo, entry, sender, receiver, prefix):
 | 
			
		||||
    subject = '%s' % (entry.title)
 | 
			
		||||
    if prefix != '':
 | 
			
		||||
        subject = '%s %s' % (prefix, subject)
 | 
			
		||||
    link = entry.link
 | 
			
		||||
    if entry.resolvedlink:
 | 
			
		||||
        link = entry.resolvedlink
 | 
			
		||||
    body = '%s\n\n' % get_entry_text(entry)
 | 
			
		||||
    body = body + '%s\n' % feedinfo.link
 | 
			
		||||
    body = body + '%s\n' % entry.link
 | 
			
		||||
    body = body + '%s\n' % link
 | 
			
		||||
    send_mail(sender, receiver, subject, body)
 | 
			
		||||
    entry.sent = datetime.now()
 | 
			
		||||
 | 
			
		||||
@@ -242,6 +251,13 @@ def process_feed_entry(session, feed, entry):
 | 
			
		||||
    except Exception, e:
 | 
			
		||||
        print '  new entry <%s>' % entry.title
 | 
			
		||||
        thisentry = Entry(entry)
 | 
			
		||||
        if feed.resolveredirects:
 | 
			
		||||
            print '    fetching final link <%s>' % entry.link
 | 
			
		||||
            request = urllib2.Request(entry.link)
 | 
			
		||||
            opener = urllib2.build_opener()
 | 
			
		||||
            result = opener.open(request)
 | 
			
		||||
            thisentry.resolvedlink = result.url
 | 
			
		||||
            print '    final link: <%s>' % result.url
 | 
			
		||||
        if feed.fullpage:
 | 
			
		||||
            print '    fetching full page <%s>' % entry.link
 | 
			
		||||
            thisentry.fullpage = fetch_full_page(entry.link)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user