Implemented resolving of URL redirects
This commit is contained in:
		
							
								
								
									
										20
									
								
								atomstrom.py
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								atomstrom.py
									
									
									
									
									
								
							@@ -9,6 +9,7 @@ import feedparser
 | 
				
			|||||||
import re
 | 
					import re
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import urllib
 | 
					import urllib
 | 
				
			||||||
 | 
					import urllib2
 | 
				
			||||||
import hn
 | 
					import hn
 | 
				
			||||||
import html2text
 | 
					import html2text
 | 
				
			||||||
import ConfigParser
 | 
					import ConfigParser
 | 
				
			||||||
@@ -26,6 +27,7 @@ class Feed(Base):
 | 
				
			|||||||
    url = Column(Text)
 | 
					    url = Column(Text)
 | 
				
			||||||
    frequency = Column(Integer)
 | 
					    frequency = Column(Integer)
 | 
				
			||||||
    daily = Column(Boolean)
 | 
					    daily = Column(Boolean)
 | 
				
			||||||
 | 
					    resolveredirects = Column(Boolean)
 | 
				
			||||||
    readability = Column(Boolean)
 | 
					    readability = Column(Boolean)
 | 
				
			||||||
    fullpage = Column(Boolean)
 | 
					    fullpage = Column(Boolean)
 | 
				
			||||||
    html2textsummary = Column(Boolean)
 | 
					    html2textsummary = Column(Boolean)
 | 
				
			||||||
@@ -101,6 +103,7 @@ class Entry(Base):
 | 
				
			|||||||
    author = Column(Text)
 | 
					    author = Column(Text)
 | 
				
			||||||
    enclosures = Column(Text)
 | 
					    enclosures = Column(Text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    resolvedlink = Column(Text)
 | 
				
			||||||
    fullpage = Column(Text)
 | 
					    fullpage = Column(Text)
 | 
				
			||||||
    readability = Column(Text)
 | 
					    readability = Column(Text)
 | 
				
			||||||
    updated = Column(DateTime)
 | 
					    updated = Column(DateTime)
 | 
				
			||||||
@@ -173,10 +176,13 @@ def mail_daily_digest(session, sender, receiver, prefix):
 | 
				
			|||||||
    count = 0
 | 
					    count = 0
 | 
				
			||||||
    for feed, feedinfo, entry in entries:
 | 
					    for feed, feedinfo, entry in entries:
 | 
				
			||||||
        count = count + 1
 | 
					        count = count + 1
 | 
				
			||||||
 | 
					        link = entry.link
 | 
				
			||||||
 | 
					        if entry.resolvedlink:
 | 
				
			||||||
 | 
					            link = entry.resolvedlink
 | 
				
			||||||
        body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
 | 
					        body = body + '=> %s - %s\n' % (entry.firstfetched.strftime('%y%m%d-%H%M'), feedinfo.title)
 | 
				
			||||||
        body = body + '   %s\n' % entry.title
 | 
					        body = body + '   %s\n' % entry.title
 | 
				
			||||||
        body = body + '%s\n' % get_entry_text(entry)[0:100]
 | 
					        body = body + '%s\n' % get_entry_text(entry)[0:100]
 | 
				
			||||||
        body = body + '%s\n\n' % entry.link
 | 
					        body = body + '%s\n\n' % link
 | 
				
			||||||
    if count > 0:
 | 
					    if count > 0:
 | 
				
			||||||
        today = datetime.now()
 | 
					        today = datetime.now()
 | 
				
			||||||
        subject = '%s (%s) - %d entries' % (today.strftime('%y%m%d'), today.strftime('%A'), count)
 | 
					        subject = '%s (%s) - %d entries' % (today.strftime('%y%m%d'), today.strftime('%A'), count)
 | 
				
			||||||
@@ -192,9 +198,12 @@ def mail_single_entry(feed, feedinfo, entry, sender, receiver, prefix):
 | 
				
			|||||||
    subject = '%s' % (entry.title)
 | 
					    subject = '%s' % (entry.title)
 | 
				
			||||||
    if prefix != '':
 | 
					    if prefix != '':
 | 
				
			||||||
        subject = '%s %s' % (prefix, subject)
 | 
					        subject = '%s %s' % (prefix, subject)
 | 
				
			||||||
 | 
					    link = entry.link
 | 
				
			||||||
 | 
					    if entry.resolvedlink:
 | 
				
			||||||
 | 
					        link = entry.resolvedlink
 | 
				
			||||||
    body = '%s\n\n' % get_entry_text(entry)
 | 
					    body = '%s\n\n' % get_entry_text(entry)
 | 
				
			||||||
    body = body + '%s\n' % feedinfo.link
 | 
					    body = body + '%s\n' % feedinfo.link
 | 
				
			||||||
    body = body + '%s\n' % entry.link
 | 
					    body = body + '%s\n' % link
 | 
				
			||||||
    send_mail(sender, receiver, subject, body)
 | 
					    send_mail(sender, receiver, subject, body)
 | 
				
			||||||
    entry.sent = datetime.now()
 | 
					    entry.sent = datetime.now()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -242,6 +251,13 @@ def process_feed_entry(session, feed, entry):
 | 
				
			|||||||
    except Exception, e:
 | 
					    except Exception, e:
 | 
				
			||||||
        print '  new entry <%s>' % entry.title
 | 
					        print '  new entry <%s>' % entry.title
 | 
				
			||||||
        thisentry = Entry(entry)
 | 
					        thisentry = Entry(entry)
 | 
				
			||||||
 | 
					        if feed.resolveredirects:
 | 
				
			||||||
 | 
					            print '    fetching final link <%s>' % entry.link
 | 
				
			||||||
 | 
					            request = urllib2.Request(entry.link)
 | 
				
			||||||
 | 
					            opener = urllib2.build_opener()
 | 
				
			||||||
 | 
					            result = opener.open(request)
 | 
				
			||||||
 | 
					            thisentry.resolvedlink = result.url
 | 
				
			||||||
 | 
					            print '    final link: <%s>' % result.url
 | 
				
			||||||
        if feed.fullpage:
 | 
					        if feed.fullpage:
 | 
				
			||||||
            print '    fetching full page <%s>' % entry.link
 | 
					            print '    fetching full page <%s>' % entry.link
 | 
				
			||||||
            thisentry.fullpage = fetch_full_page(entry.link)
 | 
					            thisentry.fullpage = fetch_full_page(entry.link)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user