#!/usr/bin/env python
"""
ch16_feed_delicious_recaps.py

Insert del.icio.us link recaps into a normalized feed.
"""
import sys
import time
import urllib2
from xml.sax import SAXParseException

import feedparser
import xmltramp

from httpcache import HTTPCache
from scraperlib import FeedEntryDict, Scraper
from ch14_feed_normalizer import normalize_feed_meta, normalize_entries

FEED_URL = 'http://www.decafbad.com/blog/atom.xml'


def _read_account(path):
    """
    Read del.icio.us credentials from a file holding 'user:password'.

    Returns a (user, password) tuple.  Raises ValueError when the file
    content contains no colon.
    """
    fin = open(path)
    try:
        account = fin.read().strip()
    finally:
        # Close explicitly rather than relying on garbage collection.
        fin.close()

    # Split on the first colon only, so a password may itself contain ':'.
    user, passwd = account.split(':', 1)
    return user, passwd


def main():
    """
    Use the DeliciousFeed on a given feed.

    Usage: ch16_feed_delicious_recaps.py [rss|atom] [feed_url]

    The first argument selects RSS output when it is 'rss'; anything
    else (or nothing) produces Atom.  The second argument is the feed
    to augment, defaulting to FEED_URL.
    """
    # A missing or empty second argument falls back to the default feed.
    if len(sys.argv) > 2 and sys.argv[2]:
        feed_url = sys.argv[2]
    else:
        feed_url = FEED_URL

    f = DeliciousFeed(feed_url)
    f.STATE_FN = 'link_delicious_recaps_state'
    f.DEL_USER, f.DEL_PASSWD = _read_account('delicious-acct.txt')

    # print(x) with a single argument emits the same output as `print x`.
    if len(sys.argv) > 1 and sys.argv[1] == 'rss':
        print(f.scrape_rss())
    else:
        print(f.scrape_atom())


class DeliciousFeed(Scraper):
    """
    Insert daily recaps of del.icio.us links as entries into a
    normalized feed.
    """
    # API query URL; %s is replaced by a YYYY-MM-DD date.
    DEL_API_URL = "http://del.icio.us/api/posts/get?dt=%s"

    # Placeholder credentials; main() overrides them on the instance.
    DEL_USER, DEL_PASSWD = "user", "passwd"

    # How many past days (starting with yesterday) to recap.
    NUM_DAYS = 6

    # Wrapper for one day's recap; %s receives the rendered links.
    DEL_ENTRY_TMPL = """
        <ul>
            %s
        </ul>
    """

    # One bookmarked link; filled from a dict with the keys
    # href, description, tags and extended.
    # NOTE(review): values are interpolated as-is, without HTML
    # escaping -- confirm whether the feed writer escapes the summary.
    DEL_LINK_TMPL = """
        <li>
            <a href="%(href)s">%(description)s</a> (%(tags)s)<br />
            %(extended)s
        </li>
    """

    # One tag of a link; filled from a dict with the keys href and tag.
    DEL_TAG_TMPL = """<a href="%(href)s">%(tag)s</a> """

    def __init__(self, main_feed):
        """Initialize with the feed URI for parsing."""
        self.main_feed = main_feed

    def produce_entries(self):
        """
        Normalize the source feed, insert del.icio.us daily link recaps.

        Returns the normalized entries of the main feed followed by one
        recap entry for each of the past NUM_DAYS days that had posts.
        Also sets self.FEED_META as a side effect.
        """
        # Grab and parse the feed
        feed = feedparser.parse(HTTPCache(self.main_feed).content())

        # Normalize feed meta data
        self.FEED_META = normalize_feed_meta(feed, self.date_fmt)
        self.FEED_META['feed.title'] += ' (with del.icio.us links)'

        # Normalize entries from the feed
        entries = normalize_entries(feed.entries)

        # The credentials do not change between queries, so the opener
        # is installed once instead of once per day.
        if self.NUM_DAYS > 0:
            self._install_auth()

        # Iterate through a number of past days' links
        for n in range(self.NUM_DAYS):
            # Calculate and format date for this query
            post_secs = time.time() - ((n + 1) * 24 * 60 * 60)
            post_dt = time.strftime('%Y-%m-%d', time.localtime(post_secs))

            # Build del API URL, execute the query, and parse response.
            data = HTTPCache(self.DEL_API_URL % post_dt).content()
            doc = xmltramp.parse(data)

            # Only days that actually returned posts get a recap entry.
            if len(doc) > 0:
                entries.append(
                    self._build_recap_entry(doc, post_dt, post_secs))

            # Pause, because http://del.icio.us/doc/api says so.  This
            # runs after every query, including ones with no posts.
            time.sleep(1)

        # Return the list of entries built
        return entries

    def _install_auth(self):
        """Install a urllib2 opener doing Basic Auth for the del API."""
        auth = urllib2.HTTPBasicAuthHandler()
        auth.add_password('del.icio.us API', 'del.icio.us',
                          self.DEL_USER, self.DEL_PASSWD)
        urllib2.install_opener(urllib2.build_opener(auth))

    def _render_post(self, post):
        """Render one del.icio.us post element with DEL_LINK_TMPL."""
        # Run through post tags, render links with template.
        tags_out = [
            self.DEL_TAG_TMPL % {
                'tag': t,
                'href': 'http://del.icio.us/%s/%s' % (self.DEL_USER, t),
            }
            for t in post('tag').split()
        ]

        # The extended description is optional.
        # NOTE(review): assumes xmltramp raises KeyError for a missing
        # attribute -- confirm against the xmltramp version in use.
        try:
            extended = post('extended')
        except KeyError:
            extended = ''

        # Build content for this link posting using template.
        return self.DEL_LINK_TMPL % {
            'href': post('href'),
            'description': post('description'),
            'extended': extended,
            'tags': ''.join(tags_out),
        }

    def _build_recap_entry(self, doc, post_dt, post_secs):
        """Build the feed entry recapping all posts in doc for one day."""
        post_out = [self._render_post(post) for post in doc]

        # Construct a new feed entry based on the day's links
        return FeedEntryDict(date_fmt=self.date_fmt, init_dict={
            'title': 'del.icio.us links on %s' % post_dt,
            'issued': post_secs,
            'modified': post_secs,
            'link': 'http://del.icio.us/%s#%s' % (self.DEL_USER, post_dt),
            'summary': self.DEL_ENTRY_TMPL % "\n".join(post_out),
        })


if __name__ == '__main__':
    main()