#!/usr/bin/env python
"""
ch16_feed_amazon_ads.py
Insert Amazon links into a normalized feed.
"""
import sys, urllib, feedparser, xmltramp
from xml.sax import SAXParseException
from httpcache import HTTPCache
from scraperlib import FeedEntryDict, Scraper
from ch14_feed_normalizer import normalize_feed_meta, normalize_entries
# Feed processed by main() when no feed URL is given on the command line.
FEED_URL = 'http://www.decafbad.com/blog/atom.xml'
def main():
    """
    Run an AmazonAdFeed over a feed and print the result.

    The first command line argument selects the output format: 'rss'
    prints RSS, anything else (or nothing) prints Atom.  The second
    argument, when present and non-empty, is the URL of the feed to
    process; otherwise FEED_URL is used.
    """
    argv = sys.argv

    # A missing or empty URL argument falls back to the default feed.
    feed_url = FEED_URL
    if len(argv) > 2 and argv[2]:
        feed_url = argv[2]

    scraper = AmazonAdFeed(feed_url)
    scraper.STATE_FN = 'link_amazon_ads_state'

    wants_rss = len(argv) > 1 and argv[1] == 'rss'
    if wants_rss:
        output = scraper.scrape_rss()
    else:
        output = scraper.scrape_atom()
    print(output)
class AmazonAdFeed(Scraper):
    """
    Insert links to possibly related Amazon items, found via an
    Amazon search on each entry summary, into a normalized feed.
    """
    # Amazon subscription key.  The file is read while the class body
    # executes, so amazon-key.txt must be readable from the current
    # working directory at the moment this class is defined.
    AMAZON_KEY = open("amazon-key.txt", "r").read().strip()
    # Associate tag sent with every search request.
    ASSOCIATE_TAG = '0xdecafbad-20'
    # Maximum number of search result items rendered per entry.
    MAX_ITEMS = 3

    # Wrapper around the rendered items.  The %s conversion receives the
    # newline-joined items; without it the % operator in produce_entries
    # raises TypeError ("not all arguments converted").
    INSERT_TMPL = """
Possibly Related Amazon Items:
%s
"""
    # Rendering of a single item.  produce_entries also supplies 'url'
    # and 'img' keys, which this template does not currently use.
    INSERT_ITEM_TMPL = """
%(title)s
"""

    def __init__(self, main_feed):
        """Initialize with the feed URI for parsing."""
        self.main_feed = main_feed

    def produce_entries(self):
        """
        Fetch and normalize the main feed, then append a rendered list
        of Amazon search results to the summary of every entry.

        Returns the normalized entries with their summaries extended.
        """
        # Grab and parse the feed
        feed = feedparser.parse(HTTPCache(self.main_feed).content())

        # Normalize feed meta data
        # (date_fmt is not defined in this class; presumably it is
        # provided by Scraper -- confirm against scraperlib.)
        self.FEED_META = normalize_feed_meta(feed, self.date_fmt)
        self.FEED_META['feed.title'] += ' (with Amazon items)'

        # Normalize entries from the feed
        entries = normalize_entries(feed.entries)

        # Run through all the normalized entries...
        for e in entries:

            # Perform a search on the entry summary, then keep the
            # children of Items whose name contains 'Item'.
            result = self.amazon_search(e['summary'])
            items = [ x for x in result.Items if 'Item' in x._name ]

            # Use each search result item to populate the templates.
            insert_items = [ self.INSERT_ITEM_TMPL % {
                'title' : i.ItemAttributes.Title,
                'url'   : i.DetailPageURL,
                'img'   : i.SmallImage.URL
            } for i in items[:self.MAX_ITEMS] ]
            insert_out = self.INSERT_TMPL % '\n'.join(insert_items)

            # Append the rendered search results onto the entry summary.
            e.data['summary'] += insert_out.decode('utf-8', 'ignore')

        return entries

    def amazon_search(self, query):
        """
        Given a query string, perform an Amazon search and return the
        response as parsed by xmltramp.
        """
        # Construct an Amazon search URL and fetch it.  The query text
        # is passed as the TextStream parameter of a Books ItemSearch.
        args = {
            'SubscriptionId' : self.AMAZON_KEY,
            'AssociateTag'   : self.ASSOCIATE_TAG,
            'Service'        : 'AWSECommerceService',
            'Operation'      : 'ItemSearch',
            'ResponseGroup'  : 'Medium,ItemAttributes',
            'SearchIndex'    : 'Books',
            'TextStream'     : query
        }
        url = "http://webservices.amazon.com/onca/xml?%s" % \
            urllib.urlencode(args)

        # Parse and return the results of the search
        data = HTTPCache(url).content()
        doc = xmltramp.parse(data)
        return doc
# Run main() only when this file is executed as a script.
if __name__ == '__main__':
    main()