#!/usr/bin/env python
"""
amazonlib
Tools for generating feeds from Amazon Web Service requests.
"""
import md5, urllib, xmltramp
from scraperlib import FeedEntryDict, Scraper
class AmazonScraper(Scraper):
    """
    Generates feeds from lists of products from Amazon Web
    Services queries.

    This class turns fetched product items into feed entries; it
    calls self.fetch_items() and reads self.date_fmt, neither of
    which is defined in this class body (presumably supplied by
    Scraper or by a subclass -- confirm against scraperlib).
    """

    # Endpoint URL for Amazon Web Services XML requests.
    AWS_URL = "http://webservices.amazon.com/onca/xml"

    # Dotted item paths whose values are hashed to build an entry ID.
    ITEM_TRACK = (
        'ASIN',
        'ItemAttributes.ListPrice.FormattedPrice',
    )

    # Domain and date components of the "tag:" entry IDs.
    TAG_DOMAIN = "www.decafbad.com"
    TAG_DATE = "2005-03-06"

    # Not referenced in this class body; presumably a state file name
    # used elsewhere -- verify against Scraper.
    STATE_FN = "amazon_wishlist_state"

    # NOTE(review): this template holds only placeholders, with no
    # markup between them, and has no entry.link placeholder even
    # though produce_entries() sets a link -- confirm this is intended.
    ATOM_ENTRY_TMPL = (
        "\n"
        "%(entry.title)s%(entry.author.name)s%(entry.issued)s"
        "%(entry.modified)s%(entry.id)s%(entry.summary)s"
        "\n"
    )

    # Entry title: product group, list price, title and author.
    TITLE_TMPL = (
        "[%(ItemAttributes.ProductGroup)s] "
        "(%(ItemAttributes.ListPrice.FormattedPrice)s) "
        "%(ItemAttributes.Title)s - %(ItemAttributes.Author)s"
    )

    # Entry summary: title and author on one line.
    SUMMARY_TMPL = "\n%(ItemAttributes.Title)s %(ItemAttributes.Author)s\n"

    def produce_entries(self):
        """
        Produce feed entries from Amazon product item data.

        Returns a list with one FeedEntryDict per item returned by
        self.fetch_items(), each carrying 'id', 'link',
        'author.name', 'title' and 'summary'.
        """
        entries = []

        # Run through all fetched items, building entries
        for item in self.fetch_items():

            # Wrap the item in a template-friendly object so dotted
            # paths can be used both as keys and in string templates.
            tmpl_item = TrampTmplWrapper(item)

            # Build an empty entry object
            entry = FeedEntryDict(date_fmt=self.date_fmt)

            # Generate an ID for this entry based on tracked data:
            # the MD5 digest changes whenever a tracked value changes.
            m = md5.md5()
            for k in self.ITEM_TRACK:
                m.update(tmpl_item[k])
            entry['id'] = "tag:%s,%s:%s" % \
                (self.TAG_DOMAIN, self.TAG_DATE, m.hexdigest())

            # Use the item detail URL for entry link
            entry['link'] = tmpl_item['DetailPageURL']

            # Use the author, artist, or actor name for item and
            # entry author, skipping any that come back empty.
            names = [
                tmpl_item['ItemAttributes.%s' % k]
                for k in ('Author', 'Artist', 'Actor')
            ]
            entry['author.name'] = ", ".join([n for n in names if n])

            # Build entry title and summary from string templates
            entry['title'] = self.TITLE_TMPL % tmpl_item
            entry['summary'] = self.SUMMARY_TMPL % tmpl_item

            # Append completed entry to list
            entries.append(entry)

        return entries
class TrampTmplWrapper:
    """
    Wrapper to provide dictionary-style access to xmltramp
    nodes with dotted paths, for use in string templates.

    Because lookups go through __getitem__, an instance can be used
    as the mapping operand of a "%(Some.Dotted.Path)s" format string.
    """

    def __init__(self, node):
        """
        Initialize with an xmltramp node.
        """
        self.node = node

    def __getitem__(self, path):
        """
        Walk through xmltramp child nodes, given a dotted path.

        Each dot-separated segment of path is looked up as an
        attribute of the node reached so far, and the final node is
        returned converted with str().

        Returns an empty string on a path not found.
        """
        try:
            # Walk through the path nodes, return end node as string.
            curr = self.node
            for p in path.split('.'):
                curr = getattr(curr, p)
            return str(curr)
        except (AttributeError, TypeError):
            # getattr normally reports a missing attribute with
            # AttributeError. TypeError is kept as well: per the
            # original HACK note, that is what xmltramp throws for an
            # attribute not found.
            return ""