#!/usr/bin/env python
"""
ch18_mod_event_feed_filter.py

Enhance a feed with metadata harvested from entry content.

Each entry's summary is run through an hCalendar parser; when an event
is found, its start and end dates are attached to the entry and emitted
through the RSS and Atom templates defined on ModEventFeed.
"""
import sys

import feedparser

from scraperlib import FeedEntryDict, Scraper
from ch14_feed_normalizer import normalize_feed_meta, normalize_entries
from hcalendar import HCalendarParser

# Feed read when no URL is supplied on the command line.
FEED_IN_URL = 'file://./hcal.atom'

# Output path pattern; the %s slot receives the format name ('rss' / 'atom').
FEED_OUT_FN = "www/www.decafbad.com/docs/private-feeds/mod-event.%s"


def main():
    """
    Run a feed through the filter and produce the mod_event enhanced
    version, written out as both RSS and Atom.

    The feed URL is taken from the first command-line argument when one
    is present and non-empty; otherwise FEED_IN_URL is used.
    """
    # Grab the incoming feed URL (an empty argument falls back to the
    # default, exactly as the former `cond and a or b` expression did).
    feed_url = FEED_IN_URL
    if len(sys.argv) > 1 and sys.argv[1]:
        feed_url = sys.argv[1]

    f = ModEventFeed(feed_url)
    f.STATE_FN = 'mod_event_feed_filter'

    # Output the current feed entries as both RSS and Atom.  Each document
    # is rendered *before* its file is opened so that a failure while
    # scraping does not leave a truncated output file behind, and the
    # `with` block guarantees the handle is flushed and closed.
    rss_doc = f.scrape_rss()
    with open(FEED_OUT_FN % 'rss', 'w') as fout:
        fout.write(rss_doc)

    atom_doc = f.scrape_atom()
    with open(FEED_OUT_FN % 'atom', 'w') as fout:
        fout.write(atom_doc)


class ModEventFeed(Scraper):
    """
    Enhance feed metadata by parsing content.

    Overrides the feed/entry templates so that the ev_startdate and
    ev_enddate values set by produce_entries() appear in the output.
    """
    # NOTE(review): the four templates below contain only %(...)s
    # placeholders and no element markup -- confirm that the XML tags
    # were not lost when this file was extracted.
    ATOM_FEED_TMPL = """ %(feed.title)s %(feed.tagline)s %(feed.modified)s %(feed.author.name)s %(feed.author.email)s %(feed.author.url)s %(feed.entries)s """

    ATOM_ENTRY_TMPL = """ %(entry.title)s %(entry.issued)s %(entry.modified)s %(entry.id)s %(entry.ev_startdate)s %(entry.ev_enddate)s %(entry.summary)s """

    RSS_FEED_TMPL = """ %(feed.title)s %(feed.link)s %(feed.tagline)s %(feed.author.email)s %(feed.entries)s """

    RSS_ENTRY_TMPL = """ %(entry.title)s %(entry.link)s %(entry.modified)s %(entry.id)s %(entry.ev_startdate)s %(entry.ev_enddate)s %(entry.summary)s """

    def __init__(self, main_feed):
        """Initialize with the feed URI for parsing."""
        self.main_feed = main_feed

    def produce_entries(self):
        """
        Get a feed, attempt to parse out hCalendar content and add
        mod_event metadata based on it.

        Sets self.FEED_META from the parsed feed and returns the
        normalized entries.  For every entry whose summary yields at
        least one event, the first event's 'dtstart' / 'dtend' values
        (when present) are stored on the entry as 'ev_startdate' /
        'ev_enddate' strings.
        """
        # Grab and parse the feed
        feed = feedparser.parse(self.main_feed)

        # Normalize feed meta data
        # (self.date_fmt is not defined in this file; presumably it is
        # supplied by Scraper -- verify against scraperlib.)
        self.FEED_META = normalize_feed_meta(feed, self.date_fmt)

        # NOTE(review): the literal 'Z' labels the timestamp as UTC
        # without converting it -- confirm decoded values are UTC.
        stamp_fmt = '%Y-%m-%dT%H:%M:%SZ'

        # hCalendar property -> key stored in the entry's data.
        date_fields = (
            ('dtstart', 'ev_startdate'),
            ('dtend', 'ev_enddate'),
        )

        # Run through all the normalized entries...
        hp = HCalendarParser()
        entries = normalize_entries(feed.entries)
        for entry in entries:
            events = hp.parse(entry.data['summary'])
            if not events:
                continue

            # Only the first event found in the summary is used.
            event = events[0]
            for prop, data_key in date_fields:
                if prop in event:
                    entry.data[data_key] = \
                        event.decoded(prop).strftime(stamp_fmt)

        return entries


if __name__ == '__main__':
    main()