"""
def main():
    """
    Scan the Apache access log and report newly seen referrers as a feed.

    Reads new lines from ACCESS_LOG, parses and filters them, and checks
    each "referrer -> path" pair against the shelve database at
    REFER_SEEN.  Pairs not yet recorded there are stored and collected;
    if any were found, a single summary entry listing them is appended
    to the feed.  The feed is then always written out in both RSS and
    Atom form to the files named by FEED_NAME_FN.
    """
    # Construct the feed generator
    f = LogBufferFeed(FEED_DIR)
    f.MAX_AGE = 24 * 60 * 60  # 1 day, in seconds
    f.FEED_META['feed.title'] = '%s Referrering Links' % SITE_NAME
    f.FEED_META['feed.tagline'] = \
        'New referring links from Apache access.log on %s' % SITE_NAME

    # Load up tail of access log, parse, and filter
    new_lines = bookmark_tailgrep(ACCESS_LOG, max_initial_lines=100000)
    all_events = parse_access_log(new_lines)
    events = [x for x in all_events if event_filter(x)]

    # Scan through latest events for new referrers.  The shelve is
    # closed in a finally clause so it is released even if an event is
    # malformed (e.g. lacks a 'referrer' or 'path' key).
    referrers_seen = shelve.open(REFER_SEEN)
    new_referrers = []
    try:
        for evt in events:
            k = '%(referrer)s -> %(path)s' % evt
            # Membership test instead of the deprecated has_key().
            if k not in referrers_seen:
                referrers_seen[k] = 1
                new_referrers.append((evt['referrer'], evt['path']))
    finally:
        referrers_seen.close()

    # If there were new referrers found, insert a new entry.
    if new_referrers:
        # Build a list of hyperlinks for referrers
        links_out = [
            LINK_TMPL % {
                'SITE_ROOT': SITE_ROOT,
                'referrer': referrer,
                'path': path,
            }
            for referrer, path in new_referrers
        ]

        # Build a summary for this entry.
        summary = SUMMARY_TMPL % {
            'count': len(new_referrers),
            'links': "\n".join(links_out)
        }

        # Construct and append a new entry
        entry = FeedEntryDict({
            'title': '%s new referrers' % len(new_referrers),
            'link': '',
            'summary': summary
        })
        f.append_entry(entry)

    # Output the current feed entries as both RSS and Atom.  Each feed
    # is rendered BEFORE its file is opened for writing, so a failure
    # while rendering does not truncate the previously written file,
    # and every file handle is closed explicitly.
    for fmt, scrape in (('rss', f.scrape_rss), ('atom', f.scrape_atom)):
        data = scrape()
        out = open(FEED_NAME_FN % fmt, 'w')
        try:
            out.write(data)
        finally:
            out.close()
def event_filter(event):
    """
    Decide whether an event survives the exclusion lists.

    Returns False as soon as a field named in EXCLUDE_PARTIAL contains
    one of its blacklisted substrings, or a field named in
    EXCLUDE_EXACT equals one of its blacklisted values.  Returns True
    when no rule matches.  The event is indexed by field name for
    every field listed in either table.
    """
    # Each rule pairs an exclusion table with the test applied between
    # the event's field value and a blacklisted value.  Partial rules
    # are checked before exact rules.
    rules = (
        (EXCLUDE_PARTIAL, lambda value, banned: value.find(banned) != -1),
        (EXCLUDE_EXACT, lambda value, banned: value == banned),
    )
    for table, is_excluded in rules:
        for name, blacklist in table.items():
            value = event[name]
            for banned in blacklist:
                if is_excluded(value, banned):
                    return False
    return True
ACCESS_RE = re.compile(\
'(?P\d+\.\d+\.\d+\.\d+) '
'(?P-|\w*) '
'(?P-|\w*) '
'\[(?P[^\[\]:]+):'
'(?P