#!/usr/bin/env python
"""Score stored aggregator items with a SpamBayes (hammie) classifier.

Every item created after ``since`` is read from the SQLite item store,
scored by the hammie classifier opened from ``shown_db``, and the score
(rounded to three decimals) is written back as the item's ``bayes:score``
meta value.  Items scoring above ``LOG_THRESHOLD`` are also logged.
"""

import ConfigParser
import os
import string
import sys
import StringIO
import time
import threading

# Set up a module path that covers the global lib and all plugins.
# BASE_PATH resolves to the parent of the directory containing this script.
BASE_PATH = os.path.normpath(
    os.path.join(os.path.abspath(sys.argv[0]), os.pardir, os.pardir))
sys.path.insert(0, os.path.join(BASE_PATH, 'lib'))

# Imported after the sys.path change, as in the original ordering;
# presumably some of these modules live under BASE_PATH/lib.
import logging
import logging.config
import sqlite
import iso8601

import dbagg
import dbagg.storage.sqlite
from spambayes import classifier, mboxutils, hammie, Corpus

# Items scoring strictly above this value are reported in the log.
LOG_THRESHOLD = 0.6

# Placeholders are bound, in order, to (since, max_items, offset).
ITEMS_SINCE_SQL = (
    " SELECT items.* FROM items"
    " WHERE (items.created > %s)"
    " ORDER BY items.created DESC"
    " LIMIT %s OFFSET %s "
)


def item_to_msg(item):
    """Return the text that gets scored for *item*.

    The result is the item's ``title`` and ``description`` joined by the
    separator ``" ^^^ "``.
    """
    return "%s ^^^ %s" % (item.title, item.description)


logging.config.fileConfig(os.path.join(BASE_PATH, 'conf', 'global.conf'))
main_log = logging.getLogger("")
main_log.info("Starting up.")

# Configure data sources.
# NOTE(review): "data/agg" is relative to the current working directory,
# unlike the logging config path, which is anchored at BASE_PATH -- confirm
# that this is intended.
# The connection is opened without autocommit; a single commit is issued
# once scoring has finished.
conn = sqlite.connect("data/agg")
sources = dbagg.storage.sqlite.SQLiteSourceCollection(conn)
items = dbagg.storage.sqlite.SQLiteItemCollection(conn)

# Configure the classifier.
shown_db = 'data/bayes-interesting.db'
usedb = True
h_shown = hammie.open(shown_db, usedb, 'c')

# Only items created after this timestamp are scored.
since = '2003-08-12T00:00:00-05:00'
offset = 0
max_items = 80000  # named to avoid shadowing the builtin ``max``

main_log.info("Searching items...")
shown_items = items.sql_iter(ITEMS_SINCE_SQL, (since, max_items, offset))

main_log.info("scoring items")
for item in shown_items:
    # Called with True as the second argument, score() yields a
    # (probability, clues) pair; the clues are not used here.
    prob, _clues = h_shown.score(item_to_msg(item), True)
    prob = round(prob, 3)
    item.set_meta('bayes:score', prob)
    if prob > LOG_THRESHOLD:
        # Let the logger do the interpolation instead of formatting eagerly.
        main_log.info("\tScore #%s %s (%s)", item.id, prob, item.title)

conn.commit()