"""Train and apply a Bayesian classifier that rates aggregator items.

Items are rendered as pseudo e-mail messages (see ``toMsg``) and fed to a
SpamBayes ``Hammie`` classifier: visited items are trained as "ham" and
unvisited items as "spam".  Recently created items are then scored and the
inverted, rounded probability is stored in ``item.rating``.

Run as a script, ``recommend.py score`` rates recent items; any other
argument (or no argument at all) runs a training pass.
"""
# $Header: /cvsroot/dbagg2/lib/dbagg2/recommend.py,v 1.3 2004/05/12 02:16:44 deusx Exp $

import logging
import sys, os, os.path, time, traceback, shelve
from mx.DateTime import *
from dbagg2.config import *
from dbagg2.model import *
# NOTE(review): this second star import looks redundant, but because it comes
# last its names win any clash with the dbagg2 star imports above, so it is
# deliberately kept.
from mx.DateTime import *
from spambayes import classifier, mboxutils, hammie, Corpus, storage
from spambayes.hammie import Hammie

# ``conf``, ``Item``, ``now``, ``oneDay`` and ``oneHour`` are not defined in
# this file; presumably they arrive through the star imports above.
log = logging.getLogger(__name__)


def getHammie():
    """Open the classifier backed by the configured data store.

    Only the ``mysql`` value of the ``data``/``driver`` setting is handled.

    Returns:
        A ``Hammie`` wrapping a MySQL-backed store, or ``None`` (after
        logging an error) when any other driver is configured.
    """
    if conf.get('data', 'driver') != 'mysql':
        log.error("Can't open a storage type for Hammie")
        return None

    host = conf.get('data_mysql', 'host')
    user = conf.get('data_mysql', 'user')
    passwd = conf.get('data_mysql', 'passwd')
    db = conf.get('data_mysql', 'db')
    dsn = "host=%s user=%s pass=%s dbname=%s" % (host, user, passwd, db)

    # Log where we connect, but never the DSN itself: it embeds the password.
    log.info("Opening ham store in MySQL: host=%s user=%s dbname=%s"
             % (host, user, db))
    return Hammie(storage.mySQLClassifier(dsn))


def _logUnexpected():
    """Log the exception currently being handled, including its traceback."""
    exc = ''.join(traceback.format_exception(*sys.exc_info()))
    log.error("Unexpected exception:\n%s" % (exc))


def train():
    """Train the classifier on items created between 28 days and 1 day ago.

    Visited items are trained as ham ("pos"), all others as spam ("neg").
    A failure on one item is logged and does not stop the pass.  The
    classifier state is stored once, after all items have been processed.
    Does nothing when no classifier could be opened.
    """
    bayes = getHammie()
    if bayes is None:
        # getHammie() has already logged why there is no classifier.
        return

    current = now()
    items = Item.select(
        (Item.q.created < (current - oneDay)) &
        (Item.q.created > (current - (oneDay * 28)))
    )
    for item in items:
        try:
            msg = toMsg(item)
            if item.visited:
                log.info("\tTraining pos #%s (%s)" % (item.id, item.title[:25]))
                bayes.train_ham(msg)
            else:
                log.info("\tTraining neg #%s (%s)" % (item.id, item.title[:25]))
                bayes.train_spam(msg)
        except Exception:
            # Best effort per item; KeyboardInterrupt/SystemExit still
            # propagate so a long run can be interrupted.
            _logUnexpected()
    bayes.store()


def score():
    """Rate every item created within the last two hours, newest first.

    Each item's ``rating`` is set to ``1 - probability`` rounded to three
    decimals, where ``probability`` is what the classifier returns for the
    item.  Ratings below 0.7 are also written to the log.  A failure on one
    item is logged and does not stop the pass.  Does nothing when no
    classifier could be opened.
    """
    bayes = getHammie()
    if bayes is None:
        # getHammie() has already logged why there is no classifier.
        return

    items = Item.select(
        (Item.q.created > (now() - (oneHour * 2))),
        orderBy='-created'
    )
    for item in items:
        try:
            msg = toMsg(item)
            # The second argument asks for the clues as well; they are not
            # used here but the call is kept exactly as it was.
            (prob, _clues) = bayes.score(msg, True)
            # Invert and round off the prob: unvisited items are trained as
            # spam, so a low classifier probability means a high rating.
            prob = round(1 - prob, 3)
            item.rating = prob
            if prob < 0.7:
                log.info("\tScore #%s %s (%s)" % (item.id, prob, item.title))
        except Exception:
            # Best effort per item; KeyboardInterrupt/SystemExit still
            # propagate so a long run can be interrupted.
            _logUnexpected()


def _ascii(text):
    """Return *text* as a native string with non-ASCII characters dropped."""
    encoded = text.encode('ascii', 'ignore')
    if not isinstance(encoded, str):
        # On Python 3 encode() yields bytes, which "%s" would render as
        # "b'...'"; on Python 2 the result is already a str and is untouched.
        encoded = encoded.decode('ascii')
    return encoded


def toMsg(item):
    """
    Return a representation of the item which vaguely resembles an
    email message.

    The link, id, source title, title and creation date become header
    lines; the description becomes the body after a blank line.  The
    source title, title and description have non-ASCII characters removed.
    """
    out = [
        "X-Link: %s" % (item.link),
        "X-Item-Id: %s" % (item.id),
        "From: %s" % (_ascii(item.source.title)),
        "Subject: %s" % (_ascii(item.title)),
        "Date: %s" % (item.created),
        "",
        "%s" % _ascii(item.description),
    ]
    return "\n".join(out)


if __name__ == '__main__':
    # "score" rates recent items; anything else, including no argument,
    # runs a training pass.
    if len(sys.argv) > 1 and sys.argv[1] == 'score':
        score()
    else:
        train()