#!/usr/bin/env python
"""Train the Bayesian classifier from items already marked in the store.

Every item created after the cutoff in the query below is inspected for
its 'bayes:trained' metadata value: 'pos' items are fed to the classifier
through train_spam(), 'neg' items through train_ham().  The classifier is
stored once after all items have been processed.
"""

import ConfigParser
import os
import string
import sys
import StringIO
import time
import threading

# Set up a module path that covers the global lib and all plugins.
# BASE_PATH is two directory levels above the script path in sys.argv[0].
BASE_PATH = os.path.normpath(
    os.path.join(os.path.abspath(sys.argv[0]), os.pardir, os.pardir))
sys.path.insert(0, os.path.join(BASE_PATH, 'lib'))

# These imports deliberately follow the sys.path change above so that
# anything shipped in BASE_PATH/lib is importable (and takes precedence).
import logging
import logging.config
import sqlite
import iso8601

import dbagg
import dbagg.storage.sqlite
from spambayes import classifier, mboxutils, hammie, Corpus

logging.config.fileConfig(os.path.join(BASE_PATH, 'conf', 'global.conf'))
main_log = logging.getLogger("")
main_log.info("Starting up.")

# Configure data sources.
# NOTE(review): "data/agg" is resolved against the current working
# directory, not BASE_PATH -- confirm the script is always run from there.
conn = sqlite.connect("data/agg", autocommit=1)
sources = dbagg.storage.sqlite.SQLiteSourceCollection(conn)
items = dbagg.storage.sqlite.SQLiteItemCollection(conn)

# Configure the classifier.  The handle is named `bayes` so that it does
# not rebind the `classifier` module imported from spambayes above.
shown_db = 'data/bayes-interesting.db'
usedb = True
bayes = hammie.open(shown_db, usedb, 'c')

main_log.info("Searching for items...")
train_items = items.sql_iter(
    """
        SELECT items.*
        FROM items
        WHERE items.created > "2003-08-12T00:00:00"
    """,
    ())

main_log.info("Training...")
for item in train_items:
    trained = item.get_meta('bayes:trained')
    if trained == 'pos':
        # NOTE(review): 'pos' maps onto the classifier's "spam" class;
        # presumably that class stands for "interesting" here -- confirm.
        bayes.train_spam(item.toMsg())
        main_log.info("\tTrained pos #%s (%s)", item.id, item.title)
    elif trained == 'neg':
        bayes.train_ham(item.toMsg())
        main_log.info("\tTrained neg #%s (%s)", item.id, item.title)
    # Items with any other 'bayes:trained' value are skipped.

# Store the classifier once, after the whole training pass.
bayes.store()