from nltk_trainer import iteritems
def sum_category_word_scores(categorized_words, score_fn):
word_fd = FreqDist()
category_word_fd = ConditionalFreqDist()
for category, words in categorized_words:
for word in words:
word_fd.inc(word)
category_word_fd[category].inc(word)
scores = collections.defaultdict(int)
n_xx = category_word_fd.N()
After Change
from nltk_trainer import iteritems
def sum_category_word_scores(categorized_words, score_fn):
word_fd = collections.Counter()
category_word_fd = collections.defaultdict(collections.Counter)
for category, words in categorized_words:
for word in words:
word_fd[word] += 1
category_word_fd[category][word] += 1
scores = collections.defaultdict(int)
n_xx = sum(itertools.chain(*[fd.values() for fd in category_word_fd.values()]))