46248d91fbec1af87b58502c7169d3d21ef47376,nltk_trainer/classification/scoring.py,,sum_category_word_scores,#,7

Before Change


from nltk_trainer import iteritems

def sum_category_word_scores(categorized_words, score_fn):
	"""Tally word occurrences overall and per category.

	Only the counting phase is visible in this excerpt: the snippet ends
	right after ``n_xx`` is computed, before any scoring or return statement.

	:param categorized_words: iterable of ``(category, words)`` pairs, where
		``words`` is itself an iterable of words.
	:param score_fn: not referenced in the visible lines -- presumably a
		scoring callable used further down; confirm against the full function.
	"""
	# One overall tally, plus one tally per category keyed by category.
	word_fd = FreqDist()
	category_word_fd = ConditionalFreqDist()
	
	for category, words in categorized_words:
		for word in words:
			# Record every occurrence in both the overall and per-category tallies.
			word_fd.inc(word)
			category_word_fd[category].inc(word)
	
	# Missing keys read as 0 because the default factory is int.
	scores = collections.defaultdict(int)
	# Presumably the total number of counted occurrences across all
	# categories -- confirm against the ConditionalFreqDist.N() API.
	n_xx = category_word_fd.N()

After Change



def sum_category_word_scores(categorized_words, score_fn):
	"""Tally word occurrences overall and per category using stdlib counters.

	Only part of the function is visible in this excerpt: it is cut off
	inside the per-category loop, before any scoring or return statement.

	:param categorized_words: iterable of ``(category, words)`` pairs, where
		``words`` is itself an iterable of words.
	:param score_fn: not referenced in the visible lines -- presumably a
		scoring callable used further down; confirm against the full function.
	"""
	# One overall Counter, plus one Counter per category created on first access.
	word_fd = collections.Counter()
	category_word_fd = collections.defaultdict(collections.Counter)
	
	for category, words in categorized_words:
		for word in words:
			# Record every occurrence in both the overall and per-category tallies.
			word_fd[word] += 1
			category_word_fd[category][word] += 1
	
	# Missing keys read as 0 because the default factory is int.
	scores = collections.defaultdict(int)
	# Grand total of occurrences: chain every category's counts together and sum them.
	n_xx = sum(itertools.chain(*[fd.values() for fd in category_word_fd.values()]))
	
	for category in category_word_fd.keys():
		# Total number of occurrences recorded for this category alone.
		n_xi = sum(category_word_fd[category].values())
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 7

Instances


Project Name: japerk/nltk-trainer
Commit Name: 46248d91fbec1af87b58502c7169d3d21ef47376
Time: 2014-04-21
Author: japerk@gmail.com
File Name: nltk_trainer/classification/scoring.py
Class Name:
Method Name: sum_category_word_scores


Project Name: japerk/nltk-trainer
Commit Name: 46248d91fbec1af87b58502c7169d3d21ef47376
Time: 2014-04-21
Author: japerk@gmail.com
File Name: nltk_trainer/classification/scoring.py
Class Name:
Method Name: sum_category_word_scores


Project Name: japerk/nltk-trainer
Commit Name: bc128d9596ed07d1c8d5d98f35b1f6905ad4d819
Time: 2014-01-05
Author: japerk@gmail.com
File Name: analyze_tagged_corpus.py
Class Name:
Method Name:


Project Name: japerk/nltk-trainer
Commit Name: 2ca3b0d5a88d414a87c343981b80ed1204b8dd8d
Time: 2014-01-05
Author: japerk@gmail.com
File Name: analyze_chunked_corpus.py
Class Name:
Method Name: