bf03e74b9b0858f2e827e6da4b0ca8d960b3b5b6,soynlp/pos/_news_pos.py,NewsPOSExtractor,_extract_compound_nouns,#NewsPOSExtractor#,303
Before Change
for word, count in words.items():
tokens = tokenizer.tokenize(word, flatten=False)[0]
compound_parts = parse_compound(tokens)
if compound_parts:
word = "".join(compound_parts)
nouns_[word] = nouns_.get(word, 0) + count
if word in words:
words[word] = max(0, words.get(word, 0) - count)
words = {word:count for word, count in words.items()
if (not (word in nouns_)) and (count > 0)}
return nouns_, words
After Change
tokenizer = MaxScoreTokenizer(scores = {noun:1 for noun in nouns if len(noun) > 1})
compounds, removals = {}, set()
for word, count in eojeols.items():
# format: [(word, begin, end, score, length)]
tokens = tokenizer.tokenize(word, flatten=False)[0]
noun = parse_compound(tokens)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 5
Instances
Project Name: lovit/soynlp
Commit Name: bf03e74b9b0858f2e827e6da4b0ca8d960b3b5b6
Time: 2018-11-13
Author: soy.lovit@gmail.com
File Name: soynlp/pos/_news_pos.py
Class Name: NewsPOSExtractor
Method Name: _extract_compound_nouns
Project Name: MTG/freesound
Commit Name: da8c3cd7660e1a4b7fcc0e196462a05271ef7fef
Time: 2019-05-07
Author: frederic.font@upf.edu
File Name: utils/audioprocessing/processing.py
Class Name:
Method Name: convert_to_pcm
Project Name: lingpy/lingpy
Commit Name: 02b1345a9b521639f8966f35b6103fcdecdb8f91
Time: 2013-09-16
Author: mattis.list@posteo.de
File Name: setup.py
Class Name:
Method Name: