2a2b3aa7ef4ac140c8d55f7f68705225b2349955,Word2Vec.py,,cleanDataset,#,19

Before Change


	for line in allLines:
	    tempStr = line.replace("\n"," ").lower()
	    myStr += re.sub("[.!?]","", tempStr)
	return myStr, Counter(myStr.split())

def createTrainingMatrices(dictionary, corpus):
	allUniqueWords = dictionary.keys()	
	allWords = corpus.split()

After Change


	for line in allLines:
	    tempStr = line.replace("\n"," ").lower()
	    myStr += re.sub("[.!?]","", tempStr)
	intermediateDict = Counter(myStr.split())
	for word in intermediateDict:
		numOccurences = intermediateDict[word]
		if numOccurences <= unknownCutoff:
			intermediateDict["<unk>"] += numOccurences
			myStr.replace(word, "<unk>")
			del intermediateDict[word]
	return myStr, intermediateDict

def createTrainingMatrices(dictionary, corpus):
	allUniqueWords = dictionary.keys()	
[Image; alt text: "Italian Trulli"]
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 3

Instances


Project Name: adeshpande3/Facebook-Messenger-Bot
Commit Name: 2a2b3aa7ef4ac140c8d55f7f68705225b2349955
Time: 2017-07-11
Author: adeshpande3@g.ucla.edu
File Name: Word2Vec.py
Class Name:
Method Name: cleanDataset


Project Name: shibing624/pycorrector
Commit Name: f7daa2e81a84503f98058b62d068ef375427ec4e
Time: 2018-05-18
Author: 507153809@qq.com
File Name: pycorrector/rnn_lm/data_reader.py
Class Name:
Method Name: process_data


Project Name: 10XGenomics/cellranger
Commit Name: 0668acd5b56b4bd6c897428f7498bc9a6ee88226
Time: 2019-02-20
Author: hezx@users.noreply.github.com
File Name: mro/stages/counter/check_barcodes_compatibility/__init__.py
Class Name:
Method Name: join