2a2b3aa7ef4ac140c8d55f7f68705225b2349955,Word2Vec.py,,cleanDataset,#,19
Before Change
for line in allLines:
tempStr = line.replace("\n"," ").lower()
myStr += re.sub("[.!?]","", tempStr)
return myStr, Counter(myStr.split())
def createTrainingMatrices(dictionary, corpus):
allUniqueWords = dictionary.keys()
allWords = corpus.split()
After Change
for line in allLines:
tempStr = line.replace("\n"," ").lower()
myStr += re.sub("[.!?]","", tempStr)
intermediateDict = Counter(myStr.split())
for word in intermediateDict:
numOccurences = intermediateDict[word]
if numOccurences <= unknownCutoff:
intermediateDict["<unk>"] += numOccurences
myStr.replace(word, "<unk>")
del intermediateDict[word]
return myStr, intermediateDict
def createTrainingMatrices(dictionary, corpus):
allUniqueWords = dictionary.keys()

In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 3
Instances
Project Name: adeshpande3/Facebook-Messenger-Bot
Commit Name: 2a2b3aa7ef4ac140c8d55f7f68705225b2349955
Time: 2017-07-11
Author: adeshpande3@g.ucla.edu
File Name: Word2Vec.py
Class Name:
Method Name: cleanDataset
Project Name: shibing624/pycorrector
Commit Name: f7daa2e81a84503f98058b62d068ef375427ec4e
Time: 2018-05-18
Author: 507153809@qq.com
File Name: pycorrector/rnn_lm/data_reader.py
Class Name:
Method Name: process_data
Project Name: 10XGenomics/cellranger
Commit Name: 0668acd5b56b4bd6c897428f7498bc9a6ee88226
Time: 2019-02-20
Author: hezx@users.noreply.github.com
File Name: mro/stages/counter/check_barcodes_compatibility/__init__.py
Class Name:
Method Name: join