4e144c9f842d7415d8be5bdbb5912d88ae32cced,pycorrector/seq2seq/corpus_reader.py,CGEDReader,read_tokens,#CGEDReader#,96
Before Change
def read_tokens (self, path, is_infer=False) :
with open (path, "r" , encoding="utf-8" ) as f:
dom_tree = minidom.parse(f)
docs = dom_tree.documentElement.getElementsByTagName("DOC" )
for doc in docs:
if is_infer:
After Change
for line in f:
if i % 2 == 1 :
if line and len (line) > 5:
yield line.lower () [5:].strip () .split ()
i + = 1
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 5
Instances Project Name: shibing624/pycorrector
Commit Name: 4e144c9f842d7415d8be5bdbb5912d88ae32cced
Time: 2018-04-16
Author: 507153809@qq.com
File Name: pycorrector/seq2seq/corpus_reader.py
Class Name: CGEDReader
Method Name: read_tokens
Project Name: shibing624/pycorrector
Commit Name: 4e144c9f842d7415d8be5bdbb5912d88ae32cced
Time: 2018-04-16
Author: 507153809@qq.com
File Name: pycorrector/seq2seq/corpus_reader.py
Class Name: CGEDReader
Method Name: read_samples_by_string
Project Name: chainer/chainer
Commit Name: 75231a39c212fc8066f99633698b7e59b2ce4efb
Time: 2019-10-13
Author: duaipp@gmail.com
File Name: onnx_chainer/onnx_helper.py
Class Name:
Method Name: is_support_non_standard_domain