43a50ab6a3f717c738c8abba16779be59878bf4b,src/fonduer/parser/preprocessors/hocr_doc_preprocessor.py,HOCRDocPreprocessor,_parse_file,#HOCRDocPreprocessor#,45
Before Change
word.string.replace_with("".join(tokens))
word.unwrap()
for parent in root.find_all(attrs={"fonduer": "1"}):
if self.space:
parent.string.replace_with(" ".join(parent.stripped_strings))
else:
parent.string.replace_with("".join(parent.stripped_strings))
// Rmove the mark
del parent["fonduer"]
name = os.path.basename(fp)[: os.path.basename(fp).rfind(".")]
stable_id = self._get_stable_id(name)
yield Document(
After Change
if child.strip() == "": // remove if space or linebreak
child.extract()
else:
tmp = re.sub(r"[\n\s]+", " " if self.space else "", child)
n = NavigableString(tmp.strip())
child.replace_with(n)
del parent["fonduer"]
name = os.path.basename(fp)[: os.path.basename(fp).rfind(".")]
In pattern: SUPERPATTERN
Frequency: 5
Non-data size: 4
Instances
Project Name: HazyResearch/fonduer
Commit Name: 43a50ab6a3f717c738c8abba16779be59878bf4b
Time: 2020-10-06
Author: hiromu.hota@hal.hitachi.com
File Name: src/fonduer/parser/preprocessors/hocr_doc_preprocessor.py
Class Name: HOCRDocPreprocessor
Method Name: _parse_file
Project Name: commonsense/conceptnet5
Commit Name: 79d149dd39dc7e7d22c623c0a4a4d3ab99e61c76
Time: 2017-06-15
Author: joanna.teresa.duda@gmail.com
File Name: conceptnet5/vectors/transforms.py
Class Name:
Method Name: choose_small_vocabulary
Project Name: MycroftAI/mycroft-precise
Commit Name: 5ce56ff7e7f0c085bdff9745471c50aa4d0e1faa
Time: 2017-11-03
Author: matthew3311999@gmail.com
File Name: precise/stream.py
Class Name:
Method Name: main
Project Name: dirty-cat/dirty_cat
Commit Name: f70e71d5c7fdc8e25391e54e74c3402fb323ad5c
Time: 2018-06-06
Author: pierreglaser@msn.com
File Name: examples/plot_employee_salaries.py
Class Name:
Method Name:
Project Name: oddt/oddt
Commit Name: 86698c801848975de9a21fb17093e045b6271ea3
Time: 2018-05-17
Author: maciek@wojcikowski.pl
File Name: rdkit_fixer.py
Class Name:
Method Name: PreparePDBMol