43a50ab6a3f717c738c8abba16779be59878bf4b,src/fonduer/parser/preprocessors/hocr_doc_preprocessor.py,HOCRDocPreprocessor,_parse_file,#HOCRDocPreprocessor#,45

Before Change


                            word.string.replace_with("".join(tokens))
                    word.unwrap()
            for parent in root.find_all(attrs={"fonduer": "1"}):
                if self.space:
                    parent.string.replace_with(" ".join(parent.stripped_strings))
                else:
                    parent.string.replace_with("".join(parent.stripped_strings))
                // Remove the mark
                del parent["fonduer"]
        name = os.path.basename(fp)[: os.path.basename(fp).rfind(".")]
        stable_id = self._get_stable_id(name)
        yield Document(

After Change


                        if child.strip() == "":  // remove if space or linebreak
                            child.extract()
                        else:
                            tmp = re.sub(r"[\n\s]+", " " if self.space else "", child)
                            n = NavigableString(tmp.strip())
                            child.replace_with(n)
                del parent["fonduer"]

        name = os.path.basename(fp)[: os.path.basename(fp).rfind(".")]
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 5

Non-data size: 4

Instances


Project Name: HazyResearch/fonduer
Commit Name: 43a50ab6a3f717c738c8abba16779be59878bf4b
Time: 2020-10-06
Author: hiromu.hota@hal.hitachi.com
File Name: src/fonduer/parser/preprocessors/hocr_doc_preprocessor.py
Class Name: HOCRDocPreprocessor
Method Name: _parse_file


Project Name: commonsense/conceptnet5
Commit Name: 79d149dd39dc7e7d22c623c0a4a4d3ab99e61c76
Time: 2017-06-15
Author: joanna.teresa.duda@gmail.com
File Name: conceptnet5/vectors/transforms.py
Class Name:
Method Name: choose_small_vocabulary


Project Name: MycroftAI/mycroft-precise
Commit Name: 5ce56ff7e7f0c085bdff9745471c50aa4d0e1faa
Time: 2017-11-03
Author: matthew3311999@gmail.com
File Name: precise/stream.py
Class Name:
Method Name: main


Project Name: dirty-cat/dirty_cat
Commit Name: f70e71d5c7fdc8e25391e54e74c3402fb323ad5c
Time: 2018-06-06
Author: pierreglaser@msn.com
File Name: examples/plot_employee_salaries.py
Class Name:
Method Name:


Project Name: oddt/oddt
Commit Name: 86698c801848975de9a21fb17093e045b6271ea3
Time: 2018-05-17
Author: maciek@wojcikowski.pl
File Name: rdkit_fixer.py
Class Name:
Method Name: PreparePDBMol