adeb1b1278619ff2d74d4fd82825e50a36f95ff4,allennlp/data/token_indexers/pretrained_transformer_indexer.py,PretrainedTransformerIndexer,_add_encoding_to_vocabulary_if_needed,#PretrainedTransformerIndexer#Any#,65

Before Change


        if hasattr(self._tokenizer, "vocab"):
            vocab_field_name = "vocab"
        elif hasattr(self._tokenizer, "encoder"):
            vocab_field_name = "encoder"
        else:
            logger.warning(
                Wasn"t able to fetch vocabulary from pretrained transformers lib.
                Neither <vocab> nor <encoder> are the valid fields for vocab.
                Your tokens will still be correctly indexed, but vocabulary file will not be saved.
            )
        if vocab_field_name is not None:
            pretrained_vocab = getattr(self._tokenizer, vocab_field_name)
            for word, idx in pretrained_vocab.items():
                vocab._token_to_index[self._namespace][word] = idx
                vocab._index_to_token[self._namespace][idx] = word

        self._added_to_vocabulary = True

    @overrides
    def count_vocab_items(self, token: Token, counter: Dict[str, Dict[str, int]]):

After Change


        if vocab_field_name is not None:
            pretrained_vocab = getattr(self._tokenizer, vocab_field_name)
            if vocab_field_name == "sp_model":
                for idx in range(len(pretrained_vocab)):
                    word = pretrained_vocab.id_to_piece(idx)
                    vocab._token_to_index[self._namespace][word] = idx
                    vocab._index_to_token[self._namespace][idx] = word
            else:
                for word, idx in pretrained_vocab.items():
                    vocab._token_to_index[self._namespace][word] = idx
                    vocab._index_to_token[self._namespace][idx] = word

In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 7

Instances

Link

Project Name: allenai/allennlp

Commit Name: adeb1b1278619ff2d74d4fd82825e50a36f95ff4

Time: 2020-03-27

Author: dirkg@allenai.org

File Name: allennlp/data/token_indexers/pretrained_transformer_indexer.py

Class Name: PretrainedTransformerIndexer

Method Name: _add_encoding_to_vocabulary_if_needed

Link

Project Name: OpenNMT/OpenNMT-py

Commit Name: 8a70c277c355fe734a01fe58baafdc2dc5164205

Time: 2016-12-29

Author: alerer@fb.com

File Name: OpenNMT/train.py

Class Name:

Method Name: eval

Link

Project Name: NTMC-Community/MatchZoo

Commit Name: 4bc0cb5d2924a63cf06f641b7cf36f799885f33f

Time: 2018-12-26

Author: 948280670@qq.com

File Name: matchzoo/processor_units/processor_units.py

Class Name: WordHashingUnit

Method Name: transform