adeb1b1278619ff2d74d4fd82825e50a36f95ff4,allennlp/data/token_indexers/pretrained_transformer_indexer.py,PretrainedTransformerIndexer,_add_encoding_to_vocabulary_if_needed,#PretrainedTransformerIndexer#Any#,65

Before Change


        if hasattr(self._tokenizer, "vocab"):
            vocab_field_name = "vocab"
        elif hasattr(self._tokenizer, "encoder"):
            vocab_field_name = "encoder"
        else:
            logger.warning(
                """Wasn't able to fetch vocabulary from pretrained transformers lib.
                Neither <vocab> nor <encoder> are the valid fields for vocab.
                Your tokens will still be correctly indexed, but vocabulary file will not be saved."""
            )
        if vocab_field_name is not None:
            pretrained_vocab = getattr(self._tokenizer, vocab_field_name)
            for word, idx in pretrained_vocab.items():
                vocab._token_to_index[self._namespace][word] = idx
                vocab._index_to_token[self._namespace][idx] = word

        self._added_to_vocabulary = True

    @overrides
    def count_vocab_items(self, token: Token, counter: Dict[str, Dict[str, int]]):

After Change


        if vocab_field_name is not None:
            pretrained_vocab = getattr(self._tokenizer, vocab_field_name)
            if vocab_field_name == "sp_model":
                for idx in range(len(pretrained_vocab)):
                    word = pretrained_vocab.id_to_piece(idx)
                    vocab._token_to_index[self._namespace][word] = idx
                    vocab._index_to_token[self._namespace][idx] = word
            else:
                for word, idx in pretrained_vocab.items():
                    vocab._token_to_index[self._namespace][word] = idx
                    vocab._index_to_token[self._namespace][idx] = word
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 7

Instances


Project Name: allenai/allennlp
Commit Name: adeb1b1278619ff2d74d4fd82825e50a36f95ff4
Time: 2020-03-27
Author: dirkg@allenai.org
File Name: allennlp/data/token_indexers/pretrained_transformer_indexer.py
Class Name: PretrainedTransformerIndexer
Method Name: _add_encoding_to_vocabulary_if_needed


Project Name: OpenNMT/OpenNMT-py
Commit Name: 8a70c277c355fe734a01fe58baafdc2dc5164205
Time: 2016-12-29
Author: alerer@fb.com
File Name: OpenNMT/train.py
Class Name:
Method Name: eval


Project Name: NTMC-Community/MatchZoo
Commit Name: 4bc0cb5d2924a63cf06f641b7cf36f799885f33f
Time: 2018-12-26
Author: 948280670@qq.com
File Name: matchzoo/processor_units/processor_units.py
Class Name: WordHashingUnit
Method Name: transform