bb9335bbc981c0541e37a875d79d0ef419008574,torchnlp/text_encoders/subword_encoder.py,SubwordEncoder,__init__,#SubwordEncoder#Any#Any#Any#Any#Any#,31
Before Change
max_occurrences=1e3):
self.append_eos = append_eos
if target_vocab_size is None:
self.tokenizer = SubwordTextTokenizer()
self.tokenizer.build_from_corpus(sample, min_count=min_occurrences)
else:
target_vocab_size -= len(RESERVED_ITOS)
self.tokenizer = SubwordTextTokenizer.build_to_target_size_from_corpus(
sample,
target_size=target_vocab_size,
min_val=min_occurrences,
max_val=max_occurrences)
self.stoi = RESERVED_STOI.copy()
self.itos = RESERVED_ITOS[:]
for token in self.tokenizer.vocab:
self.itos.append(token)
After Change
self.append_eos = append_eos
if target_vocab_size is None:
self.tokenizer = SubwordTextTokenizer()
self.tokenizer.build_from_corpus(sample, min_count=min_occurrences)
else:
target_vocab_size -= len(reserved_tokens)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 7
Instances
Project Name: PetrochukM/PyTorch-NLP
Commit Name: bb9335bbc981c0541e37a875d79d0ef419008574
Time: 2018-03-25
Author: petrochukm@gmail.com
File Name: torchnlp/text_encoders/subword_encoder.py
Class Name: SubwordEncoder
Method Name: __init__
Project Name: uber/ludwig
Commit Name: 116e474be75cdcf4c1d53b1c6540097766671b18
Time: 2020-03-27
Author: jimthompson5802@aol.com
File Name: ludwig/models/model.py
Class Name: Model
Method Name: __init__
Project Name: dmlc/gluon-nlp
Commit Name: f4275c0b80197e0f1bbd3a2a1a31cf07d85013b1
Time: 2019-01-09
Author: leonard@lausen.nl
File Name: scripts/word_embeddings/evaluate_pretrained.py
Class Name:
Method Name: