from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator

SRC = Field(tokenize = "spacy",
            tokenizer_language="de",
            init_token = "<sos>",
            eos_token = "<eos>",
            lower = True)

TRG = Field(tokenize = "spacy",
            tokenizer_language="en",
            init_token = "<sos>",
            eos_token = "<eos>",
            lower = True)
train_data, valid_data, test_data = Multi30k.splits(exts = (".de", ".en"),
                                                    fields = (SRC, TRG))

######################################################################
# Now that we've defined ``train_data``, we can see an extremely useful
# feature of ``torchtext``'s ``Field``: the ``build_vocab`` method
# now allows us to create the vocabulary associated with each language

SRC.build_vocab(train_data, min_freq = 2)
TRG.build_vocab(train_data, min_freq = 2)
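
######################################################################
# As a quick sanity check, we can print the size of each vocabulary we
# just built. The exact counts depend on the downloaded Multi30k training
# split and the ``min_freq = 2`` cutoff, so treat the numbers printed
# here as illustrative rather than fixed.

# Each vocabulary also contains the special tokens that ``Field`` adds
# automatically: <unk>, <pad>, <sos>, and <eos>
print(f"Unique tokens in source (de) vocabulary: {len(SRC.vocab)}")
print(f"Unique tokens in target (en) vocabulary: {len(TRG.vocab)}")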

######################################################################
# Once these lines of code have been run, ``SRC.vocab.stoi`` will be a