db7b74579181f9cbae3583f447d83148714a1c3d,stanza/models/classifiers/cnn_classifier.py,CNNClassifier,forward,#CNNClassifier#Any#Any#,83

Before Change


                    new_word = word
                if new_word in self.vocab_map:
                    idx = torch.tensor(self.vocab_map[new_word], requires_grad=False, device=device)
                    input_vectors.append(self.embedding(idx))
                    continue

                if new_word[-1] == """:

After Change


            begin_pad_width = random.randint(0, max_phrase_len - len(phrase))
            end_pad_width = max_phrase_len - begin_pad_width - len(phrase)
            indices = []
            unknowns = []
            for i in range(begin_pad_width):
                indices.append(PAD_ID)

            for word in phrase:
                // our current word vectors are all entirely lowercased
                word = word.lower()
                if word in self.vocab_map:
                    indices.append(self.vocab_map[word])
                    continue
                new_word = word.replace("-", "")
                // google vectors have words which are all dashes
                if len(new_word) == 0:
                    new_word = word
                if new_word in self.vocab_map:
                    indices.append(self.vocab_map[new_word])
                    continue

                if new_word[-1] == """:
                    new_word = new_word[:-1]
                    if new_word in self.vocab_map:
                        indices.append(self.vocab_map[new_word])
                        continue

                // TODO: split UNK based on part of speech?  might be an interesting experiment
                unknowns.append(len(indices))
                indices.append(PAD_ID)
            for i in range(end_pad_width):
                indices.append(PAD_ID)

            indices = torch.tensor(indices, requires_grad=False, device=device)
            input_vectors = self.embedding(indices)
            for unknown in unknowns:
                input_vectors[unknown, :] = self.unk

            // we will now have an N x emb_size tensor
            // this is the input to the CNN
            // there are two ways in which this padding is suboptimal
            // the first is that for short sentences, smaller windows will
            //   be padded to the point that some windows are entirely pad
            // the second is that a sentence S will have more or less padding
            //   depending on what other sentences are in its batch
            // we assume these effects are pretty minimal

            // reshape x to 1xNxE
            x = input_vectors.unsqueeze(0)
            input_tensor.append(x)
        x = torch.stack(input_tensor)
[Image: Italian Trulli]
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 4

Instances


Project Name: stanfordnlp/stanza
Commit Name: db7b74579181f9cbae3583f447d83148714a1c3d
Time: 2020-06-15
Author: horatio@gmail.com
File Name: stanza/models/classifiers/cnn_classifier.py
Class Name: CNNClassifier
Method Name: forward


Project Name: analysiscenter/batchflow
Commit Name: 4c50261df4847bdfd7c8067307e8532f96d04104
Time: 2019-08-02
Author: Tsimfer.SA@gazprom-neft.ru
File Name: batchflow/models/torch/encoder_decoder.py
Class Name: EncoderDecoder
Method Name: body


Project Name: dpressel/mead-baseline
Commit Name: 71bd73748b835de5ae20bdc90ce4321e47f4c2b2
Time: 2019-09-25
Author: dpressel@gmail.com
File Name: python/eight_mile/tf/layers.py
Class Name: EmbeddingsStack
Method Name: call