db7b74579181f9cbae3583f447d83148714a1c3d,stanza/models/classifiers/cnn_classifier.py,CNNClassifier,forward,#CNNClassifier#Any#Any#,83
 Before Change 
                    new_word = word
                if new_word in self.vocab_map:
                    idx = torch.tensor(self.vocab_map[new_word], requires_grad=False, device=device)
                    input_vectors.append(self.embedding(idx) )
                    continue
                if new_word[-1] == "'":
 After Change 
            begin_pad_width = random.randint(0, max_phrase_len - len(phrase))
            end_pad_width = max_phrase_len - begin_pad_width - len(phrase)
            indices = []
            unknowns = [] 
            for i in range(begin_pad_width):
                indices.append(PAD_ID)
            for word in phrase:
                # our current word vectors are all entirely lowercased
                word = word.lower()
                if word in self.vocab_map:
                    indices.append(self.vocab_map[word])
                    continue
                new_word = word.replace("-", "")
                # the google vectors contain words which are all dashes, so
                # keep the original word if stripping dashes leaves nothing
                if len(new_word) == 0:
                    new_word = word
                if new_word in self.vocab_map:
                    indices.append(self.vocab_map[new_word])
                    continue
                if new_word[-1] == "'":
                    new_word = new_word[:-1]
                    if new_word in self.vocab_map:
                        indices.append(self.vocab_map[new_word])
                        continue
                # TODO: split UNK based on part of speech?  might be an interesting experiment
                unknowns.append(len(indices))
                indices.append(PAD_ID)
            for i in range(end_pad_width):
                indices.append(PAD_ID)
            indices = torch.tensor(indices, requires_grad=False, device=device)
            input_vectors = self.embedding(indices)
            for unknown in unknowns:
                input_vectors[unknown, :] = self.unk
            # we will now have an N x emb_size tensor
            # this is the input to the CNN
            # there are two ways in which this padding is suboptimal
            # the first is that for short sentences, smaller windows will
            #   be padded to the point that some windows are entirely pad
            # the second is that a sentence S will have more or less padding
            #   depending on what other sentences are in its batch
            # we assume these effects are pretty minimal
            # reshape x to 1xNxE
            x = input_vectors.unsqueeze(0)
            input_tensor.append(x)
        x = torch.stack(input_tensor)
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances  Project Name: stanfordnlp/stanza
 Commit Name: db7b74579181f9cbae3583f447d83148714a1c3d
 Time: 2020-06-15
 Author: horatio@gmail.com
 File Name: stanza/models/classifiers/cnn_classifier.py
 Class Name: CNNClassifier
 Method Name: forward
 Project Name: analysiscenter/batchflow
 Commit Name: 4c50261df4847bdfd7c8067307e8532f96d04104
 Time: 2019-08-02
 Author: Tsimfer.SA@gazprom-neft.ru
 File Name: batchflow/models/torch/encoder_decoder.py
 Class Name: EncoderDecoder
 Method Name: body
 Project Name: dpressel/mead-baseline
 Commit Name: 71bd73748b835de5ae20bdc90ce4321e47f4c2b2
 Time: 2019-09-25
 Author: dpressel@gmail.com
 File Name: python/eight_mile/tf/layers.py
 Class Name: EmbeddingsStack
 Method Name: call