emb = self.word_lut(input)
if self.positional_encoding:
    emb = emb + Variable(self.pe[:emb.size(0), :1, :emb.size(2)].expand_as(emb))
    emb = emb * math.sqrt(emb.size(2))
# n.b. you can increase performance if you compute W_ih * x for all
# iterations in parallel, but that's only possible if
# self.input_feed=False
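
The self.pe buffer sliced above is not shown in this snippet; presumably it is the standard sinusoidal table from "Attention Is All You Need". A minimal sketch of how such a buffer might be precomputed (the function name and max_len are illustrative, and it assumes an even dim):

import math
import torch

def build_sinusoidal_pe(max_len, dim):
    # Standard sinusoidal encoding: even channels get sin, odd channels
    # get cos, with wavelengths forming a geometric progression.
    pe = torch.zeros(max_len, dim)
    position = torch.arange(0, max_len).float().unsqueeze(1)
    div_term = torch.exp(torch.arange(0, dim, 2).float()
                         * -(math.log(10000.0) / dim))
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    # Shape (max_len, 1, dim) so it can be sliced as pe[:seq_len, :1, :dim]
    # and expanded over the batch axis, matching the indexing used above
    # (an assumption about how self.pe is laid out).
    return pe.unsqueeze(1)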
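
The note above refers to hoisting the input-to-hidden projection out of the time loop: when input feeding is off, every step's input is known up front, so W_ih * x can be computed for all steps in one matmul. A minimal sketch under assumed LSTM shapes (all names and sizes here are illustrative):

import torch

seq_len, batch, dim, hidden_size = 10, 4, 16, 32
emb = torch.randn(seq_len, batch, dim)       # all step inputs, known in advance
W_ih = torch.randn(4 * hidden_size, dim)     # LSTM input-to-hidden weight

# One batched projection for every timestep at once...
proj = emb.view(-1, dim).mm(W_ih.t()).view(seq_len, batch, 4 * hidden_size)

# ...so the recurrence can reuse proj[t] instead of recomputing W_ih @ x_t
# at each step. With self.input_feed=True the step input depends on the
# previous output, so this hoisting is impossible.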
After Change
if False:
    if hidden:
        outputs = outputs[hidden.size(0):]
        attn = attn[:, hidden.size(0):].squeeze()
        attn = torch.stack([attn])
attns["std"] = attn
if self._copy:
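
A hedged reading of the (disabled) trimming block above, assuming outputs is laid out as (seq_len, batch, dim) and attn keeps the decoder time axis in dimension 1: it drops the leading hidden.size(0) steps that were already consumed, then restacks the attention before it is stored. A toy illustration of the slicing semantics (all shapes are assumptions):

import torch

outputs = torch.randn(10, 4, 8)    # (seq_len, batch, dim) -- assumed layout
attn = torch.randn(4, 10, 7)       # (batch, seq_len, src_len) -- assumed layout
n_prev = 3                         # stands in for hidden.size(0)

outputs = outputs[n_prev:]         # drop the first n_prev steps -> (7, 4, 8)
attn = attn[:, n_prev:].squeeze()  # same trim on the time axis -> (4, 7, 7)
attn = torch.stack([attn])         # re-add a leading axis -> (1, 4, 7, 7)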