for i in range(self._hparams.num_blocks):
with tf.variable_scope("num_blocks_{}".format(i)):
with tf.variable_scope("self_attention"):
dec = layers.layer_normalize(self.dec)
dec = layers.multihead_attention(
queries=dec,
keys=dec,
queries_valid_length=tgt_length,
keys_valid_length=tgt_length,
num_units=self._hparams.embedding.dim,
num_heads=self._hparams.num_heads,
dropout_rate=self._hparams.dropout,
causality=True,
scope="self_attention")
dec = tf.layers.dropout(
dec,
rate=self._hparams.dropout,
training=context.is_train())
self.dec += dec
with tf.variable_scope("encdec_attention"):
dec = layers.layer_normalize(self.dec)
dec = layers.multihead_attention(
queries=dec,
keys=encoder_output,
queries_valid_length=tgt_length,
keys_valid_length=src_length,
num_units=self._hparams.embedding.dim,
num_heads=self._hparams.num_heads,
dropout_rate=self._hparams.dropout,
causality=False,
scope="multihead_attention")dec = tf.layers.dropout(
dec,
rate=self._hparams.dropout,
training=context.is_train())
self.dec += dec
poswise_network = FeedForwardNetwork(hparams=self._hparams["poswise_feedforward"])
with tf.variable_scope(poswise_network.variable_scope):
dec = layers.layer_normalize(self.dec)
dec = poswise_network(dec)
dec = tf.layers.dropout(
dec,
rate=self._hparams.dropout,
training=context.is_train())
self.dec += dec

# Share the output projection weight with the word embedding: project the
# decoder states to vocabulary-sized logits.
if self._hparams.share_embed_and_transform:
    batch_size, length = tf.shape(self.dec)[0], tf.shape(self.dec)[1]
    depth = self.dec.get_shape()[2]
    self.dec = tf.reshape(self.dec, [-1, depth])
    self.logits = tf.matmul(self.dec, self._output_transform)
    self.logits = tf.reshape(
        self.logits, [batch_size, length, self._vocab_size])
else: