self.ct_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="Critic/target")
// target net replacement
self.soft_replace = [[tf.assign(ta, (1 - TAU) * ta + TAU * ea), tf.assign(tc, (1 - TAU) * tc + TAU * ec)]
for ta, ea, tc, ec in zip(self.at_params, self.ae_params, self.ct_params, self.ce_params)]
q_target = self.R + GAMMA * q_
After Change
self.ct_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="Critic/target")
// target net replacement
self.soft_replace = [tf.assign(t, (1 - TAU) * t + TAU * e)
for t, e in zip(self.at_params + self.ct_params, self.ae_params + self.ce_params)]
q_target = self.R + GAMMA * q_
// in the feed_dict for the td_error, self.a should be replaced with the actions stored in memory