641a28fbf0daff0ad1ad0f43d2c4b545cb6f9656,examples/reinforcement_learning/tutorial_cartpole_ac.py,Actor,learn,#Actor#Any#Any#Any#,106
Before Change
// self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v) // minimize(-exp_v) = maximize(exp_v)
def learn(self, s, a, td):
    """Run one training step in the session and return the fetched ``exp_v``.

    ``s`` and ``a`` are each wrapped in a single-element list before being
    fed to the ``self.s`` / ``self.a`` inputs; only the first element of
    ``td`` is fed to ``self.td_error``.
    """
    fetches = [self.train_op, self.exp_v]
    feed_dict = {self.s: [s], self.a: [a], self.td_error: td[0]}
    # The first fetched value belongs to the training op and is discarded.
    _, expected_value = self.sess.run(fetches, feed_dict)
    return expected_value
def choose_action(self, s):
After Change
def learn(self, s, a, td):
    """Apply one gradient update to ``self.model`` and return the loss.

    The loss is ``tl.rein.cross_entropy_reward_loss`` evaluated on the model
    outputs for the single-state batch ``[s]``, with ``[a]`` as the actions
    and ``td[0]`` as the rewards. Gradients of that loss with respect to
    ``self.model.weights`` are applied through ``self.optimizer``.
    """
    # Only the forward pass and the loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        logits = self.model([s]).outputs
        loss = tl.rein.cross_entropy_reward_loss(
            logits=logits, actions=[a], rewards=td[0]
        )
    weights = self.model.weights
    gradients = tape.gradient(loss, weights)
    self.optimizer.apply_gradients(zip(gradients, weights))
    return loss
In pattern: SUPERPATTERN
Frequency: 4
Non-data size: 3
Instances
Project Name: tensorlayer/tensorlayer
Commit Name: 641a28fbf0daff0ad1ad0f43d2c4b545cb6f9656
Time: 2019-02-16
Author: dhsig552@163.com
File Name: examples/reinforcement_learning/tutorial_cartpole_ac.py
Class Name: Actor
Method Name: learn
Project Name: deepchem/deepchem
Commit Name: 97d7f88df32ae58875d7534c7c517905a3eb1341
Time: 2020-11-05
Author: mufeili1996@gmail.com
File Name: deepchem/models/torch_models/gat.py
Class Name: GAT
Method Name: forward
Project Name: tensorlayer/tensorlayer
Commit Name: 641a28fbf0daff0ad1ad0f43d2c4b545cb6f9656
Time: 2019-02-16
Author: dhsig552@163.com
File Name: examples/reinforcement_learning/tutorial_cartpole_ac.py
Class Name: Critic
Method Name: learn
Project Name: deepchem/deepchem
Commit Name: f766c92a63fb67a99744dcb82c8c78406fbf905a
Time: 2020-11-04
Author: mufeili1996@gmail.com
File Name: deepchem/models/torch_models/gat.py
Class Name: GAT
Method Name: forward