
Before Change



tl.layers.initialize_global_variables(sess)

if OUTPUT_GRAPH:
    tf.summary.FileWriter("logs/", sess.graph)

for i_episode in range(MAX_EPISODE):
    episode_time = time.time()
    s = env.reset()
    t = 0  # number of steps in this episode
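
For reference, the After Change below runs eagerly under TensorFlow 2.x, so the session and the explicit variable initialization above disappear, and tf.summary.FileWriter becomes tf.summary.create_file_writer. A minimal sketch of the equivalent setup, assuming the tutorial's names (MAX_EPISODE, MAX_EP_STEPS, OUTPUT_GRAPH) with illustrative values and an assumed CartPole-v0 environment id:

import time
import gym
import numpy as np
import tensorflow as tf

MAX_EPISODE = 3000    # assumed value, for illustration only
MAX_EP_STEPS = 1000   # assumed value, for illustration only
OUTPUT_GRAPH = False

env = gym.make('CartPole-v0')

if OUTPUT_GRAPH:
    # TF2 replacement for tf.summary.FileWriter
    writer = tf.summary.create_file_writer("logs/")

for i_episode in range(MAX_EPISODE):
    episode_time = time.time()
    s = env.reset().astype(np.float32)  # eager mode: no session needed
    t = 0       # number of steps in this episode
    all_r = []  # per-step rewards for this episode
    # ... the step loop continues as in the After Change below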

After Change


        a = actor.choose_action(s)

        s_new, r, done, info = env.step(a)
        s_new = s_new.astype(np.float32)

        if done: r = -20
        # these may be helpful in some tasks:
        # if abs(s_new[0]) >= env.observation_space.high[0]:
        #     # cart moved more than 2.4 units from the center
        #     r = -20
        # reward for the distance between the cart and the center:
        # r -= abs(s_new[0]) * .1

        all_r.append(r)

        td_error = critic.learn(s, r, s_new)  # learn value function: gradient = grad[r + lambda * V(s_new) - V(s)]
        actor.learn(s, a, td_error)           # learn policy:         true_gradient = grad[logPi(s, a) * td_error]

        s = s_new
        t += 1

        if done or t >= MAX_EP_STEPS:
            ep_rs_sum = sum(all_r)

            if "running_reward" not in globals():
                running_reward = ep_rs_sum
            else:
                running_reward = running_reward * 0.95 + ep_rs_sum * 0.05
            # start rendering once running_reward exceeds a threshold
            # if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True
            print("Episode: %d reward: %f running_reward %f took: %.5f" % \
                (i_episode, ep_rs_sum, running_reward, time.time() - episode_time))

            # Early stopping for a quick check
            if t >= MAX_EP_STEPS:
                print("Early Stopping")
                s = env.reset().astype(np.float32)
                rall = 0
                while True:
                    env.render()
                    # a = actor.choose_action(s)
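
The critic.learn / actor.learn calls above are the core TD actor-critic update. A minimal sketch of what the inline comments describe, assuming TensorFlow 2.x eager mode and generic Keras-style models; the class layout, GAMMA, and all names below are illustrative, not the tutorial's actual implementation:

import tensorflow as tf

GAMMA = 0.9  # assumed discount factor (the lambda in the comment above)

class Critic:
    """Value network: fits V(s) toward the TD(0) target r + GAMMA * V(s_new)."""

    def __init__(self, model, optimizer):
        self.model = model  # maps a state to a scalar V(s)
        self.optimizer = optimizer

    def learn(self, s, r, s_new):
        v_new = self.model(s_new[None, :])  # target term, held fixed (semi-gradient)
        with tf.GradientTape() as tape:
            v = self.model(s[None, :])
            td_error = r + GAMMA * v_new - v  # r + lambda * V(s_new) - V(s)
            loss = tf.square(td_error)
        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
        return td_error

class Actor:
    """Policy network: ascends grad[logPi(s, a) * td_error]."""

    def __init__(self, model, optimizer):
        self.model = model  # maps a state to action logits
        self.optimizer = optimizer

    def choose_action(self, s):
        logits = self.model(s[None, :])
        # sample one action from the softmax policy
        return int(tf.random.categorical(logits, 1)[0, 0])

    def learn(self, s, a, td_error):
        with tf.GradientTape() as tape:
            logits = self.model(s[None, :])
            # cross-entropy against the taken action equals -log pi(a|s)
            neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=[a], logits=logits)
            # minimizing -log pi(a|s) * td_error maximizes log pi(a|s) * td_error
            loss = neg_log_prob * tf.stop_gradient(td_error)
        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))

Wiring this up only needs two small Keras models, e.g. Actor(tf.keras.Sequential([tf.keras.layers.Dense(32, activation='relu'), tf.keras.layers.Dense(env.action_space.n)]), tf.keras.optimizers.Adam(0.001)); the loop above then runs unchanged.
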
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 6

Instances


Project Name: tensorlayer/tensorlayer
Commit Name: 641a28fbf0daff0ad1ad0f43d2c4b545cb6f9656
Time: 2019-02-16
Author: dhsig552@163.com
File Name: examples/reinforcement_learning/tutorial_cartpole_ac.py
Class Name:
Method Name:


Project Name: NifTK/NiftyNet
Commit Name: bd333dd43d69b26015eb3f201afe1772ba701a41
Time: 2018-05-07
Author: wenqi.li@ucl.ac.uk
File Name: niftynet/contrib/dataset_sampler/sampler_uniform_v2.py
Class Name: UniformSampler
Method Name: layer_op


Project Name: maciejkula/spotlight
Commit Name: fde2f66676f960782c993f7148927c4a4197ab10
Time: 2017-06-27
Author: maciej.kula@gmail.com
File Name: spotlight/factorization/explicit.py
Class Name: ExplicitFactorizationModel
Method Name: fit

