4f2535df9cb702854ef892c0d2a92ef068636ce0,examples/reinforcement_learning/baselines/algorithms/td3/td3.py,,learn,#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#,271

Before Change



        td3_trainer.load_weights()

        while frame_idx < test_frames:
            state = env.reset()
            state = state.astype(np.float32)
            episode_reward = 0
            if frame_idx < 1:
                _ = td3_trainer.policy_net(
                    [state]
                )  # an extra forward call is needed before inner functions can use forward
                _ = td3_trainer.target_policy_net([state])

            for step in range(max_steps):
                action = td3_trainer.policy_net.get_action(state, explore_noise_scale=1.0)
                next_state, reward, done, _ = env.step(action)
                next_state = next_state.astype(np.float32)
                env.render()
                done = 1 if done else 0

                state = next_state
                episode_reward += reward
                frame_idx += 1

                # if frame_idx % 50 == 0:
                #     plot(frame_idx, rewards)

                if done:
                    break
            episode = int(frame_idx / max_steps)
            all_episodes = int(test_frames / max_steps)
            print("Episode: {}/{}  | Episode Reward: {:.4f}  | Running Time: {:.4f}"\
            .format(episode, all_episodes, episode_reward, time.time()-t0 ) )
            rewards.append(episode_reward)
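
The loop above assumes a Gym-style environment exposing reset(), step(action), and render(). A minimal stub with that interface, useful for dry-running the loop without a real simulator, might look like the following; the class name, dimensions, and reward are hypothetical, not taken from this commit:

    import numpy as np

    class StubEnv:
        # Hypothetical Gym-like stub: random observations, episodes end after max_len steps.
        def __init__(self, obs_dim=3, max_len=200):
            self.obs_dim = obs_dim
            self.max_len = max_len
            self.t = 0

        def reset(self):
            self.t = 0
            return np.zeros(self.obs_dim, dtype=np.float32)

        def step(self, action):
            self.t += 1
            next_state = np.random.randn(self.obs_dim).astype(np.float32)
            reward = float(-np.sum(np.square(action)))  # toy quadratic action cost
            done = self.t >= self.max_len
            return next_state, reward, done, {}

        def render(self):
            pass  # no-op in the stub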

After Change


        frame_idx = 0
        rewards = []
        t0 = time.time()
        for eps in range(train_episodes):
            state = env.reset()
            state = state.astype(np.float32)
            episode_reward = 0
            if frame_idx < 1:
                _ = td3_trainer.policy_net(
                    [state]
                )  # an extra forward call is needed here before inner functions can use model.forward
                _ = td3_trainer.target_policy_net([state])

            for step in range(max_steps):
                if frame_idx > explore_steps:
                    action = td3_trainer.policy_net.get_action(state, explore_noise_scale=1.0)
                else:
                    action = td3_trainer.policy_net.sample_action()

                next_state, reward, done, _ = env.step(action)
                next_state = next_state.astype(np.float32)
                env.render()
                done = 1 if done else 0

                replay_buffer.push(state, action, reward, next_state, done)

                state = next_state
                episode_reward += reward
                frame_idx += 1

                if len(replay_buffer) > batch_size:
                    for i in range(update_itr):
                        td3_trainer.update(batch_size, eval_noise_scale=0.5, reward_scale=1.)

                if done:
                    break

            if eps % int(save_interval) == 0:
                plot(rewards, Algorithm_name="TD3", Env_name=env_id)
                td3_trainer.save_weights()

            print("Episode: {}/{}  | Episode Reward: {:.4f}  | Running Time: {:.4f}"\
            .format(eps, train_episodes, episode_reward, time.time()-t0 ))
            rewards.append(episode_reward)
        td3_trainer.save_weights()

    if mode=="test":
        frame_idx = 0
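
The training loop relies on replay_buffer supporting push(...) and len(...), and td3_trainer.update() presumably draws minibatches from it. A minimal ring-buffer sketch matching that usage is shown below; the sample() method is an assumption about what update() needs, not taken from this commit:

    import random

    class ReplayBuffer:
        # Hypothetical minimal ring buffer matching the push/len usage above.
        def __init__(self, capacity=100000):
            self.capacity = capacity
            self.buffer = []
            self.position = 0

        def push(self, state, action, reward, next_state, done):
            # Overwrite the oldest entry once capacity is reached.
            if len(self.buffer) < self.capacity:
                self.buffer.append(None)
            self.buffer[self.position] = (state, action, reward, next_state, done)
            self.position = (self.position + 1) % self.capacity

        def sample(self, batch_size):
            # Assumed interface: uniform random minibatch of transitions.
            batch = random.sample(self.buffer, batch_size)
            states, actions, rewards, next_states, dones = map(list, zip(*batch))
            return states, actions, rewards, next_states, dones

        def __len__(self):
            return len(self.buffer)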
In pattern: SUPERPATTERN

Frequency: 4

Non-data size: 7

Instances


Project Name: tensorlayer/tensorlayer
Commit Name: 4f2535df9cb702854ef892c0d2a92ef068636ce0
Time: 2019-07-04
Author: 1402434478@qq.com
File Name: examples/reinforcement_learning/baselines/algorithms/td3/td3.py
Class Name:
Method Name: learn


Project Name: hanxiao/bert-as-service
Commit Name: f0d581c071f14682c46f7917e11592c189382f53
Time: 2018-12-17
Author: hanhxiao@tencent.com
File Name: server/bert_serving/server/__init__.py
Class Name: BertServer
Method Name: _run


Project Name: chainer/chainercv
Commit Name: 93cfd8bd22d6b798b94aead3c8ea75ace2727265
Time: 2019-02-18
Author: shingogo@hotmail.co.jp
File Name: chainercv/functions/ps_roi_max_align_2d.py
Class Name: PSROIMaxAlign2D
Method Name: forward_cpu


Project Name: NVIDIA/OpenSeq2Seq
Commit Name: 42ad0f227fa39fe9b96bc3e08b2e5704dc157e74
Time: 2018-06-26
Author: xravitejax@gmail.com
File Name: open_seq2seq/encoders/w2l_encoder.py
Class Name: Wave2LetterEncoder
Method Name: _encode