4f2535df9cb702854ef892c0d2a92ef068636ce0,examples/reinforcement_learning/baselines/algorithms/td3/td3.py,,learn,#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#Any#,271
Before Change
if done:
break
episode = int(frame_idx / max_steps) // current episode
all_episodes = int(max_frames / max_steps) // total episodes
print("Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}"\
.format(episode, all_episodes, episode_reward, time.time()-t0 ))
rewards.append(episode_reward)
After Change
frame_idx = 0
rewards = []
t0 = time.time()
for eps in range(train_episodes):
state = env.reset()
state = state.astype(np.float32)
episode_reward = 0
if frame_idx < 1:
_ = td3_trainer.policy_net(
[state]
) // need an extra call here to make inside functions be able to use model.forward
_ = td3_trainer.target_policy_net([state])
for step in range(max_steps):
if frame_idx > explore_steps:
action = td3_trainer.policy_net.get_action(state, explore_noise_scale=1.0)
else:
action = td3_trainer.policy_net.sample_action()
next_state, reward, done, _ = env.step(action)
next_state = next_state.astype(np.float32)
env.render()
done = 1 if done ==True else 0
replay_buffer.push(state, action, reward, next_state, done)
state = next_state
episode_reward += reward
frame_idx += 1
if len(replay_buffer) > batch_size:
for i in range(update_itr):
td3_trainer.update(batch_size, eval_noise_scale=0.5, reward_scale=1.)
if done:
break
if eps % int(save_interval) == 0:
plot(rewards, Algorithm_name="TD3", Env_name=env_id)
td3_trainer.save_weights()
print("Episode: {}/{} | Episode Reward: {:.4f} | Running Time: {:.4f}"\
.format(eps, train_episodes, episode_reward, time.time()-t0 ))
rewards.append(episode_reward)
td3_trainer.save_weights()
if mode=="test":
frame_idx = 0
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 4
Instances
Project Name: tensorlayer/tensorlayer
Commit Name: 4f2535df9cb702854ef892c0d2a92ef068636ce0
Time: 2019-07-04
Author: 1402434478@qq.com
File Name: examples/reinforcement_learning/baselines/algorithms/td3/td3.py
Class Name:
Method Name: learn
Project Name: tsurumeso/waifu2x-chainer
Commit Name: 6e4be1285888e9caa90fa483e10b869ceb3157d4
Time: 2017-03-03
Author: nstm101339@gmail.com
File Name: waifu2x.py
Class Name:
Method Name: upscale_image
Project Name: microsoft/nni
Commit Name: 26b71c40e3ed6cd9dc9601da5a1f994c689b16ff
Time: 2020-11-13
Author: 33053116+J-shang@users.noreply.github.com
File Name: nni/algorithms/hpo/hyperband_advisor/hyperband_advisor.py
Class Name: Hyperband
Method Name: _handle_trial_end