# Base configuration shared by every framework run below.
core_config["num_workers"] = 0  # Run locally.
# Environment options: non-slippery dynamics on the "4x4" map.
core_config["env_config"] = {"is_slippery": False, "map_name": "4x4"}
# Repeat the test once per framework yielded by `framework_iterator`.
for fw in framework_iterator(core_config, ["tf", "eager"]):
    # Work on a per-iteration copy so changes made for one framework do not
    # leak into the base config.
    # NOTE(review): if `core_config` is a plain dict this copy is shallow, so
    # the nested "env_config" dict would still be shared -- confirm that is
    # acceptable.
    config = core_config.copy()
After Change
# ... the same action for the same input (parameter noise is
# deterministic).
# NOTE(review): the beginning of the sentence above lies outside this
# excerpt; restore it from the full file.
policy = trainer.get_policy()
# Falls back to None when the policy has no `_sess` attribute.
p_sess = getattr(policy, "_sess", None)
# Notify the exploration component that a new episode starts before any
# action is computed.
policy.exploration.on_episode_start(policy, tf_sess=p_sess)
# Reference action for `obs`, computed with the default `explore` setting.
a_ = trainer.compute_action(obs)
# Recompute the action for the same observation 10 times with
# explore=True.
# NOTE(review): `a` is presumably compared against `a_` in lines beyond
# this excerpt -- confirm.
for _ in range(10):
    a = trainer.compute_action(obs, explore=True)