36bda8432b2ed23f623c8c3031b2cba148bec93b,rllib/examples/policy/episode_env_aware_policy.py,EpisodeEnvAwarePolicy,compute_actions_from_input_dict,#EpisodeEnvAwarePolicy#Any#Any#Any#,48

Before Change


        self.episode_id = input_dict[SampleBatch.EPS_ID][0]
        self.env_id = input_dict["env_id"][0]
        # Always return (episodeID, envID)
        return [
            np.array([self.episode_id, self.env_id]) for _ in input_dict["obs"]
        ], [], {}

    @override(Policy)
    def postprocess_trajectory(self,
                               sample_batch,

After Change


                                        explore=None,
                                        timestep=None,
                                        **kwargs):
        ts = input_dict["t"]
        print(ts)
        # Always return [episodeID, envID] as actions.
        actions = np.array([[
            input_dict[SampleBatch.AGENT_INDEX][i],
            input_dict[SampleBatch.EPS_ID][i], input_dict["env_id"][i]
        ] for i, _ in enumerate(input_dict["obs"])])
        states = [
            np.array([[ts[i]] for i in range(len(input_dict["obs"]))])
            for _ in range(2)
        ]
        return actions, states, {}

    @override(Policy)
    def postprocess_trajectory(self,
                               sample_batch,
Italian Trulli
In pattern: SUPERPATTERN

Frequency: 3

Non-data size: 7

Instances


Project Name: ray-project/ray
Commit Name: 36bda8432b2ed23f623c8c3031b2cba148bec93b
Time: 2020-10-01
Author: sven@anyscale.io
File Name: rllib/examples/policy/episode_env_aware_policy.py
Class Name: EpisodeEnvAwarePolicy
Method Name: compute_actions_from_input_dict


Project Name: d2l-ai/d2l-zh
Commit Name: 3a770cbc97085c2cd4eaa0a46b2bc037f35389c2
Time: 2017-10-25
Author: muli@cs.cmu.edu
File Name: utils.py
Class Name:
Method Name: evaluate_accuracy


Project Name: NervanaSystems/coach
Commit Name: 19ad2d60a7022bb5125855c029f27d86aaa46d64
Time: 2019-07-14
Author: gal.leibovich@intel.com
File Name: rl_coach/filters/reward/reward_normalization_filter.py
Class Name: RewardNormalizationFilter
Method Name: filter