3a0f8d18624b326de0400bff84ddc0f8c60c46e9,src/garage/tf/algos/reps.py,REPS,_policy_opt_input_values,#REPS#,519
Before Change
# pylint: disable=unexpected-keyword-arg
policy_opt_input_values = self._policy_opt_inputs._replace(
obs_var=samples_data["observations"],
action_var=samples_data["actions"],
reward_var=samples_data["rewards"],
valid_var=samples_data["valids"],
feat_diff=self._feat_diff,
After Change
list(np.ndarray): Flattened policy optimization input values.
agent_infos = episodes.padded_agent_infos
policy_state_info_list = [
agent_infos[k] for k in self.policy.state_info_keys
]
actions = [
self._env_spec.action_space.flatten_n(act)
for act in episodes.actions_list
]
padded_actions = episodes.pad_to_last(np.concatenate(actions))
# pylint: disable=unexpected-keyword-arg
policy_opt_input_values = self._policy_opt_inputs._replace(
obs_var=episodes.padded_observations,
action_var=padded_actions,
reward_var=episodes.padded_rewards,
valid_var=episodes.valids,
feat_diff=self._feat_diff,
param_eta=self._param_eta,
In pattern: SUPERPATTERN
Frequency: 3
Non-data size: 10
Instances
Project Name: rlworkgroup/garage
Commit Name: 3a0f8d18624b326de0400bff84ddc0f8c60c46e9
Time: 2020-10-20
Author: 31981600+yeukfu@users.noreply.github.com
File Name: src/garage/tf/algos/reps.py
Class Name: REPS
Method Name: _policy_opt_input_values
Project Name: rlworkgroup/garage
Commit Name: c0fd41d73da7e7a71d6054e87370be35ca708e67
Time: 2020-10-21
Author: 31981600+yeukfu@users.noreply.github.com
File Name: src/garage/tf/algos/te_npo.py
Class Name: TENPO
Method Name: _policy_opt_input_values
Project Name: rlworkgroup/garage
Commit Name: 3a0f8d18624b326de0400bff84ddc0f8c60c46e9
Time: 2020-10-20
Author: 31981600+yeukfu@users.noreply.github.com
File Name: src/garage/tf/algos/reps.py
Class Name: REPS
Method Name: _policy_opt_input_values
Project Name: rlworkgroup/garage
Commit Name: c0fd41d73da7e7a71d6054e87370be35ca708e67
Time: 2020-10-21
Author: 31981600+yeukfu@users.noreply.github.com
File Name: src/garage/tf/algos/te_npo.py
Class Name: TENPO
Method Name: _train_once