self._check_trajectory_dimensions(experience)
if self._n_step_update == 1:
  time_steps, actions, next_time_steps = self._experience_to_transitions(
      experience)
else:
  # To compute n-step returns, we need the first time steps, the first
  # actions, and the last time steps. Therefore we extract the first and
  # last transitions from our Trajectory.
After Change
# method requires a time dimension to compute the loss properly.
self._check_trajectory_dimensions(experience)
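# Note: state_spec is empty for stateless (feed-forward) Q-networks, so
# squeeze_time_dim is True exactly when the network keeps no recurrent state.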
squeeze_time_dim = not self._q_network.state_spec
if self._n_step_update == 1:
  time_steps, policy_steps, next_time_steps = (
      trajectory.experience_to_transitions(experience, squeeze_time_dim))
  actions = policy_steps.action
else:
  # To compute n-step returns, we need the first time steps, the first
  # actions, and the last time steps. Therefore we extract the first and
  # last transitions from our Trajectory.
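The body of this else branch is cut off above. A minimal sketch of the extraction the comment describes, assuming `experience` is a batched Trajectory of shape [B, T, ...] and reusing `trajectory.experience_to_transitions` and `squeeze_time_dim` from the snippet; the `tf.nest.map_structure` slicing is an illustration, not the verbatim continuation:

import tensorflow as tf

# Take the first two and last two steps along the time axis, then convert
# each two-step slice into a single-step transition.
first_two_steps = tf.nest.map_structure(lambda t: t[:, :2], experience)
last_two_steps = tf.nest.map_structure(lambda t: t[:, -2:], experience)

# First transition supplies the initial time steps and actions.
time_steps, policy_steps, _ = trajectory.experience_to_transitions(
    first_two_steps, squeeze_time_dim)
actions = policy_steps.action

# Last transition supplies the time steps after n steps.
_, _, next_time_steps = trajectory.experience_to_transitions(
    last_two_steps, squeeze_time_dim)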