    return policy_loss

def critic_loss(returns, value):
    # Critic loss: mean squared error between the observed returns and the predicted values.
    return tf.keras.losses.mean_squared_error(returns, value)

class ActorCriticNetworkContinuous(ActorCriticNetwork):
    """Neural network for an Actor of an Actor-Critic algorithm using a continuous action space."""
After Change
    return policy_loss

def critic_loss(returns, value):
    # Critic loss: unreduced elementwise squared error between the predicted values and the returns.
    return tf.square(value - returns)

class ActorCriticNetworkContinuous(ActorCriticNetwork):
    """Neural network for an Actor of an Actor-Critic algorithm using a continuous action space."""
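The practical difference between the two critic losses is the reduction: tf.keras.losses.mean_squared_error averages the squared error over the last axis and returns one value per sample, while tf.square(value - returns) keeps the unreduced elementwise squared error and leaves the reduction to the caller. A minimal sketch of that difference, with tensor shapes and values chosen only for illustration:

import tensorflow as tf

# Assumed example data: a batch of 3 one-dimensional value estimates.
returns = tf.constant([[1.0], [2.0], [3.0]])
value = tf.constant([[1.5], [1.0], [3.0]])

# Before: averages over the last axis, yielding one loss per sample -> shape (3,).
mse_per_sample = tf.keras.losses.mean_squared_error(returns, value)

# After: keeps the elementwise squared error -> shape (3, 1); the caller chooses
# how to reduce it (e.g. tf.reduce_mean or tf.reduce_sum).
squared_error = tf.square(value - returns)

print(mse_per_sample.numpy())                  # [0.25 1.   0.  ]
print(tf.reduce_mean(squared_error).numpy())   # 0.41666666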