b_state = self.obs_normalizer(b_state, update=False)
# action_distrib will be recomputed when computing gradients, so backprop is disabled here
with chainer.using_config("train", False), chainer.no_backprop_mode():
action_distrib, value = self.model(b_state)
action = chainer.cuda.to_cpu(action_distrib.sample().data)[0]
self.entropy_record.append(float(action_distrib.entropy.data))