# Start a new episode: reset the environment and the per-episode state
# (ep_r, the three transition buffers, and the start timestamp t0).
s = self.env.reset()
ep_r = 0
buffer_s, buffer_a, buffer_r = [], [], []
t0 = time.time()
# NOTE(review): only the head of this loop is visible in this excerpt; the
# remainder of the loop body is not shown here.
for t in range(EP_LEN):
    if not ROLLING_EVENT.is_set():  # while global PPO is updating
        ROLLING_EVENT.wait()  # wait until PPO is updated
        # Clear the history buffer so that only data collected with the
        # new policy is kept.
        buffer_s, buffer_a, buffer_r = [], [], []
After Change
# NOTE(review): the loop/branch headers that enclose these statements are
# not visible in this excerpt, so they are left unindented; re-indent them
# to match the enclosing scope.
discounted_r.append(v_s_)
discounted_r.reverse()
# Convert discounted_r to an array and add a trailing axis
# (presumably one row per collected step -- confirm against the consumer).
buffer_r = np.array(discounted_r)[:, np.newaxis]
QUEUE.put([buffer_s, buffer_a, buffer_r])  # put data in the queue
# Rebind the names to fresh lists; the objects already placed in the queue
# are not modified by this assignment.
buffer_s, buffer_a, buffer_r = [], [], []
# update