# Run MAX_EPISODE episodes; each one begins by resetting the environment
# and clearing the per-episode bookkeeping.
for i_episode in range(MAX_EPISODE):
    episode_time = time.time()  # wall-clock timestamp taken at episode start
    s = env.reset()  # value returned by reset (presumably the initial observation -- confirm)
    t = 0  # number of steps taken in this episode
    all_r = []  # rewards of all steps
    while True:
        # Rendering is optional and controlled by the RENDER flag.
        if RENDER:
            env.render()
After Change
# Run MAX_EPISODE episodes; each one begins by resetting the environment
# and clearing the per-episode bookkeeping.
for i_episode in range(MAX_EPISODE):
    episode_time = time.time()  # wall-clock timestamp taken at episode start
    # Cast the value returned by reset to float32 (presumably the initial
    # observation, converted to match the dtype used downstream -- confirm).
    s = env.reset().astype(np.float32)
    t = 0  # number of steps taken in this episode
    all_r = []  # rewards of all steps
    while True:
        # Rendering is optional and controlled by the RENDER flag.
        if RENDER:
            env.render()