Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Evaluate the trained model: play 100 games, picking each action from the
# model's prediction on the previous observation, then print the per-game
# scores, their average, and how often each action was chosen.
# Relies on `env`, `goal_steps`, `trained_model`, `random` and `np` being
# defined earlier in the file (not visible in this excerpt).
scores = []   # total reward collected in each game
choices = []  # every action taken, across all games
for each_game in range(100):
    score = 0
    # None marks "no observation seen yet in this game".
    prev_obs = None
    # Reset before playing so that every game, including the first one,
    # starts from a fresh episode. The value returned by reset() is
    # intentionally not used: as in the original script, the first action
    # of each game is chosen at random.
    env.reset()
    for step_index in range(goal_steps):
        env.render()
        if prev_obs is None:
            # No observation to feed the model yet: pick 0 or 1 at random.
            action = random.randrange(0, 2)
        else:
            # Feed the previous observation as a single-row batch and take
            # the index of the largest value in the first prediction row.
            # NOTE(review): assumes the observation is a 1-D array - confirm.
            action = np.argmax(
                trained_model.predict(prev_obs.reshape(-1, len(prev_obs)))[0]
            )
        choices.append(action)
        new_observation, reward, done, info = env.step(action)
        prev_obs = new_observation
        score += reward
        if done:
            break
    scores.append(score)

print(scores)
print('Average Score:', sum(scores) / len(scores))
# Skip the ratio line when no step was played (e.g. goal_steps <= 0);
# dividing by len(choices) would raise ZeroDivisionError.
if choices:
    print('choice 1:{} choice 0:{}'.format(
        choices.count(1) / len(choices),
        choices.count(0) / len(choices),
    ))
Add Comment
Please, Sign In to add comment