Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Evaluate the policy on the test set: look up the best action and its value
# for every state, then visualise both on a 2-D embedding of the states.

# Index 0 on the last axis of Tt is taken as the states; they are flattened
# into one plain Python list.
states = Tt[:, :, 0].flatten().tolist()

# best_action yields an (action, value) pair for each state; split the pairs
# into two parallel lists aligned with `states`.
decisions = [best_action(Q, state, offers) for state in states]
best_actions = [action for action, _ in decisions]
values = [value for _, value in decisions]

# Embed the states so they can be drawn as a scatter plot; the first two
# output columns serve as the x and y coordinates.
# NOTE(review): TSNE is presumably sklearn.manifold.TSNE -- confirm the import.
s_tsne = TSNE().fit_transform(states)
x_coords, y_coords = s_tsne[:, 0], s_tsne[:, 1]

# Two figures over the same embedding, differing only in the colouring:
#   1. value function for each state
#   2. recommended next best action for each state
for point_colors in (values, best_actions):
    plt.scatter(x_coords, y_coords, c=point_colors)
    plt.colorbar()
    plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement