Untitled

# use the test set to evaluate the policy
states = Tt[:, :, 0].flatten().tolist()

values = []
best_actions = []
for s in states:
    a_best, v_best = best_action(Q, s, offers)
    values.append(v_best)
    best_actions.append(a_best)

s_tsne = TSNE().fit_transform(states)

# value function for each state
plt.scatter(s_tsne[:, 0], s_tsne[:, 1], c = values)
plt.colorbar()
plt.show()

# recommended next best actions for each state
plt.scatter(s_tsne[:, 0], s_tsne[:, 1], c = best_actions)
plt.colorbar()
plt.show()