Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- "286c70300a4930310a614930300a61286c70310a284c304c0a46352e330a7470320a61284c3130303030304c0a4636362e380a7470330a61284c3230303030304c0a4638332e300a7470340a61284c3330303030304c0a463130302e300a7470350a61284c3430303030304c0a463130302e300a7470360a61284c3530303030304c0a463130302e300a7470370a61284c3630303030304c0a463130302e300a7470380a61284c3730303030304c0a463130302e300a7470390a61284c3830303030304c0a463130302e300a747031300a61284c3930303030304c0a463130302e300a747031310a61612e"
- "286c70300a4930310a614930310a61286c70310a284c304c0a4631362e330a7470320a61284c35304c0a4632382e320a7470330a61284c3130304c0a4635362e380a7470340a61284c3135304c0a463130302e300a7470350a61284c3230304c0a463130302e300a7470360a61284c3235304c0a463130302e300a7470370a61284c3330304c0a463130302e300a7470380a61284c3335304c0a463130302e300a7470390a61284c3430304c0a463130302e300a747031300a61284c3435304c0a463130302e300a747031310a61612e"
- "286c70300a4930300a614930300a61286c70310a284c304c0a46372e330a7470320a61284c31303030304c0a4633392e340a7470330a61284c32303030304c0a4639302e350a7470340a61284c33303030304c0a4635382e390a7470350a61284c34303030304c0a463130302e300a7470360a61284c35303030304c0a4639302e350a7470370a61284c36303030304c0a463130302e300a7470380a61284c37303030304c0a463130302e300a7470390a61284c38303030304c0a463130302e300a747031300a61284c39303030304c0a4638362e310a747031310a61612e"
- "286c70300a4930300a614930310a61286c70310a284c304c0a4631352e330a7470320a61284c31304c0a4631392e370a7470330a61284c32304c0a4631362e390a7470340a61284c33304c0a4631312e300a7470350a61284c34304c0a4634312e320a7470360a61284c35304c0a4639302e350a7470370a61284c36304c0a4637362e330a7470380a61284c37304c0a463130302e300a7470390a61284c38304c0a463130302e300a747031300a61284c39304c0a4638372e340a747031310a61612e"
def Q_learn_batch(mdp, q, lr=.1, iters=100, eps=0.5,
                  episode_length=10, n_episodes=2,
                  interactive_fn=None):
    """Run batch Q-learning on ``mdp`` and return the updated ``q``.

    Each of the ``iters`` iterations does the following:

    1. Simulates ``n_episodes`` episodes of at most ``episode_length``
       steps with ``sim_episode``, choosing actions through
       ``epsilon_greedy(q, state, eps)``, and appends the resulting
       experiences to one buffer that is kept across iterations.
    2. Rebuilds a target for *every* experience collected so far as
       ``r + mdp.discount_factor * value(q, s_prime)``, using 0 for the
       future value when ``s_prime`` is ``None``.
    3. Passes the full list of ``(s, a, target)`` triples to
       ``q.update(targets, lr)``.
    4. Calls ``interactive_fn(q, i)`` with the iteration index if
       ``interactive_fn`` is truthy.

    NOTE(review): ``sim_episode`` is expected to return a 3-tuple whose
    middle element is an iterable of ``(s, a, r, s_prime)`` tuples, and
    ``s_prime is None`` presumably marks a terminal transition -- confirm
    against the helper definitions.
    """
    def behaviour_policy(state):
        # Exploration policy bound to the current q and eps.
        return epsilon_greedy(q, state, eps)

    replay = []  # grows across iterations; never cleared
    for iteration in range(iters):
        # Gather fresh experience with the current exploration policy.
        for _ in range(n_episodes):
            rollout = sim_episode(mdp, episode_length, behaviour_policy)
            _, transitions, _ = rollout
            replay.extend(transitions)

        # Recompute targets for the whole buffer against the current q.
        targets = []
        for state, action, reward, next_state in replay:
            if next_state is None:
                lookahead = 0
            else:
                lookahead = value(q, next_state)
            targets.append(
                (state, action, reward + mdp.discount_factor * lookahead)
            )
        q.update(targets, lr)

        if interactive_fn:
            interactive_fn(q, iteration)
    return q
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement