Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def attempt(self):
    """Run one episode against ``self.environment`` and return its reward sum.

    Each step picks an action with ``self.pick_best_action``, applies it via
    ``self.environment.step``, discretises the resulting observation and
    passes the transition to ``self.update_knowledge``.  The loop ends when
    the environment reports ``done``; ``self.attempt_no`` is then incremented
    once.

    Returns:
        The sum of the per-step rewards as passed to ``update_knowledge``.
        The reward of the terminating step is overridden to ``0.0`` before it
        is accumulated, so it never contributes to the total.
    """
    observation = self.discretise(self.environment.reset())
    done = False
    reward_sum = 0.0
    while not done:
        action = self.pick_best_action(observation)
        # step() is unpacked as a 4-tuple; the last element is not used here.
        new_observation, reward, done, _info = self.environment.step(action)
        if done:
            # The terminating transition is recorded with zero reward.
            # NOTE(review): presumably so that ending the episode is never
            # rewarded -- confirm against update_knowledge().
            reward = 0.0
        # Discretise on every step (not only the terminal one) so that
        # update_knowledge() always receives discretised states.
        new_observation = self.discretise(new_observation)
        self.update_knowledge(action, observation, new_observation, reward)
        observation = new_observation
        reward_sum += reward
    # One finished episode == one attempt.
    self.attempt_no += 1
    return reward_sum
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement