Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Solution to exercise 2.2 (the softmax exercise).
- import numpy as np
- import matplotlib.pyplot as plt
- from do_class import *
# Experiment dimensions, read as module-level globals by the functions in
# this file.
k = 1000  # number of tasks; each task is played by its own agent
p = 2000  # number of plays the agent makes on each task
def _run_bandit_experiment(choose_action, output_path):
    """Play every task with its own agent and average the scores per play.

    A single ``task(k)`` object is created and printed.  For each of the
    ``k`` task indices a new ``eGreedyAgent`` makes ``p`` plays; after
    every play the agent's running average score (``agent.score`` divided
    by the number of plays so far) is recorded, rounded to two decimals.
    The recorded values are then averaged across tasks for each play
    position and rounded to four decimals.

    Args:
        choose_action: Callable that receives the agent and returns the
            action to play next.
        output_path: File that is overwritten with ``str()`` of the
            averaged list.

    Returns:
        A list of ``p`` floats: the across-task mean of the running
        average score at each play position.

    Raises:
        ZeroDivisionError: If ``k`` is 0 while ``p`` is positive.
    """
    main_task = task(k)
    print(main_task)

    per_task_scores = []
    # The index is deliberately not called ``exec`` so the builtin of that
    # name is not shadowed.
    for task_index in range(k):
        agent = eGreedyAgent()
        running_averages = []
        for n in range(p):
            action_result = choose_action(agent)
            reward = main_task.reward(task_index, action_result)
            agent.value_feedback(action_result, reward)
            # The format/float round trip always yields a built-in float,
            # whatever numeric type ``agent.score`` happens to be.
            running_averages.append(
                float(format(agent.score / (n + 1), '.2f'))
            )
        per_task_scores.append(running_averages)
        # NOTE(review): kept from the original.  Whether it is still needed
        # when a new agent is built per task depends on eGreedyAgent, which
        # is defined in do_class and not visible here.
        agent.reset()

    final_score_list = []
    for play in range(p):
        # Plain left-to-right accumulation is kept on purpose: the built-in
        # sum() may use a different float summation algorithm on newer
        # Python versions, which could flip the last rounded digit.
        total = 0
        for running_averages in per_task_scores:
            total += running_averages[play]
        final_score_list.append(float(format(total / k, '.4f')))

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(str(final_score_list))
    return final_score_list


def greedyisgood():
    """Run the experiment choosing actions with ``agent.greedy_choice()``.

    The averaged scores are written to ``sharp.txt`` (overwritten on every
    call) and also returned.

    Returns:
        A list of ``p`` floats, one averaged score per play position.
    """
    return _run_bandit_experiment(
        lambda agent: agent.greedy_choice(),
        'sharp.txt',
    )


def expisgood(temp):
    """Run the experiment choosing actions with ``exponential_choice``.

    The averaged scores are written to ``sharp2.txt`` and also returned.
    Every call overwrites that file, so only the most recent run's data
    stays on disk.

    Args:
        temp: Passed unchanged to ``agent.exponential_choice``; presumably
            the softmax temperature -- confirm against do_class.

    Returns:
        A list of ``p`` floats, one averaged score per play position.
    """
    return _run_bandit_experiment(
        lambda agent: agent.exponential_choice(temp),
        'sharp2.txt',
    )
def main():
    """Run the three experiments and plot their averaged score curves.

    Plots the result of ``greedyisgood()`` in red and the results of
    ``expisgood(0.15)`` and ``expisgood(0.1)`` in green and teal, then
    shows the figure.
    """
    plt.xlabel('play times in each task')
    # The task count comes from ``k``; the label used to hard-code 2000,
    # which is the play count ``p``, not the number of tasks.
    plt.ylabel('average reward over {} tasks'.format(k))
    plt.plot(greedyisgood(), 'r')
    plt.plot(expisgood(0.15), 'green')
    plt.plot(expisgood(0.1), 'teal')
    plt.show()
# Script entry point: print the start-up marker, then run the experiments
# and show the plot.  Nothing runs when the module is merely imported.
if __name__ == "__main__":
    print('what')
    main()
Add Comment
Please, Sign In to add comment