Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Solution for exercise 2.2: comparing action-selection strategies (greedy,
# fixed step-size, reinforcement comparison, pursuit) on the k-armed bandit.
- import numpy as np
- import matplotlib.pyplot as plt
- from do_class import *
k = 100  # number of independent bandit tasks (runs) whose curves are averaged
p = 2000  # number of plays (time steps) per task
def test():
    """Smoke check: build a fresh task set and show the first task's arm values."""
    sanity_task = task(k, False)
    print(sanity_task.tasks[0])
def greedyisgood():
    """Epsilon-greedy agent with sample-average value updates.

    Runs k independent nonstationary bandit tasks of p plays each.
    Returns the per-play running-average score, averaged point-wise over
    the k tasks; the list is also written to 'greedy.txt'.
    """
    main_task = task(k, False)
    print('greedy')
    print(main_task.tasks[0])
    # score_list[t][n] = agent's running-average score after play n of task t.
    score_list = []
    for run in range(k):  # 'run' rather than 'exec' — don't shadow the builtin
        agent = eGreedyAgent()
        run_scores = []
        for n in range(p):
            action_result = agent.greedy_choice()
            reward = main_task.reward(run, action_result)
            agent.value_feedback(action_result, reward)
            # Running average so far, rounded to 2 decimals as before.
            run_scores.append(float(format(agent.score / (n + 1), '.2f')))
            main_task.nonstationarymove(run)  # drift the arm values each play
        score_list.append(run_scores)
        # Kept for parity with the original (a fresh agent is built next run
        # anyway, but reset() may have side effects we can't see from here).
        agent.reset()
    # Average the k per-task curves point-wise into a single learning curve.
    final_score_list = [
        float(format(sum(scores[play] for scores in score_list) / k, '.4f'))
        for play in range(p)
    ]
    with open('greedy.txt', 'w') as f:
        f.write(str(final_score_list))
    return final_score_list
def nsisgood():
    """Epsilon-greedy agent with fixed-step-size (nonstationary) updates.

    Runs k independent nonstationary bandit tasks of p plays each.
    Returns the per-play running-average score, averaged point-wise over
    the k tasks; the list is also written to 'fixStepSize.txt'.
    """
    main_task = task(k, False)
    print('nsisgood')
    print(main_task.tasks[0])
    # score_list[t][n] = agent's running-average score after play n of task t.
    score_list = []
    for run in range(k):  # 'run' rather than 'exec' — don't shadow the builtin
        agent = eGreedyAgent()
        run_scores = []
        for n in range(p):
            action_result = agent.greedy_choice()
            reward = main_task.reward(run, action_result)
            agent.nonstationary_value_feedback(action_result, reward)
            # Running average so far, rounded to 2 decimals as before.
            run_scores.append(float(format(agent.score / (n + 1), '.2f')))
            main_task.nonstationarymove(run)  # drift the arm values each play
        score_list.append(run_scores)
        # Kept for parity with the original (a fresh agent is built next run
        # anyway, but reset() may have side effects we can't see from here).
        agent.reset()
    # Average the k per-task curves point-wise into a single learning curve.
    final_score_list = [
        float(format(sum(scores[play] for scores in score_list) / k, '.4f'))
        for play in range(p)
    ]
    with open('fixStepSize.txt', 'w') as f:
        f.write(str(final_score_list))
    return final_score_list
def comparisonisgood():
    """Reinforcement-comparison agent (softmax/exponential action choice).

    Runs k independent nonstationary bandit tasks of p plays each.
    Returns the per-play running-average score, averaged point-wise over
    the k tasks; the list is also written to 'comparison.txt'.
    """
    main_task3 = task(k, False)
    print('comparison')
    print(main_task3.tasks[0])
    # score_list[t][n] = agent's running-average score after play n of task t.
    score_list = []
    for run in range(k):  # 'run' rather than 'exec' — don't shadow the builtin
        agent = eGreedyAgent()
        run_scores = []
        for n in range(p):
            action_result = agent.exponential_choice()
            reward = main_task3.reward(run, action_result)
            agent.reinforcement_comparison_feedback(action_result, reward)
            # Running average so far, rounded to 2 decimals as before.
            run_scores.append(float(format(agent.score / (n + 1), '.2f')))
            main_task3.nonstationarymove(run)  # drift the arm values each play
        score_list.append(run_scores)
        # Kept for parity with the original (a fresh agent is built next run
        # anyway, but reset() may have side effects we can't see from here).
        agent.reset()
    # Average the k per-task curves point-wise into a single learning curve.
    final_score_list = [
        float(format(sum(scores[play] for scores in score_list) / k, '.4f'))
        for play in range(p)
    ]
    with open('comparison.txt', 'w') as f:
        f.write(str(final_score_list))
    return final_score_list
def comparisonisgoodwf():
    """Reinforcement-comparison agent WITHOUT the comparison factor.

    Runs k independent nonstationary bandit tasks of p plays each.
    Returns the per-play running-average score, averaged point-wise over
    the k tasks; the list is also written to 'comparisonwf.txt'.

    Bug fix: this previously wrote to 'comparison.txt', silently
    overwriting comparisonisgood()'s results when main() ran both.
    """
    main_task3 = task(k, False)
    print('comparisonwf')
    print(main_task3.tasks[0])
    # score_list[t][n] = agent's running-average score after play n of task t.
    score_list = []
    for run in range(k):  # 'run' rather than 'exec' — don't shadow the builtin
        agent = eGreedyAgent()
        run_scores = []
        for n in range(p):
            action_result = agent.exponential_choice()
            reward = main_task3.reward(run, action_result)
            agent.reinforcement_comparison_feedback_without_factor(action_result, reward)
            # Running average so far, rounded to 2 decimals as before.
            run_scores.append(float(format(agent.score / (n + 1), '.2f')))
            main_task3.nonstationarymove(run)  # drift the arm values each play
        score_list.append(run_scores)
        # Kept for parity with the original (a fresh agent is built next run
        # anyway, but reset() may have side effects we can't see from here).
        agent.reset()
    # Average the k per-task curves point-wise into a single learning curve.
    final_score_list = [
        float(format(sum(scores[play] for scores in score_list) / k, '.4f'))
        for play in range(p)
    ]
    with open('comparisonwf.txt', 'w') as f:
        f.write(str(final_score_list))
    return final_score_list
def persuitisgood():
    """Pursuit-method agent (pursuit choice + fixed-step-size updates).

    Runs k independent nonstationary bandit tasks of p plays each.
    Returns the per-play running-average score, averaged point-wise over
    the k tasks; the list is also written to 'persuit.txt'.
    (The 'persuit' spelling is kept: it is the public name and output file.)
    """
    main_task = task(k, False)
    print('persuit')
    print(main_task.tasks[0])
    # score_list[t][n] = agent's running-average score after play n of task t.
    score_list = []
    for run in range(k):  # 'run' rather than 'exec' — don't shadow the builtin
        agent = eGreedyAgent()
        run_scores = []
        for n in range(p):
            action_result = agent.persuit_choice()
            reward = main_task.reward(run, action_result)
            agent.nonstationary_value_feedback(action_result, reward)
            # Running average so far, rounded to 2 decimals as before.
            run_scores.append(float(format(agent.score / (n + 1), '.2f')))
            main_task.nonstationarymove(run)  # drift the arm values each play
        score_list.append(run_scores)
        # Kept for parity with the original (a fresh agent is built next run
        # anyway, but reset() may have side effects we can't see from here).
        agent.reset()
    # Average the k per-task curves point-wise into a single learning curve.
    final_score_list = [
        float(format(sum(scores[play] for scores in score_list) / k, '.4f'))
        for play in range(p)
    ]
    with open('persuit.txt', 'w') as f:
        f.write(str(final_score_list))
    return final_score_list
def main():
    """Run the smoke test, then every agent variant, overlaying their curves."""
    test()
    plt.plot(persuitisgood(), 'purple')
    plt.xlabel('play times in each task')
    # Derive the label from k: curves are averaged over the k tasks.
    # (The old hard-coded text said "2000 tasks", but 2000 is p, the number
    # of plays per task; k is 100.)
    plt.ylabel('average reward over ' + str(k) + ' tasks')
    plt.plot(greedyisgood(), 'r')
    plt.plot(nsisgood(), 'b')
    plt.plot(comparisonisgood(), 'y')
    plt.plot(comparisonisgoodwf(), 'g')
    plt.show()
# Script entry point: announce the run, then execute all experiments and plot.
if __name__ == '__main__':
    print('start')
    main()
Add Comment
Please, Sign In to add comment