LyWang

2.7.py

Nov 7th, 2018
166
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.88 KB | None | 0 0
  1. # solution for exercise 2.2, about softmax solution.
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from do_class import *
  5.  
k = 100  # number of tasks; each experiment loops over range(k) and averages over k
p = 2000  # number of plays the agent makes on each task
  8.  
  9.  
  10. def test():
  11.     main_task = task(k, False)
  12.     print(main_task.tasks[0])
  13.  
  14.  
  15. def greedyisgood():
  16.     score_list = [[]]
  17.     final_score_list = []
  18.     main_task = task(k, False)
  19.     print('greedy')
  20.     print(main_task.tasks[0])
  21.     for exec in range(k):
  22.         agent = eGreedyAgent()
  23.         for n in range(p):
  24.             action_result = agent.greedy_choice()
  25.             reward = main_task.reward(exec, action_result)
  26.             agent.value_feedback(action_result, reward)
  27.             score_list[exec].append(float(format(agent.score / (n + 1), '.2f')))
  28.             main_task.nonstationarymove(exec)
  29.         score_list.append([])
  30.         # print('Round'+str(exec))
  31.         # print((agent.score/p)/max(main_task.tasks[exec]))
  32.         agent.reset()
  33.     for play in range(p):
  34.         result = 0
  35.         for exec2 in range(k):
  36.             result += score_list[exec2][play]
  37.         final_score_list.append(float(format(result / k, '.4f')))
  38.     with open('greedy.txt', 'w') as f:
  39.         f.write(str(final_score_list))
  40.     return final_score_list
  41.  
  42.  
  43. def nsisgood():
  44.     score_list = [[]]
  45.     final_score_list = []
  46.     main_task = task(k, False)
  47.     print('nsisgood')
  48.     print(main_task.tasks[0])
  49.     for exec in range(k):
  50.         agent = eGreedyAgent()
  51.         for n in range(p):
  52.             action_result = agent.greedy_choice()
  53.             reward = main_task.reward(exec, action_result)
  54.             agent.nonstationary_value_feedback(action_result, reward)
  55.             score_list[exec].append(float(format(agent.score / (n + 1), '.2f')))
  56.             main_task.nonstationarymove(exec)
  57.         score_list.append([])
  58.         # print('Round'+str(exec))
  59.         # print((agent.score/p)/max(main_task.tasks[exec]))
  60.         agent.reset()
  61.     for play in range(p):
  62.         result = 0
  63.         for exec2 in range(k):
  64.             result += score_list[exec2][play]
  65.         final_score_list.append(float(format(result / k, '.4f')))
  66.     with open('fixStepSize.txt', 'w') as f:
  67.         f.write(str(final_score_list))
  68.     return final_score_list
  69.  
  70.  
  71. def comparisonisgood():
  72.     score_list = [[]]
  73.     final_score_list = []
  74.     main_task3 = task(k, False)
  75.     print('comparison')
  76.     print(main_task3.tasks[0])
  77.     for exec in range(k):
  78.         agent = eGreedyAgent()
  79.         for n in range(p):
  80.             action_result = agent.exponential_choice()
  81.             # print(action_result)
  82.             reward = main_task3.reward(exec, action_result)
  83.             agent.reinforcement_comparison_feedback(action_result, reward)
  84.             score_list[exec].append(float(format(agent.score / (n + 1), '.2f')))
  85.             main_task3.nonstationarymove(exec)
  86.         score_list.append([])
  87.         # print('Round'+str(exec))
  88.         # print((agent.score/p)/max(main_task.tasks[exec]))
  89.         agent.reset()
  90.     for play in range(p):
  91.         result = 0
  92.         for exec2 in range(k):
  93.             result += score_list[exec2][play]
  94.         final_score_list.append(float(format(result / k, '.4f')))
  95.     with open('comparison.txt', 'w') as f:
  96.         f.write(str(final_score_list))
  97.     return final_score_list
  98.  
  99. def comparisonisgoodwf():
  100.     score_list = [[]]
  101.     final_score_list = []
  102.     main_task3 = task(k, False)
  103.     print('comparisonwf')
  104.     print(main_task3.tasks[0])
  105.     for exec in range(k):
  106.         agent = eGreedyAgent()
  107.         for n in range(p):
  108.             action_result = agent.exponential_choice()
  109.             # print(action_result)
  110.             reward = main_task3.reward(exec, action_result)
  111.             agent.reinforcement_comparison_feedback_without_factor(action_result, reward)
  112.             score_list[exec].append(float(format(agent.score / (n + 1), '.2f')))
  113.             main_task3.nonstationarymove(exec)
  114.         score_list.append([])
  115.         # print('Round'+str(exec))
  116.         # print((agent.score/p)/max(main_task.tasks[exec]))
  117.         agent.reset()
  118.     for play in range(p):
  119.         result = 0
  120.         for exec2 in range(k):
  121.             result += score_list[exec2][play]
  122.         final_score_list.append(float(format(result / k, '.4f')))
  123.     with open('comparison.txt', 'w') as f:
  124.         f.write(str(final_score_list))
  125.     return final_score_list
  126.  
  127. def persuitisgood():
  128.     score_list = [[]]
  129.     final_score_list = []
  130.     main_task = task(k, False)
  131.     print('persuit')
  132.     print(main_task.tasks[0])
  133.     for exec in range(k):
  134.         agent = eGreedyAgent()
  135.         for n in range(p):
  136.             action_result = agent.persuit_choice()
  137.             # print('action_result')
  138.             # print(action_result)
  139.             reward = main_task.reward(exec, action_result)
  140.             agent.nonstationary_value_feedback(action_result, reward)
  141.             score_list[exec].append(float(format(agent.score / (n + 1), '.2f')))
  142.             main_task.nonstationarymove(exec)
  143.         # print('persuitcounter')
  144.         # print(agent.counter)
  145.         score_list.append([])
  146.         # print('Round'+str(exec))
  147.         # print((agent.score/p)/max(main_task.tasks[exec]))
  148.         agent.reset()
  149.     for play in range(p):
  150.         result = 0
  151.         for exec2 in range(k):
  152.             result += score_list[exec2][play]
  153.         final_score_list.append(float(format(result / k, '.4f')))
  154.     with open('persuit.txt', 'w') as f:
  155.         f.write(str(final_score_list))
  156.     return final_score_list
  157.  
  158.  
  159. def main():
  160.     test()
  161.     plt.plot(persuitisgood(), 'purple')
  162.     plt.xlabel('play times in each task')
  163.     plt.ylabel('average reward over 2000 tasks')
  164.     plt.plot(greedyisgood(), 'r')
  165.     plt.plot(nsisgood(), 'b')
  166.     plt.plot(comparisonisgood(), 'y')
  167.     plt.plot(comparisonisgoodwf(), 'g')
  168.     plt.show()
  169.  
  170.  
# Script entry point: print a start marker, then run all experiments and plot.
if __name__ == '__main__':
    print('start')
    main()
Add Comment
Please, Sign In to add comment