LyWang

main.py_for_AI_1

Nov 7th, 2018
146
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.31 KB | None | 0 0
# Solution for exercise 2.2, using softmax action selection.
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. from do_class import *
  5.  
  6. k = 1000  # k represents the number of tasks
  7. p = 2000  # p represent the number of play count
  8.  
  9.  
  10. def greedyisgood():
  11.     score_list = [[]]
  12.     final_score_list = []
  13.     main_task = task(k)
  14.     print(main_task)
  15.     for exec in range(k):
  16.         agent = eGreedyAgent()
  17.         for n in range(p):
  18.             action_result = agent.greedy_choice()
  19.             reward = main_task.reward(exec, action_result)
  20.             agent.value_feedback(action_result, reward)
  21.             score_list[exec].append(float(format(agent.score / (n + 1), '.2f')))
  22.         score_list.append([])
  23.         # print('Round'+str(exec))
  24.         # print((agent.score/p)/max(main_task.tasks[exec]))
  25.         agent.reset()
  26.     for play in range(p):
  27.         result = 0
  28.         for exec2 in range(k):
  29.             result += score_list[exec2][play]
  30.         final_score_list.append(float(format(result / k, '.4f')))
  31.     with open('sharp.txt', 'w') as f:
  32.         f.write(str(final_score_list))
  33.     return final_score_list
  34.  
  35.  
  36. def expisgood(temp):
  37.     score_list = [[]]
  38.     final_score_list = []
  39.     main_task = task(k)
  40.     print(main_task)
  41.     for exec in range(k):
  42.         agent = eGreedyAgent()
  43.         for n in range(p):
  44.             action_result = agent.exponential_choice(temp)
  45.             reward = main_task.reward(exec, action_result)
  46.             agent.value_feedback(action_result, reward)
  47.             score_list[exec].append(float(format(agent.score / (n + 1), '.2f')))
  48.         score_list.append([])
  49.         # print('Round'+str(exec))
  50.         # print((agent.score/p)/max(main_task.tasks[exec]))
  51.         agent.reset()
  52.     for play in range(p):
  53.         result = 0
  54.         for exec2 in range(k):
  55.             result += score_list[exec2][play]
  56.         final_score_list.append(float(format(result / k, '.4f')))
  57.     with open('sharp2.txt', 'w') as f:
  58.         f.write(str(final_score_list))
  59.     return final_score_list
  60.  
  61.  
  62. def main():
  63.     plt.xlabel('play times in each task')
  64.     plt.ylabel('average reward over 2000 tasks')
  65.     plt.plot(greedyisgood(), 'r')
  66.     plt.plot(expisgood(0.15), 'green')
  67.     plt.plot(expisgood(0.1), 'teal')
  68.     plt.show()
  69.  
  70.  
  71. if __name__ == '__main__':
  72.     print('what')
  73.     main()
Add Comment
Please, Sign In to add comment