Advertisement
Guest User

Untitled

a guest
Jan 22nd, 2020
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.54 KB | None | 0 0
  1. import matplotlib.pyplot as plt
  2. import numpy as np
  3. import math
  4.  
  5. n=10
  6.  
  7. plays=1000
  8. epoch=2000
  9. average_rewards_1=np.zeros(1000)
  10. average_rewards_2=np.zeros(1000)
  11. average_rewards_3=np.zeros(1000)
  12.  
  13. def action(epsilon, Q):
  14. #random probability
  15. p=np.random.random()
  16. #choose action
  17. if p > epsilon:
  18. action=np.argmax(Q)
  19. else:
  20. action=np.random.randint(0, 10)
  21. #get reward
  22. reward=np.random.uniform(Q[action], 1)
  23. return action, reward
  24.  
  25. for i in range(epoch):
  26. epsilon_1=0.1
  27. epsilon_2=0.01
  28. epsilon_3=0.0
  29. Q1=np.zeros((n, 1))
  30. N1=np.zeros((n, 1))
  31. Q2=np.zeros((n, 1))
  32. N2=np.zeros((n, 1))
  33. Q3=np.zeros((n, 1))
  34. N3=np.zeros((n, 1))
  35. #each Q is chosen randomly by a uniform distribution
  36. for j in range(n):
  37. Q1[j]=np.random.uniform(0, 1)
  38. Q2[j]=np.random.uniform(0, 1)
  39. Q3[j]=np.random.uniform(0, 1)
  40. iteration=0
  41. total_reward_1=0
  42. total_reward_2=0
  43. total_reward_3=0
  44. average_reward_1=[]
  45. average_reward_2=[]
  46. average_reward_3=[]
  47. total_optimal_action_1=np.zeros(1000)
  48. total_optimal_action_2=np.zeros(1000)
  49. total_optimal_action_3=np.zeros(1000)
  50. for k in range(plays):
  51. a1, reward1=action(epsilon_1, Q1)
  52. a2, reward2=action(epsilon_2, Q2)
  53. a3, reward3=action(epsilon_3, Q3)
  54. N1[a1]=N1[a1]+1
  55. N2[a2]=N2[a2]+1
  56. N3[a3]=N3[a3]+1
  57. Q1[a1]=Q1[a1]+(1/N1[a1])*(reward1-Q1[a1])
  58. Q2[a2]=Q2[a2]+(1/N2[a2])*(reward2-Q2[a2])
  59. Q3[a3]=Q3[a3]+(1/N3[a3])*(reward3-Q3[a3])
  60. iteration=iteration + 1
  61. total_reward_1=total_reward_1 + reward1
  62. total_reward_2=total_reward_2 + reward2
  63. total_reward_3=total_reward_3 + reward3
  64. average_reward_1.append(total_reward_1/iteration)
  65. average_reward_2.append(total_reward_2/iteration)
  66. average_reward_3.append(total_reward_3/iteration)
  67. #??
  68. average_rewards_1=np.add(average_rewards_1, average_reward_1)
  69. average_rewards_2=np.add(average_rewards_2, average_reward_2)
  70. average_rewards_3=np.add(average_rewards_3, average_reward_3)
  71.  
  72. average_rewards_1=average_rewards_1/2000
  73. average_rewards_2=average_rewards_2/2000
  74. average_rewards_3=average_rewards_3/2000
  75.  
  76. plt.plot(range(1000), average_rewards_1, label='Epsilon=0.1', color='b')
  77. plt.plot(range(1000), average_rewards_2, label='Epsilon=0.01', color='k')
  78. plt.plot(range(1000), average_rewards_3, label='Epsilon=0.0', color='r')
  79. plt.xlabel('Iterations')
  80. plt.ylabel('Average reward')
  81. plt.legend(loc='best')
  82. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement