Guest User

Untitled

a guest
Jul 22nd, 2018
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.65 KB | None | 0 0
  1. import random
  2. import gym
  3. import sys
  4. import time
  5. import pickle
  6. import os
  7. env = gym.make('MountainCar-v0')
  8.  
  9.  
  10. #####
  11. # { (1, 3): [actions], (3, 2): [actions], etc... }
  12. qTable = {}
  13. epsilon = 0.2 # 探索因子
  14. alpha = 0.5 # 学习因子
  15. gamma = 0.8 # 折扣因子
  16.  
  17. MAX_EPISODE = 100000
  18. FILE_TO_SAVE = "data2"
  19.  
  20. lastSaveLen = 0
  21. isSuccess = 0
  22. i = 0
  23.  
  24. ACTION_LEFT = 0
  25. ACTION_STAY = 1
  26. ACTION_RIGHT = 2
  27.  
  28. def run():
  29. global i, isSuccess, qTable
  30. qTable = loadObj(FILE_TO_SAVE)
  31. while i < MAX_EPISODE:
  32. state = discretizeState(env.reset())
  33. done = False
  34. while not done:
  35. # 渲染
  36. # if isSuccess > 500:
  37. # env.render()
  38.  
  39. # 操作
  40. action = getActionByState(state)
  41. newState, reward, done, info = env.step(action)
  42. newState = discretizeState(newState)
  43. updateQ(state, action, newState, reward)
  44. # 切换到下一个状态了
  45. state = newState
  46.  
  47. # 成功了就拜拜了
  48. if newState[0] >= 0.5:
  49. isSuccess = isSuccess + 1
  50. if isSuccess % 1000 == 0:
  51. print("1000 Successfully! count: =>")
  52. isSuccess = 0
  53. break
  54. i = i + 1
  55. if isSuccess:
  56. print("成功的男人!")
  57. else:
  58. print("失败的男人,一千回合都没有一次成功!", i)
  59.  
  60. def getActionByState(state):
  61. hasState = state in qTable
  62. # 没有状态或者要探索的时候就随机选择操作
  63. if not hasState or (random.random() <= epsilon):
  64. return env.action_space.sample()
  65. else:
  66. # 找出所有可能的动作中最大的 Q 值的动作返回
  67. actionsQ = qTable[state]
  68. maxVal = max(actionsQ)
  69. return actionsQ.index(maxVal)
  70.  
  71. # 离散化状态,缩小状态空间
  72. def discretizeState(state):
  73. return (round(state[0], 2), round(state[1], 3))
  74.  
  75. # 更新Q值表
  76. def updateQ(state, action, nextState, reward):
  77. global lastSaveLen
  78. stateActionsQ = getActionsQByState(state)
  79. nextStateActionsQ = getActionsQByState(nextState)
  80.  
  81. currentStateQ = stateActionsQ[action]
  82. maxNextStateQ = max(nextStateActionsQ)
  83.  
  84. newStateQ = (1 - alpha) * currentStateQ + alpha * (reward + gamma * maxNextStateQ)
  85. stateActionsQ[action] = newStateQ
  86.  
  87. qTable[state] = stateActionsQ
  88. lenOfTable = len(qTable)
  89. if (lenOfTable % 100 is 0) and (lastSaveLen != lenOfTable):
  90. saveObj(qTable, FILE_TO_SAVE)
  91. print("Save done, table length", lenOfTable)
  92. lastSaveLen = lenOfTable
  93. # time.sleep(1)
  94.  
  95. def getActionsQByState(state):
  96. if state in qTable:
  97. return qTable[state]
  98. else:
  99. return [0, 0, 0]
  100.  
  101. def saveObj(obj, name):
  102. with open(name, 'wb') as f:
  103. pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
  104.  
  105. def loadObj(name):
  106. if not os.path.exists(name):
  107. return {}
  108. with open(name, 'rb') as f:
  109. return pickle.load(f)
  110.  
# Script entry point: training starts as soon as this file is executed.
run()
Add Comment
Please, Sign In to add comment