Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os
import pickle
import random
import sys
import time

import gym
# Environment the agent is trained in.
env = gym.make('MountainCar-v0')

# Q-table: maps a discretized state tuple to a list of per-action Q-values,
# e.g. { (1, 3): [actions], (3, 2): [actions], ... }
qTable = {}

# Q-learning hyperparameters.
epsilon = 0.2  # exploration factor
alpha = 0.5  # learning factor
gamma = 0.8  # discount factor
MAX_EPISODE = 100000

# Persistence: pickle file for the Q-table and the table length at the last save.
FILE_TO_SAVE = "data2"
lastSaveLen = 0

# Run-time counters: successes so far and the index of the current episode.
isSuccess = 0
i = 0

# Discrete action codes.
ACTION_LEFT, ACTION_STAY, ACTION_RIGHT = range(3)
def run():
    """Train the agent with tabular Q-learning for up to ``MAX_EPISODE`` episodes.

    Loads a previously saved Q-table from ``FILE_TO_SAVE`` (if any), then
    plays episodes: pick an action, step the environment, update the
    Q-table. An episode counts as a success when the first component of the
    discretized state (presumably the car position) reaches 0.5. After the
    last episode the Q-table is saved once more, because ``updateQ`` only
    saves when the table grows to a new multiple of 100 entries and would
    otherwise lose every update made after the table stopped growing.

    NOTE(review): the pasted source had lost its indentation; the nesting
    below is reconstructed from the statement order -- confirm against the
    original script.
    """
    global i, isSuccess, qTable
    qTable = loadObj(FILE_TO_SAVE)

    while i < MAX_EPISODE:
        state = discretizeState(env.reset())
        done = False
        while not done:
            # Rendering (disabled):
            # if isSuccess > 500:
            #     env.render()

            # Act, observe, learn.
            action = getActionByState(state)
            newState, reward, done, _info = env.step(action)
            newState = discretizeState(newState)
            updateQ(state, action, newState, reward)

            # Move on to the next state.
            state = newState

            # Goal reached: count the success and end the episode.
            if newState[0] >= 0.5:
                isSuccess = isSuccess + 1
                if isSuccess % 1000 == 0:
                    print("1000 Successfully! count: =>")
                    isSuccess = 0
                break
        i = i + 1

    # Persist whatever was learned since the last periodic save.
    saveObj(qTable, FILE_TO_SAVE)

    if isSuccess:
        print("成功的男人!")
    else:
        print("失败的男人,一千回合都没有一次成功!", i)
def getActionByState(state):
    """Choose an action for ``state`` with an epsilon-greedy rule.

    Exploits (returns the index of the first largest Q-value stored for the
    state) only when the state already has a Q-table entry and a random draw
    exceeds ``epsilon``; otherwise a random action is sampled from the
    environment's action space.
    """
    known = state in qTable
    if known and random.random() > epsilon:
        qValues = qTable[state]
        return qValues.index(max(qValues))
    # Unknown state, or this step was chosen for exploration.
    return env.action_space.sample()
def discretizeState(state):
    """Discretize an observation to shrink the state space.

    The first component is rounded to 2 decimal places and the second to 3
    (presumably position and velocity -- verify against the environment).
    Returns the two rounded values as a tuple, usable as a dict key.
    """
    first = round(state[0], 2)
    second = round(state[1], 3)
    return first, second
def updateQ(state, action, nextState, reward):
    """Apply one Q-learning update for taking ``action`` in ``state``.

    The new value blends the current Q-value with the observed ``reward``
    plus the discounted best Q-value of ``nextState``, weighted by ``alpha``
    and ``gamma``. The updated action list is stored back into ``qTable``.

    As a side effect the Q-table is pickled to ``FILE_TO_SAVE`` whenever its
    size reaches a multiple of 100 that has not been saved yet.
    """
    global lastSaveLen
    stateActionsQ = getActionsQByState(state)
    # Read the next-state maximum before mutating, in case both states coincide.
    maxNextStateQ = max(getActionsQByState(nextState))

    currentStateQ = stateActionsQ[action]
    stateActionsQ[action] = (1 - alpha) * currentStateQ + alpha * (reward + gamma * maxNextStateQ)
    qTable[state] = stateActionsQ

    lenOfTable = len(qTable)
    # Compare by value: ``is 0`` relied on CPython's small-int caching.
    if lenOfTable % 100 == 0 and lastSaveLen != lenOfTable:
        saveObj(qTable, FILE_TO_SAVE)
        print("Save done, table length", lenOfTable)
        lastSaveLen = lenOfTable
def getActionsQByState(state):
    """Return the per-action Q-value list for ``state``.

    Yields the list stored in ``qTable`` when the state is known, otherwise
    a fresh ``[0, 0, 0]`` list (which is not inserted into the table).
    """
    return qTable.get(state, [0, 0, 0])
def saveObj(obj, name):
    """Pickle ``obj`` into the file at path ``name``, overwriting it.

    Uses ``pickle.HIGHEST_PROTOCOL``.
    """
    with open(name, mode='wb') as outFile:
        pickle.dump(obj, outFile, protocol=pickle.HIGHEST_PROTOCOL)
def loadObj(name):
    """Load and return the pickled object stored at path ``name``.

    Returns an empty dict when no file exists at that path.

    NOTE: unpickling can execute arbitrary code; only load files this
    script wrote itself.
    """
    if os.path.exists(name):
        with open(name, mode='rb') as inFile:
            return pickle.load(inFile)
    return {}
# Start training as soon as the module is loaded.
run()
Add Comment
Please, Sign In to add comment