# Untitled

import random
import gym
import sys
import time
import pickle
import os
# Shared MountainCar-v0 environment used by the functions below.
env = gym.make('MountainCar-v0')


#####
# Q-table: maps a discretized state tuple to a list of Q-values, one per action,
# e.g. { (1, 3): [actions], (3, 2): [actions], etc... }
qTable = {}
epsilon = 0.2 # exploration rate: chance of a random action in an already-known state
alpha = 0.5 # learning rate
gamma = 0.8 # discount factor

MAX_EPISODE = 100000 # upper bound on the episode counter `i` in run()
FILE_TO_SAVE = "data2" # pickle file the Q-table is loaded from and saved to

lastSaveLen = 0 # Q-table size at the most recent save; prevents re-saving at the same size
isSuccess = 0 # success counter maintained by run()
i = 0 # episode counter advanced by run()

# NOTE(review): presumably the MountainCar-v0 action ids; not referenced in the visible code.
ACTION_LEFT = 0
ACTION_STAY = 1
ACTION_RIGHT = 2

def _resetEnv():
  """Reset ``env`` and return only the initial observation.

  Older gym releases return the bare observation from ``reset()``, while
  gym >= 0.26 returns an ``(observation, info)`` pair; both are accepted.
  """
  result = env.reset()
  if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
    return result[0]
  return result


def _stepEnv(action):
  """Advance ``env`` by one step and return ``(observation, reward, done)``.

  Accepts both the 4-tuple step result of older gym releases and the
  5-tuple ``(obs, reward, terminated, truncated, info)`` of gym >= 0.26,
  where either flag ends the episode.
  """
  result = env.step(action)
  if len(result) == 5:
    observation, reward, terminated, truncated, _info = result
    return observation, reward, terminated or truncated
  observation, reward, done, _info = result
  return observation, reward, done


def run():
  """Train the Q-table on MountainCar until MAX_EPISODE episodes have run.

  Loads any previously saved Q-table from FILE_TO_SAVE, then plays episodes
  with the epsilon-greedy policy, applying one Q-learning update per step.
  The table is written back to FILE_TO_SAVE when training stops, including
  when it is interrupted, so progress made since the last periodic save in
  updateQ() is not lost.

  Uses the module globals ``i`` (episode counter), ``isSuccess`` (running
  total of successful episodes) and ``qTable``.
  """
  global i, isSuccess, qTable
  qTable = loadObj(FILE_TO_SAVE)
  try:
    while i < MAX_EPISODE:
      state = discretizeState(_resetEnv())
      done = False
      while not done:
        # Call env.render() here to watch the agent while it trains.

        # Act, learn from the transition, then move to the next state.
        action = getActionByState(state)
        observation, reward, done = _stepEnv(action)
        newState = discretizeState(observation)
        updateQ(state, action, newState, reward)
        state = newState

        # Check the raw position, not the rounded one: rounding would turn
        # e.g. 0.496 into 0.5 and end the episode as a false success.
        if observation[0] >= 0.5:
          isSuccess += 1
          # The counter is cumulative, so the final report below stays
          # correct even after an exact multiple of 1000 successes.
          if isSuccess % 1000 == 0:
            print("1000 Successfully! count: =>", isSuccess)
          break
      i += 1
  finally:
    # Persist everything learned, even if training was interrupted.
    saveObj(qTable, FILE_TO_SAVE)

  if isSuccess:
    print("成功的男人！")
  else:
    print("失败的男人，一千回合都没有一次成功！", i)

def getActionByState(state):
  """Choose an action for ``state`` with an epsilon-greedy policy.

  A random action is sampled from the environment when the state has no
  Q-table entry yet, or when a uniform random draw is at most ``epsilon``.
  Otherwise the action with the largest stored Q-value is returned; ties
  resolve to the lowest action index.
  """
  # Explore: unknown state, or the epsilon roll asks for a random move.
  if state not in qTable or random.random() <= epsilon:
    return env.action_space.sample()

  # Exploit: index of the first occurrence of the best Q-value.
  qValues = qTable[state]
  return qValues.index(max(qValues))

# Discretize the state to shrink the state space.
def discretizeState(state):
  """Return a hashable ``(position, velocity)`` key for an observation.

  The first element of ``state`` is rounded to 2 decimal places and the
  second to 3, so nearby continuous observations share one Q-table entry.
  """
  position = round(state[0], 2)
  velocity = round(state[1], 3)
  return position, velocity

# Update the Q-value table.
def updateQ(state, action, nextState, reward):
  """Apply one Q-learning update for the transition and store it in ``qTable``.

  Args:
    state: discretized state the action was taken in.
    action: index of the action taken.
    nextState: discretized state reached after the action.
    reward: reward received for the transition.

  The table is pickled to FILE_TO_SAVE whenever its size reaches a new
  multiple of 100 entries; ``lastSaveLen`` stops repeated saves while the
  size stays at that value.
  """
  global lastSaveLen
  stateActionsQ = getActionsQByState(state)
  maxNextStateQ = max(getActionsQByState(nextState))

  # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max Q(s', .))
  stateActionsQ[action] = (
    (1 - alpha) * stateActionsQ[action] + alpha * (reward + gamma * maxNextStateQ)
  )
  # Unseen states come back as a fresh list, so (re)store the entry.
  qTable[state] = stateActionsQ

  lenOfTable = len(qTable)
  # Compare by value: `is 0` tests object identity, not equality.
  if lenOfTable % 100 == 0 and lastSaveLen != lenOfTable:
    saveObj(qTable, FILE_TO_SAVE)
    print("Save done, table length", lenOfTable)
    lastSaveLen = lenOfTable

def getActionsQByState(state):
  """Return the list of per-action Q-values stored for ``state``.

  A known state yields the very list held in ``qTable``. An unknown state
  yields a new ``[0, 0, 0]`` list that is not inserted into the table.
  """
  return qTable.get(state, [0, 0, 0])

def saveObj(obj, name):
  """Pickle ``obj`` to the file ``name`` without clobbering a good copy.

  The data is first written to ``name + ".tmp"`` and then moved over
  ``name`` with ``os.replace``. If the dump is interrupted (for example by
  Ctrl-C during training), the previous file is left untouched instead of
  being truncated into an unreadable pickle.

  Args:
    obj: any picklable object.
    name: destination file path.
  """
  tmpName = os.fspath(name) + ".tmp"
  with open(tmpName, 'wb') as f:
    pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
  os.replace(tmpName, name)

def loadObj(name):
  """Load and return the pickled object stored in the file ``name``.

  Returns an empty dict when no file exists at that path.

  NOTE: unpickling can execute arbitrary code, so only load files this
  program wrote itself.
  """
  if os.path.exists(name):
    with open(name, 'rb') as stream:
      return pickle.load(stream)
  return {}

# Start training only when executed as a script, not when imported.
if __name__ == "__main__":
  run()