{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import gym\n",
"import os\n",
"import sys\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from gym import wrappers\n",
"from datetime import datetime\n",
"from sklearn.preprocessing import StandardScaler\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Dropout, Activation\n",
"from keras.layers import Embedding\n",
"from keras.optimizers import SGD, RMSprop, Adam, Adamax"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def plot_running_avg(totalrewards):\n",
"    N = len(totalrewards)\n",
"    running_avg = np.empty(N)\n",
"    for t in range(N):\n",
"        running_avg[t] = totalrewards[max(0, t-100):(t+1)].mean()\n",
"    plt.plot(running_avg)\n",
"    plt.title(\"Running Average\")\n",
"    plt.show()"
]
},
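{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, `plot_running_avg` plots the trailing mean over (up to) the last 100 episodes:\n",
"\n",
"$$\\bar{R}_t = \\frac{1}{t - \\max(0, t-100) + 1} \\sum_{i=\\max(0, t-100)}^{t} R_i$$\n",
"\n",
"This is the same statistic the training loop below uses to decide when the environment is solved."
]
},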
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"env = gym.make('LunarLander-v2')"
]
},
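{
"cell_type": "markdown",
"metadata": {},
"source": [
"`LunarLander-v2` has an 8-dimensional continuous observation (position, velocity, angle, angular velocity, and two leg-contact flags) and 4 discrete actions (do nothing, fire left engine, fire main engine, fire right engine). That is why the networks below take `input_shape=(8,)` and why 4 of them are created. The task is considered solved when the average reward over 100 consecutive episodes reaches 200."
]
},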
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# build a set of samples so we can get a scaler fitted.\n",
"observation_samples = []\n",
"\n",
"# play a bunch of games randomly and collect observations\n",
"for n in range(1000):\n",
"    observation = env.reset()\n",
"    observation_samples.append(observation)\n",
"    done = False\n",
"    while not done:\n",
"        action = np.random.randint(0, env.action_space.n)\n",
"        observation, reward, done, _ = env.step(action)\n",
"        observation_samples.append(observation)\n",
"\n",
"observation_samples = np.array(observation_samples)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# record episode stats (and videos) to 'monitor-folder'; force=True clears any previous monitor files there\n",
"env = wrappers.Monitor(env, 'monitor-folder', force=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"# Create scaler and fit\n",
"scaler = StandardScaler()\n",
"scaler.fit(observation_samples)"
]
},
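{
"cell_type": "markdown",
"metadata": {},
"source": [
"`StandardScaler` standardizes each observation feature using the mean $\\mu_j$ and standard deviation $\\sigma_j$ estimated from the random-play samples above:\n",
"\n",
"$$z_j = \\frac{x_j - \\mu_j}{\\sigma_j}$$\n",
"\n",
"Feeding roughly zero-mean, unit-variance inputs to the networks below tends to make training more stable than using raw observations."
]
},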
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Using the excellent Keras to build a standard feedforward neural network:\n",
"# single output node, linear activation on the output.\n",
"# To keep things simple, one NN is created per action. So\n",
"# in this problem, 4 independent neural networks are created.\n",
"# The Adamax optimizer works well here, using default parameters.\n",
"# (Note: init= and nb_epoch= in this notebook are Keras 1 API;\n",
"# Keras 2 renamed them to kernel_initializer= and epochs=.)\n",
"\n",
"def create_nn():\n",
"    model = Sequential()\n",
"    model.add(Dense(128, init='lecun_uniform', input_shape=(8,)))\n",
"    model.add(Activation('relu'))\n",
"    # model.add(Dropout(0.3))  # I'm not using dropout, but maybe you wanna give it a try?\n",
"\n",
"    model.add(Dense(256, init='lecun_uniform'))\n",
"    model.add(Activation('tanh'))\n",
"    # model.add(Dropout(0.5))\n",
"\n",
"    model.add(Dense(1, init='lecun_uniform'))\n",
"    model.add(Activation('linear'))  # linear output so we can have a range of real-valued outputs\n",
"\n",
"    # other optimizers to try:\n",
"    # rms = RMSprop(lr=0.005)\n",
"    # sgd = SGD(lr=0.1, decay=0.0, momentum=0.0, nesterov=False)\n",
"    # adam = Adam(lr=0.0005)\n",
"    adamax = Adamax()  # Adamax(lr=0.001)\n",
"    model.compile(loss='mse', optimizer=adamax)\n",
"    # model.summary()\n",
"    return model"
]
},
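{
"cell_type": "markdown",
"metadata": {},
"source": [
"For comparison, a common alternative design (not used in this notebook) is a single network with one linear output per action, so all actions share the hidden layers. A minimal sketch under the same Keras 1 conventions; `create_single_nn` and `n_actions` are hypothetical names:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Hypothetical alternative: one shared network with n_actions Q-value outputs\n",
"# instead of 4 independent single-output networks.\n",
"def create_single_nn(n_actions=4):\n",
"    model = Sequential()\n",
"    model.add(Dense(128, init='lecun_uniform', input_shape=(8,)))\n",
"    model.add(Activation('relu'))\n",
"    model.add(Dense(256, init='lecun_uniform'))\n",
"    model.add(Activation('tanh'))\n",
"    model.add(Dense(n_actions, init='lecun_uniform'))  # one Q-value per action\n",
"    model.add(Activation('linear'))\n",
"    model.compile(loss='mse', optimizer=Adamax())\n",
"    return model"
]
},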
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Holds one nn for each action\n",
"class Model:\n",
"    def __init__(self, env, scaler):\n",
"        self.env = env\n",
"        self.scaler = scaler\n",
"        self.models = []\n",
"        for i in range(env.action_space.n):\n",
"            model = create_nn()  # one nn per action\n",
"            self.models.append(model)\n",
"\n",
"    def predict(self, s):\n",
"        X = self.scaler.transform(np.atleast_2d(s))\n",
"        return np.array([m.predict(np.array(X), verbose=0)[0] for m in self.models])\n",
"\n",
"    def update(self, s, a, G):\n",
"        X = self.scaler.transform(np.atleast_2d(s))\n",
"        self.models[a].fit(np.array(X), np.array([G]), nb_epoch=1, verbose=0)\n",
"\n",
"    def sample_action(self, s, eps):\n",
"        if np.random.random() < eps:\n",
"            return self.env.action_space.sample()\n",
"        else:\n",
"            return np.argmax(self.predict(s))\n"
]
},
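{
"cell_type": "markdown",
"metadata": {},
"source": [
"`sample_action` implements the standard $\\varepsilon$-greedy policy:\n",
"\n",
"$$\\pi(s) = \\begin{cases} \\text{random action} & \\text{with probability } \\varepsilon \\\\ \\arg\\max_a \\hat{Q}(s, a) & \\text{with probability } 1 - \\varepsilon \\end{cases}$$\n",
"\n",
"where $\\hat{Q}(s, a)$ is the prediction of the $a$-th network on the scaled state."
]
},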
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def play_one(env, model, eps, gamma):\n",
"    observation = env.reset()\n",
"    done = False\n",
"    totalreward = 0\n",
"    iters = 0\n",
"    while not done:\n",
"        action = model.sample_action(observation, eps)\n",
"        prev_observation = observation\n",
"        observation, reward, done, info = env.step(action)\n",
"\n",
"        # update the model\n",
"        # standard Q learning TD(0)\n",
"        if done:\n",
"            # terminal state: no value to bootstrap from\n",
"            G = reward\n",
"        else:\n",
"            next_q = model.predict(observation)\n",
"            G = reward + gamma*np.max(next_q)\n",
"        model.update(prev_observation, action, G)\n",
"        totalreward += reward\n",
"        iters += 1\n",
"\n",
"    return totalreward, iters\n"
]
},
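{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each step performs one Q-learning (TD(0)) update. The target for the taken action $a$ in state $s$ is\n",
"\n",
"$$G = r + \\gamma \\max_{a'} \\hat{Q}(s', a')$$\n",
"\n",
"(just $G = r$ at a terminal state), and `model.update` fits the corresponding network toward $G$ with a squared-error loss, i.e. it takes a gradient step on $(\\hat{Q}(s, a) - G)^2$."
]
},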
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
"model = Model(env, scaler)\n",
"gamma = 0.99\n",
"\n",
"N = 8010\n",
"totalrewards = np.empty(N)\n",
"for n in range(N):\n",
"    eps = 1.0/np.sqrt(n+1)\n",
"    totalreward, iters = play_one(env, model, eps, gamma)\n",
"    totalrewards[n] = totalreward\n",
"    if n % 100 == 0:\n",
"        print(\"episode:\", n, \"iters\", iters, \"total reward:\", totalreward, \"eps:\", eps, \"avg reward (last 100):\", totalrewards[max(0, n-100):(n+1)].mean())\n",
"    if totalrewards[max(0, n-100):(n+1)].mean() >= 200:\n",
"        break\n",
"\n",
"# keep only the episodes actually played, in case we stopped early\n",
"totalrewards = totalrewards[:n+1]\n",
"\n",
"print(\"avg reward for last 100 episodes:\", totalrewards[-100:].mean())\n",
"print(\"total reward summed over all episodes:\", totalrewards.sum())\n",
"\n",
"plt.plot(totalrewards)\n",
"plt.title(\"Rewards\")\n",
"plt.show()\n",
"\n",
"plot_running_avg(totalrewards)"
]
},
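{
"cell_type": "markdown",
"metadata": {},
"source": [
"The exploration rate decays as $\\varepsilon_n = 1/\\sqrt{n+1}$, so exploration falls off quickly at first and then slowly: $\\varepsilon_0 = 1.0$, $\\varepsilon_{99} = 0.1$, and $\\varepsilon_{9999}$ would be $0.01$. The loop stops early once the average reward over the last 100 episodes reaches 200, the usual \"solved\" threshold for LunarLander-v2."
]
},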
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"env.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda env:tensorflow]",
"language": "python",
"name": "conda-env-tensorflow-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}