Guest User

Untitled

a guest
Jan 16th, 2018
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.35 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 44,
  6. "metadata": {},
  7. "outputs": [
  8. {
  9. "name": "stdout",
  10. "output_type": "stream",
  11. "text": [
  12. "Populating the interactive namespace from numpy and matplotlib\n"
  13. ]
  14. }
  15. ],
  16. "source": [
  17. "%pylab inline"
  18. ]
  19. },
  20. {
  21. "cell_type": "code",
  22. "execution_count": 45,
  23. "metadata": {
  24. "collapsed": true
  25. },
  26. "outputs": [],
  27. "source": [
  28. "import pandas as pd"
  29. ]
  30. },
  31. {
  32. "cell_type": "code",
  33. "execution_count": 47,
  34. "metadata": {
  35. "collapsed": true
  36. },
  37. "outputs": [],
  38. "source": [
  39. "def make_data(samples, x_dims, seed=None):\n",
  40. "    \"\"\"Build a synthetic regression set and publish its train/validation split as globals.\n",
  41. "\n",
  42. "    Column f0 is overwritten with y * 123, so the target is an exact linear function of f0.\n",
  43. "    The first int(0.2 * len(x)) rows become validation arrays (vx, vy); the rest train (tx, ty).\n",
  44. "    Pass `seed` for a reproducible draw; by default NumPy's global random state is used.\n",
  45. "    Returns the full feature DataFrame, which is also stored in the global `x`.\n",
  46. "    \"\"\"\n",
  47. "    global x, y, tx, ty, vx, vy, features\n",
  48. "    rng = np.random if seed is None else np.random.RandomState(seed)\n",
  49. "    y = pd.DataFrame({'y': rng.normal(0, 10, size=samples)})\n",
  50. "    x = pd.DataFrame()\n",
  51. "    for i in range(x_dims):\n",
  52. "        x['f{0}'.format(i)] = rng.normal(0, 10, size=samples)\n",
  53. "    # Runs once, after the loop: replace f0 with a scaled copy of the target.\n",
  54. "    x['f0'] = y['y'] * 123.\n",
  55. "\n",
  56. "    # Split by position without shuffling: the leading rows are held out.\n",
  57. "    HELD_OUT_RATIO = 0.2\n",
  58. "    slicer = int(len(x) * HELD_OUT_RATIO)\n",
  59. "    train_x, train_y = x[slicer:], y[slicer:]\n",
  60. "    val_x, val_y = x[:slicer], y[:slicer]\n",
  61. "\n",
  62. "    # Feature arrays are (rows, x_dims); target arrays are (rows, 1).\n",
  63. "    tx = train_x.values.reshape((len(train_x), x_dims))\n",
  64. "    ty = train_y.values.reshape((len(train_y), 1))\n",
  65. "    vx = val_x.values.reshape((len(val_x), x_dims))\n",
  66. "    vy = val_y.values.reshape((len(val_y), 1))\n",
  67. "    features = (0, x_dims)\n",
  68. "    return x"
  69. ]
  70. },
  71. {
  72. "cell_type": "code",
  73. "execution_count": 48,
  74. "metadata": {},
  75. "outputs": [
  76. {
  77. "data": {
  78. "text/html": [
  79. "<div>\n",
  80. "<style>\n",
  81. " .dataframe thead tr:only-child th {\n",
  82. " text-align: right;\n",
  83. " }\n",
  84. "\n",
  85. " .dataframe thead th {\n",
  86. " text-align: left;\n",
  87. " }\n",
  88. "\n",
  89. " .dataframe tbody tr th {\n",
  90. " vertical-align: top;\n",
  91. " }\n",
  92. "</style>\n",
  93. "<table border=\"1\" class=\"dataframe\">\n",
  94. " <thead>\n",
  95. " <tr style=\"text-align: right;\">\n",
  96. " <th></th>\n",
  97. " <th>f0</th>\n",
  98. " </tr>\n",
  99. " </thead>\n",
  100. " <tbody>\n",
  101. " <tr>\n",
  102. " <th>0</th>\n",
  103. " <td>-1372.739221</td>\n",
  104. " </tr>\n",
  105. " <tr>\n",
  106. " <th>1</th>\n",
  107. " <td>-243.376695</td>\n",
  108. " </tr>\n",
  109. " <tr>\n",
  110. " <th>2</th>\n",
  111. " <td>2160.631416</td>\n",
  112. " </tr>\n",
  113. " <tr>\n",
  114. " <th>3</th>\n",
  115. " <td>-451.650977</td>\n",
  116. " </tr>\n",
  117. " <tr>\n",
  118. " <th>4</th>\n",
  119. " <td>-2080.222461</td>\n",
  120. " </tr>\n",
  121. " </tbody>\n",
  122. "</table>\n",
  123. "</div>"
  124. ],
  125. "text/plain": [
  126. " f0\n",
  127. "0 -1372.739221\n",
  128. "1 -243.376695\n",
  129. "2 2160.631416\n",
  130. "3 -451.650977\n",
  131. "4 -2080.222461"
  132. ]
  133. },
  134. "execution_count": 48,
  135. "metadata": {},
  136. "output_type": "execute_result"
  137. }
  138. ],
  139. "source": [
  140. "make_data(x_dims=1, samples=1000).head()"
  141. ]
  142. },
  143. {
  144. "cell_type": "code",
  145. "execution_count": 49,
  146. "metadata": {
  147. "collapsed": true
  148. },
  149. "outputs": [],
  150. "source": [
  151. "def test_model(model):\n",
  152. "    \"\"\"Fit `model` on the globals tx/ty and return its mean squared error on vx/vy.\"\"\"\n",
  153. "    model.fit(tx, ty.transpose()[0])\n",
  154. "    r = pd.DataFrame(vy)\n",
  155. "    r['predicted'] = model.predict(vx)\n",
  156. "    return (r.predicted - r[0]).pow(2).mean()"
  157. ]
  158. },
  159. {
  160. "cell_type": "code",
  161. "execution_count": 50,
  162. "metadata": {
  163. "collapsed": true
  164. },
  165. "outputs": [],
  166. "source": [
  167. "from sklearn import linear_model\n",
  168. "from sklearn.ensemble import RandomForestRegressor\n",
  169. "from sklearn.svm import SVR"
  170. ]
  171. },
  172. {
  173. "cell_type": "code",
  174. "execution_count": 51,
  175. "metadata": {
  176. "collapsed": true
  177. },
  178. "outputs": [],
  179. "source": [
  180. "# Candidate regressors, built with default arguments apart from the explicit SVR kernels.\n",
  181. "models = [\n",
  182. "    linear_model.LinearRegression(),\n",
  183. "    RandomForestRegressor(),\n",
  184. "    linear_model.Lasso(),\n",
  185. "]\n",
  186. "models += [SVR(kernel=kernel) for kernel in ('rbf', 'linear')]"
  187. ]
  188. },
  189. {
  190. "cell_type": "code",
  191. "execution_count": 52,
  192. "metadata": {},
  193. "outputs": [
  194. {
  195. "name": "stdout",
  196. "output_type": "stream",
  197. "text": [
  198. "LinearRegression 1.9294854828388685e-30\n",
  199. "RandomForestRegressor 0.03130526902061943\n",
  200. "Lasso 5.718565928185423e-07\n",
  201. "SVR 100.3770700128741\n",
  202. "SVR 0.0010088304997091927\n"
  203. ]
  204. }
  205. ],
  206. "source": [
  207. "# Print each model's class name next to the value returned by test_model.\n",
  208. "for model in models:\n",
  209. "    # The class name is read directly instead of slicing the repr at '('.\n",
  210. "    print(type(model).__name__, test_model(model))"
  211. ]
  212. }
  213. ],
  214. "metadata": {
  215. "kernelspec": {
  216. "display_name": "Python [conda env:python3]",
  217. "language": "python",
  218. "name": "conda-env-python3-py"
  219. },
  220. "language_info": {
  221. "codemirror_mode": {
  222. "name": "ipython",
  223. "version": 3
  224. },
  225. "file_extension": ".py",
  226. "mimetype": "text/x-python",
  227. "name": "python",
  228. "nbconvert_exporter": "python",
  229. "pygments_lexer": "ipython3",
  230. "version": "3.5.4"
  231. }
  232. },
  233. "nbformat": 4,
  234. "nbformat_minor": 2
  235. }
Add Comment
Please, Sign In to add comment