Untitled

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Populating the interactive namespace from numpy and matplotlib\n"
     ]
    }
   ],
   "source": [
    "%pylab inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def make_data(samples, x_dims):\n",
    "    global x, y, tx, ty, vx, vy, features\n",
    "    y = pd.DataFrame({'y': np.random.normal(0, 10, size=samples)\n",
    "                  })\n",
    "\n",
    "    x = pd.DataFrame()\n",
    "\n",
    "    for i in range(x_dims):\n",
    "        x['f{0}'.format(i)] = np.random.normal(0, 10, size=samples)\n",
    "    x['f0'] = y * 123.\n",
    "    \n",
    "    HELD_OUT_RATIO = 0.2\n",
    "    slicer = int(len(x) * HELD_OUT_RATIO)\n",
    "\n",
    "    train_x = x[slicer:]\n",
    "    train_y = y[slicer:]\n",
    "\n",
    "    val_x = x[:slicer]\n",
    "    val_y = y[:slicer]\n",
    "    \n",
    "    tx = np.array(train_x.values)\n",
    "    tx = tx.reshape((int(len(train_x)/1), x_dims))\n",
    "    ty = train_y.values\n",
    "    ty = ty.reshape((len(ty), 1))\n",
    "    vx = np.array(val_x.values)\n",
    "    vx = vx.reshape((int(len(val_x)/1), x_dims))\n",
    "    vy = val_y.values\n",
    "    vy = vy.reshape((len(vy), 1))\n",
    "    features = (0,x_dims)\n",
    "    return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>f0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1372.739221</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-243.376695</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2160.631416</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-451.650977</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-2080.222461</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            f0\n",
       "0 -1372.739221\n",
       "1  -243.376695\n",
       "2  2160.631416\n",
       "3  -451.650977\n",
       "4 -2080.222461"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "make_data(samples=1000, x_dims=1).head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def test_model(model):\n",
    "    model.fit(tx, ty.transpose()[0])\n",
    "    r = pd.DataFrame(vy)\n",
    "    r['predicted'] = model.predict(vx)\n",
    "    return (r.predicted - r[0]).pow(2).mean()\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn import  linear_model\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.svm import SVR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "models = [\n",
    "    linear_model.LinearRegression(),\n",
    "    RandomForestRegressor(),\n",
    "    linear_model.Lasso(),\n",
    "    SVR(kernel='rbf'),\n",
    "    SVR(kernel='linear')\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LinearRegression 1.9294854828388685e-30\n",
      "RandomForestRegressor 0.03130526902061943\n",
      "Lasso 5.718565928185423e-07\n",
      "SVR 100.3770700128741\n",
      "SVR 0.0010088304997091927\n"
     ]
    }
   ],
   "source": [
    "for model in models:\n",
    "    name = str(model)\n",
    "    name = name[:name.find('(')]\n",
    "    print(name, test_model(model))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:python3]",
   "language": "python",
   "name": "conda-env-python3-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}