Advertisement
Guest User

Untitled

a guest
Nov 22nd, 2019
151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 26.14 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "# Trabalho (traindata)"
  8. ]
  9. },
  10. {
  11. "cell_type": "markdown",
  12. "metadata": {},
  13. "source": [
  14. "# Grupo 7 | 1110299 Marta Ferreira | 1180167 Mariana Oliveira | 1180149 Ana Lima"
  15. ]
  16. },
  17. {
  18. "cell_type": "code",
  19. "execution_count": 1,
  20. "metadata": {},
  21. "outputs": [],
  22. "source": [
  23. "#Passos que temos que fazer: (Para retirar daqui depois)\n",
  24. "#\n",
  25. "#Carregar dados - e tratar os dados! temos de usar mais do que o que estamos a dar nas aulas\n",
  26. "#Split (treino/Teste)\n",
  27. "#criar standardscaler (standardizar, média 0 e desvio padrao 1)\n",
  28. "#treinar standardscaler\n",
  29. "#Transformar dados treino\n",
  30. "#treinar KNN\n",
  31. "#Transformar testset\n",
  32. "#Score/predict\n",
  33. "#Trabalho da Iris fizemos uma batota, porque normalizamos dados antes de os dividir (confirmar)"
  34. ]
  35. },
  36. {
  37. "cell_type": "code",
  38. "execution_count": 2,
  39. "metadata": {},
  40. "outputs": [],
  41. "source": [
  42. "import numpy as np\n",
  43. "import matplotlib.pyplot as plt\n",
  44. "import pandas as pd\n",
  45. "\n",
  46. "from sklearn import preprocessing as pp\n",
  47. "from sklearn.model_selection import train_test_split\n",
  48. "from sklearn.preprocessing import (\n",
  49. " StandardScaler,\n",
  50. " MinMaxScaler,\n",
  51. " RobustScaler)\n",
  52. "\n",
  53. "from sklearn.neighbors import KNeighborsClassifier\n",
  54. "from sklearn.pipeline import make_pipeline\n",
  55. "from sklearn.pipeline import Pipeline"
  56. ]
  57. },
  58. {
  59. "cell_type": "code",
  60. "execution_count": 3,
  61. "metadata": {},
  62. "outputs": [
  63. {
  64. "data": {
  65. "text/html": [
  66. "<div>\n",
  67. "<style scoped>\n",
  68. " .dataframe tbody tr th:only-of-type {\n",
  69. " vertical-align: middle;\n",
  70. " }\n",
  71. "\n",
  72. " .dataframe tbody tr th {\n",
  73. " vertical-align: top;\n",
  74. " }\n",
  75. "\n",
  76. " .dataframe thead th {\n",
  77. " text-align: right;\n",
  78. " }\n",
  79. "</style>\n",
  80. "<table border=\"1\" class=\"dataframe\">\n",
  81. " <thead>\n",
  82. " <tr style=\"text-align: right;\">\n",
  83. " <th></th>\n",
  84. " <th>0</th>\n",
  85. " <th>1</th>\n",
  86. " <th>2</th>\n",
  87. " <th>3</th>\n",
  88. " <th>4</th>\n",
  89. " <th>5</th>\n",
  90. " <th>6</th>\n",
  91. " <th>7</th>\n",
  92. " <th>8</th>\n",
  93. " <th>9</th>\n",
  94. " <th>10</th>\n",
  95. " <th>11</th>\n",
  96. " <th>12</th>\n",
  97. " <th>13</th>\n",
  98. " </tr>\n",
  99. " <tr>\n",
  100. " <th>Id</th>\n",
  101. " <th></th>\n",
  102. " <th></th>\n",
  103. " <th></th>\n",
  104. " <th></th>\n",
  105. " <th></th>\n",
  106. " <th></th>\n",
  107. " <th></th>\n",
  108. " <th></th>\n",
  109. " <th></th>\n",
  110. " <th></th>\n",
  111. " <th></th>\n",
  112. " <th></th>\n",
  113. " <th></th>\n",
  114. " <th></th>\n",
  115. " </tr>\n",
  116. " </thead>\n",
  117. " <tbody>\n",
  118. " <tr>\n",
  119. " <th>0</th>\n",
  120. " <td>63.0</td>\n",
  121. " <td>1.0</td>\n",
  122. " <td>1.0</td>\n",
  123. " <td>145.0</td>\n",
  124. " <td>233.0</td>\n",
  125. " <td>1.0</td>\n",
  126. " <td>2.0</td>\n",
  127. " <td>150.0</td>\n",
  128. " <td>0.0</td>\n",
  129. " <td>2.3</td>\n",
  130. " <td>3.0</td>\n",
  131. " <td>0.0</td>\n",
  132. " <td>6.0</td>\n",
  133. " <td>0</td>\n",
  134. " </tr>\n",
  135. " <tr>\n",
  136. " <th>1</th>\n",
  137. " <td>67.0</td>\n",
  138. " <td>1.0</td>\n",
  139. " <td>4.0</td>\n",
  140. " <td>160.0</td>\n",
  141. " <td>286.0</td>\n",
  142. " <td>0.0</td>\n",
  143. " <td>2.0</td>\n",
  144. " <td>108.0</td>\n",
  145. " <td>1.0</td>\n",
  146. " <td>1.5</td>\n",
  147. " <td>2.0</td>\n",
  148. " <td>3.0</td>\n",
  149. " <td>3.0</td>\n",
  150. " <td>2</td>\n",
  151. " </tr>\n",
  152. " <tr>\n",
  153. " <th>2</th>\n",
  154. " <td>67.0</td>\n",
  155. " <td>1.0</td>\n",
  156. " <td>4.0</td>\n",
  157. " <td>120.0</td>\n",
  158. " <td>229.0</td>\n",
  159. " <td>0.0</td>\n",
  160. " <td>2.0</td>\n",
  161. " <td>129.0</td>\n",
  162. " <td>1.0</td>\n",
  163. " <td>2.6</td>\n",
  164. " <td>2.0</td>\n",
  165. " <td>2.0</td>\n",
  166. " <td>7.0</td>\n",
  167. " <td>1</td>\n",
  168. " </tr>\n",
  169. " <tr>\n",
  170. " <th>3</th>\n",
  171. " <td>37.0</td>\n",
  172. " <td>1.0</td>\n",
  173. " <td>3.0</td>\n",
  174. " <td>130.0</td>\n",
  175. " <td>250.0</td>\n",
  176. " <td>0.0</td>\n",
  177. " <td>0.0</td>\n",
  178. " <td>187.0</td>\n",
  179. " <td>0.0</td>\n",
  180. " <td>3.5</td>\n",
  181. " <td>3.0</td>\n",
  182. " <td>0.0</td>\n",
  183. " <td>3.0</td>\n",
  184. " <td>0</td>\n",
  185. " </tr>\n",
  186. " <tr>\n",
  187. " <th>4</th>\n",
  188. " <td>41.0</td>\n",
  189. " <td>0.0</td>\n",
  190. " <td>2.0</td>\n",
  191. " <td>130.0</td>\n",
  192. " <td>204.0</td>\n",
  193. " <td>0.0</td>\n",
  194. " <td>2.0</td>\n",
  195. " <td>172.0</td>\n",
  196. " <td>0.0</td>\n",
  197. " <td>1.4</td>\n",
  198. " <td>1.0</td>\n",
  199. " <td>0.0</td>\n",
  200. " <td>3.0</td>\n",
  201. " <td>0</td>\n",
  202. " </tr>\n",
  203. " </tbody>\n",
  204. "</table>\n",
  205. "</div>"
  206. ],
  207. "text/plain": [
  208. " 0 1 2 3 4 5 6 7 8 9 10 11 12 13\n",
  209. "Id \n",
  210. "0 63.0 1.0 1.0 145.0 233.0 1.0 2.0 150.0 0.0 2.3 3.0 0.0 6.0 0\n",
  211. "1 67.0 1.0 4.0 160.0 286.0 0.0 2.0 108.0 1.0 1.5 2.0 3.0 3.0 2\n",
  212. "2 67.0 1.0 4.0 120.0 229.0 0.0 2.0 129.0 1.0 2.6 2.0 2.0 7.0 1\n",
  213. "3 37.0 1.0 3.0 130.0 250.0 0.0 0.0 187.0 0.0 3.5 3.0 0.0 3.0 0\n",
  214. "4 41.0 0.0 2.0 130.0 204.0 0.0 2.0 172.0 0.0 1.4 1.0 0.0 3.0 0"
  215. ]
  216. },
  217. "execution_count": 3,
  218. "metadata": {},
  219. "output_type": "execute_result"
  220. }
  221. ],
  222. "source": [
  223. "# Importação dos dados do ficheiro CSV\n",
  224. "traindata = pd.read_csv(\"traindata.csv\", header=0, index_col=0) \n",
  225. "traindata.head()"
  226. ]
  227. },
  228. {
  229. "cell_type": "code",
  230. "execution_count": 4,
  231. "metadata": {},
  232. "outputs": [
  233. {
  234. "data": {
  235. "text/plain": [
  236. "(216, 14)"
  237. ]
  238. },
  239. "execution_count": 4,
  240. "metadata": {},
  241. "output_type": "execute_result"
  242. }
  243. ],
  244. "source": [
  245. "traindata.shape"
  246. ]
  247. },
  248. {
  249. "cell_type": "code",
  250. "execution_count": 5,
  251. "metadata": {},
  252. "outputs": [
  253. {
  254. "data": {
  255. "text/html": [
  256. "<div>\n",
  257. "<style scoped>\n",
  258. " .dataframe tbody tr th:only-of-type {\n",
  259. " vertical-align: middle;\n",
  260. " }\n",
  261. "\n",
  262. " .dataframe tbody tr th {\n",
  263. " vertical-align: top;\n",
  264. " }\n",
  265. "\n",
  266. " .dataframe thead th {\n",
  267. " text-align: right;\n",
  268. " }\n",
  269. "</style>\n",
  270. "<table border=\"1\" class=\"dataframe\">\n",
  271. " <thead>\n",
  272. " <tr style=\"text-align: right;\">\n",
  273. " <th></th>\n",
  274. " <th>0</th>\n",
  275. " <th>1</th>\n",
  276. " <th>2</th>\n",
  277. " <th>3</th>\n",
  278. " <th>4</th>\n",
  279. " <th>5</th>\n",
  280. " <th>6</th>\n",
  281. " <th>7</th>\n",
  282. " <th>8</th>\n",
  283. " <th>9</th>\n",
  284. " <th>10</th>\n",
  285. " <th>11</th>\n",
  286. " <th>12</th>\n",
  287. " <th>13</th>\n",
  288. " </tr>\n",
  289. " <tr>\n",
  290. " <th>Id</th>\n",
  291. " <th></th>\n",
  292. " <th></th>\n",
  293. " <th></th>\n",
  294. " <th></th>\n",
  295. " <th></th>\n",
  296. " <th></th>\n",
  297. " <th></th>\n",
  298. " <th></th>\n",
  299. " <th></th>\n",
  300. " <th></th>\n",
  301. " <th></th>\n",
  302. " <th></th>\n",
  303. " <th></th>\n",
  304. " <th></th>\n",
  305. " </tr>\n",
  306. " </thead>\n",
  307. " <tbody>\n",
  308. " <tr>\n",
  309. " <th>0</th>\n",
  310. " <td>63.0</td>\n",
  311. " <td>1.0</td>\n",
  312. " <td>1.0</td>\n",
  313. " <td>145.0</td>\n",
  314. " <td>233.0</td>\n",
  315. " <td>1.0</td>\n",
  316. " <td>2.0</td>\n",
  317. " <td>150.0</td>\n",
  318. " <td>0.0</td>\n",
  319. " <td>2.3</td>\n",
  320. " <td>3.0</td>\n",
  321. " <td>0.0</td>\n",
  322. " <td>6.0</td>\n",
  323. " <td>0</td>\n",
  324. " </tr>\n",
  325. " <tr>\n",
  326. " <th>1</th>\n",
  327. " <td>67.0</td>\n",
  328. " <td>1.0</td>\n",
  329. " <td>4.0</td>\n",
  330. " <td>160.0</td>\n",
  331. " <td>286.0</td>\n",
  332. " <td>0.0</td>\n",
  333. " <td>2.0</td>\n",
  334. " <td>108.0</td>\n",
  335. " <td>1.0</td>\n",
  336. " <td>1.5</td>\n",
  337. " <td>2.0</td>\n",
  338. " <td>3.0</td>\n",
  339. " <td>3.0</td>\n",
  340. " <td>2</td>\n",
  341. " </tr>\n",
  342. " <tr>\n",
  343. " <th>2</th>\n",
  344. " <td>67.0</td>\n",
  345. " <td>1.0</td>\n",
  346. " <td>4.0</td>\n",
  347. " <td>120.0</td>\n",
  348. " <td>229.0</td>\n",
  349. " <td>0.0</td>\n",
  350. " <td>2.0</td>\n",
  351. " <td>129.0</td>\n",
  352. " <td>1.0</td>\n",
  353. " <td>2.6</td>\n",
  354. " <td>2.0</td>\n",
  355. " <td>2.0</td>\n",
  356. " <td>7.0</td>\n",
  357. " <td>1</td>\n",
  358. " </tr>\n",
  359. " <tr>\n",
  360. " <th>3</th>\n",
  361. " <td>37.0</td>\n",
  362. " <td>1.0</td>\n",
  363. " <td>3.0</td>\n",
  364. " <td>130.0</td>\n",
  365. " <td>250.0</td>\n",
  366. " <td>0.0</td>\n",
  367. " <td>0.0</td>\n",
  368. " <td>187.0</td>\n",
  369. " <td>0.0</td>\n",
  370. " <td>3.5</td>\n",
  371. " <td>3.0</td>\n",
  372. " <td>0.0</td>\n",
  373. " <td>3.0</td>\n",
  374. " <td>0</td>\n",
  375. " </tr>\n",
  376. " <tr>\n",
  377. " <th>4</th>\n",
  378. " <td>41.0</td>\n",
  379. " <td>0.0</td>\n",
  380. " <td>2.0</td>\n",
  381. " <td>130.0</td>\n",
  382. " <td>204.0</td>\n",
  383. " <td>0.0</td>\n",
  384. " <td>2.0</td>\n",
  385. " <td>172.0</td>\n",
  386. " <td>0.0</td>\n",
  387. " <td>1.4</td>\n",
  388. " <td>1.0</td>\n",
  389. " <td>0.0</td>\n",
  390. " <td>3.0</td>\n",
  391. " <td>0</td>\n",
  392. " </tr>\n",
  393. " </tbody>\n",
  394. "</table>\n",
  395. "</div>"
  396. ],
  397. "text/plain": [
  398. " 0 1 2 3 4 5 6 7 8 9 10 11 12 13\n",
  399. "Id \n",
  400. "0 63.0 1.0 1.0 145.0 233.0 1.0 2.0 150.0 0.0 2.3 3.0 0.0 6.0 0\n",
  401. "1 67.0 1.0 4.0 160.0 286.0 0.0 2.0 108.0 1.0 1.5 2.0 3.0 3.0 2\n",
  402. "2 67.0 1.0 4.0 120.0 229.0 0.0 2.0 129.0 1.0 2.6 2.0 2.0 7.0 1\n",
  403. "3 37.0 1.0 3.0 130.0 250.0 0.0 0.0 187.0 0.0 3.5 3.0 0.0 3.0 0\n",
  404. "4 41.0 0.0 2.0 130.0 204.0 0.0 2.0 172.0 0.0 1.4 1.0 0.0 3.0 0"
  405. ]
  406. },
  407. "execution_count": 5,
  408. "metadata": {},
  409. "output_type": "execute_result"
  410. }
  411. ],
  412. "source": [
  413. "df=pd.DataFrame(traindata)\n",
  414. "df.head()"
  415. ]
  416. },
  417. {
  418. "cell_type": "code",
  419. "execution_count": 6,
  420. "metadata": {},
  421. "outputs": [],
  422. "source": [
  423. "traindata.replace({'?':np.nan}, inplace=True)"
  424. ]
  425. },
  426. {
  427. "cell_type": "code",
  428. "execution_count": 7,
  429. "metadata": {},
  430. "outputs": [],
  431. "source": [
  432. "traindata.dropna(axis=0, how='any', thresh=None, subset=None, inplace=True)"
  433. ]
  434. },
  435. {
  436. "cell_type": "code",
  437. "execution_count": 8,
  438. "metadata": {},
  439. "outputs": [],
  440. "source": [
  441. "trainset, testset=train_test_split(traindata)"
  442. ]
  443. },
  444. {
  445. "cell_type": "code",
  446. "execution_count": 9,
  447. "metadata": {},
  448. "outputs": [
  449. {
  450. "data": {
  451. "text/plain": [
  452. "0 93\n",
  453. "1 26\n",
  454. "3 19\n",
  455. "2 19\n",
  456. "4 2\n",
  457. "Name: 13, dtype: int64"
  458. ]
  459. },
  460. "execution_count": 9,
  461. "metadata": {},
  462. "output_type": "execute_result"
  463. }
  464. ],
  465. "source": [
  466. "#Contar os valores trainset. Fazer para a coluna 13, neste caso.\n",
  467. "trainset[\"13\"].value_counts()"
  468. ]
  469. },
  470. {
  471. "cell_type": "code",
  472. "execution_count": 10,
  473. "metadata": {},
  474. "outputs": [
  475. {
  476. "data": {
  477. "text/plain": [
  478. "0 22\n",
  479. "1 12\n",
  480. "4 7\n",
  481. "3 6\n",
  482. "2 6\n",
  483. "Name: 13, dtype: int64"
  484. ]
  485. },
  486. "execution_count": 10,
  487. "metadata": {},
  488. "output_type": "execute_result"
  489. }
  490. ],
  491. "source": [
  492. "#(No testset vamos ter que ver se os valores estão balanceados)\n",
  493. "testset[\"13\"].value_counts()"
  494. ]
  495. },
  496. {
  497. "cell_type": "markdown",
  498. "metadata": {},
  499. "source": [
  500. "# Função Standardscaler (dados já divididos)"
  501. ]
  502. },
  503. {
  504. "cell_type": "code",
  505. "execution_count": 11,
  506. "metadata": {},
  507. "outputs": [],
  508. "source": [
  509. "# Função Standardscaler - Transformar os dados de forma a que a sua distribuição tenha um valor \n",
  510. "# médio igual a 0 e um desvio padrão de 1."
  511. ]
  512. },
  513. {
  514. "cell_type": "code",
  515. "execution_count": 12,
  516. "metadata": {},
  517. "outputs": [
  518. {
  519. "data": {
  520. "text/plain": [
  521. "array([[-1.51112929, 0.73735291, -0.15951158, -0.150565 , -0.56516362,\n",
  522. " -0.3683942 , 1.04842767, 0.77092891, -0.63802794, 0.80364581,\n",
  523. " 0.61441966, -0.67320658, -0.80632771],\n",
  524. " [-1.7414932 , -1.35620268, -0.15951158, 0.31769583, -0.44273717,\n",
  525. " -0.3683942 , -0.97217839, 0.06973365, -0.63802794, -0.90921859,\n",
  526. " 0.61441966, -0.67320658, -0.80632771],\n",
  527. " [ 1.94432934, -1.35620268, 0.89725263, -1.20415186, -1.89145018,\n",
  528. " -0.3683942 , -0.97217839, -1.11353336, -0.63802794, 0.46107293,\n",
  529. " 0.61441966, -0.67320658, -0.80632771],\n",
  530. " [ 0.33178198, 0.73735291, 0.89725263, 1.02008707, 0.69990971,\n",
  531. " -0.3683942 , 1.04842767, -1.68325451, 1.56732949, -0.39535927,\n",
  532. " 0.61441966, 0.50305546, 0.75716139],\n",
  533. " [-0.93521952, -1.35620268, 0.89725263, 0.31769583, 0.02656423,\n",
  534. " -0.3683942 , 1.04842767, 0.06973365, 1.56732949, -0.90921859,\n",
  535. " 0.61441966, -0.67320658, -0.80632771],\n",
  536. " [-0.4744917 , -1.35620268, -1.21627579, -0.73589103, 0.04696864,\n",
  537. " -0.3683942 , -0.97217839, 0.50798069, -0.63802794, 0.03285683,\n",
  538. " -0.91202919, -0.67320658, -0.80632771],\n",
  539. " [ 0.33178198, 0.73735291, -1.21627579, 1.25421748, -0.19788426,\n",
  540. " -0.3683942 , 1.04842767, 0.5956301 , -0.63802794, -0.90921859,\n",
  541. " -0.91202919, 0.50305546, -0.80632771],\n",
  542. " [-1.9718571 , 0.73735291, -0.15951158, -0.150565 , 0.16939509,\n",
  543. " -0.3683942 , -0.97217839, 1.60359829, -0.63802794, 2.08829412,\n",
  544. " 2.14086851, -0.67320658, -0.80632771],\n",
  545. " [-0.58967365, -1.35620268, -1.21627579, 0.08356541, 0.59788767,\n",
  546. " -0.3683942 , -0.97217839, 0.50798069, -0.63802794, -0.90921859,\n",
  547. " 0.61441966, -0.67320658, -0.80632771],\n",
  548. " [ 0.44696394, 0.73735291, 0.89725263, 1.02008707, 0.57748326,\n",
  549. " -0.3683942 , 1.04842767, -1.72707921, 1.56732949, -0.22407283,\n",
  550. " -0.91202919, -0.67320658, 1.27832442],\n",
  551. " [ 0.67732784, 0.73735291, 0.89725263, -0.150565 , 0.23060832,\n",
  552. " -0.3683942 , -0.97217839, -0.28086398, 1.56732949, 0.28978649,\n",
  553. " -0.91202919, 0.50305546, 1.27832442],\n",
  554. " [-1.05040147, 0.73735291, 0.89725263, -1.67241268, -0.68759007,\n",
  555. " -0.3683942 , 1.04842767, -0.10556517, 1.56732949, 1.66007802,\n",
  556. " 0.61441966, -0.67320658, -0.80632771]])"
  557. ]
  558. },
  559. "execution_count": 12,
  560. "metadata": {},
  561. "output_type": "execute_result"
  562. }
  563. ],
  564. "source": [
  565. "scaler=StandardScaler()\n",
  566. "scaler.fit(trainset.iloc[:,:-1])\n",
  567. "trainX=scaler.transform(trainset.iloc[:,:-1])#Fizemos o fit e o transform juntos, mas podia ser separados\n",
  568. "trainX[:12] #Queremos normalizar tudo menos a última coluna"
  569. ]
  570. },
  571. {
  572. "cell_type": "code",
  573. "execution_count": 13,
  574. "metadata": {},
  575. "outputs": [],
  576. "source": [
  577. "trainY=trainset[\"13\"] #isto é o que eu quero prever"
  578. ]
  579. },
  580. {
  581. "cell_type": "markdown",
  582. "metadata": {},
  583. "source": [
  584. "# KNN (Determinação do vizinho mais próximo)"
  585. ]
  586. },
  587. {
  588. "cell_type": "code",
  589. "execution_count": 27,
  590. "metadata": {},
  591. "outputs": [
  592. {
  593. "data": {
  594. "text/plain": [
  595. "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
  596. " metric_params=None, n_jobs=None, n_neighbors=3, p=2,\n",
  597. " weights='uniform')"
  598. ]
  599. },
  600. "execution_count": 27,
  601. "metadata": {},
  602. "output_type": "execute_result"
  603. }
  604. ],
  605. "source": [
  606. "knnmodel=KNeighborsClassifier(n_neighbors=3)#ver os vizinhos porque são 3??\n",
  607. "knnmodel.fit(trainX, trainY) #temos de fazer o fit"
  608. ]
  609. },
  610. {
  611. "cell_type": "code",
  612. "execution_count": 15,
  613. "metadata": {},
  614. "outputs": [],
  615. "source": [
  616. "testX=scaler.transform(testset.iloc[:,:-1])\n",
  617. "testY=testset[\"13\"]"
  618. ]
  619. },
  620. {
  621. "cell_type": "code",
  622. "execution_count": 16,
  623. "metadata": {},
  624. "outputs": [
  625. {
  626. "data": {
  627. "text/plain": [
  628. "0.4716981132075472"
  629. ]
  630. },
  631. "execution_count": 16,
  632. "metadata": {},
  633. "output_type": "execute_result"
  634. }
  635. ],
  636. "source": [
  637. "knnmodel.score(testX,testY) #normalização feita com base nos dados de treino. \n",
  638. "#O testY é o que eu quero prever. \n",
  639. "#Está a avaliar a qualidade do resultado: proporção de previsões corretas no conjunto de teste (aqui ~0.47)"
  640. ]
  641. },
  642. {
  643. "cell_type": "code",
  644. "execution_count": 17,
  645. "metadata": {},
  646. "outputs": [],
  647. "source": [
  648. "#knnmodel.predict(testset.iloc[:,:-1]) (ver o que colocar aqui)"
  649. ]
  650. },
  651. {
  652. "cell_type": "code",
  653. "execution_count": 18,
  654. "metadata": {},
  655. "outputs": [],
  656. "source": [
  657. "ppl=make_pipeline(StandardScaler(),KNeighborsClassifier(n_neighbors=1)) #ver isto dos vizinhos"
  658. ]
  659. },
  660. {
  661. "cell_type": "code",
  662. "execution_count": 19,
  663. "metadata": {},
  664. "outputs": [],
  665. "source": [
  666. "ppl2=Pipeline(steps=[(\"scaler\",StandardScaler()),(\"3NN\",KNeighborsClassifier(n_neighbors=3))])\n",
  667. "#outra forma de se resolver o ppl2. ver a questão dos vizinhos como se faz"
  668. ]
  669. },
  670. {
  671. "cell_type": "code",
  672. "execution_count": 20,
  673. "metadata": {},
  674. "outputs": [
  675. {
  676. "data": {
  677. "text/plain": [
  678. "Pipeline(memory=None,\n",
  679. " steps=[('standardscaler',\n",
  680. " StandardScaler(copy=True, with_mean=True, with_std=True)),\n",
  681. " ('kneighborsclassifier',\n",
  682. " KNeighborsClassifier(algorithm='auto', leaf_size=30,\n",
  683. " metric='minkowski', metric_params=None,\n",
  684. " n_jobs=None, n_neighbors=1, p=2,\n",
  685. " weights='uniform'))],\n",
  686. " verbose=False)"
  687. ]
  688. },
  689. "execution_count": 20,
  690. "metadata": {},
  691. "output_type": "execute_result"
  692. }
  693. ],
  694. "source": [
  695. "ppl.fit(trainX,trainY) \n",
  696. "#o pipeline é uma linha de transformação \n",
  697. "#- em vez da standardScaler - faz o fit transform para varios modelos?"
  698. ]
  699. },
  700. {
  701. "cell_type": "code",
  702. "execution_count": 21,
  703. "metadata": {},
  704. "outputs": [
  705. {
  706. "data": {
  707. "text/plain": [
  708. "0.5283018867924528"
  709. ]
  710. },
  711. "execution_count": 21,
  712. "metadata": {},
  713. "output_type": "execute_result"
  714. }
  715. ],
  716. "source": [
  717. "ppl.score(testX,testY)"
  718. ]
  719. },
  720. {
  721. "cell_type": "code",
  722. "execution_count": 22,
  723. "metadata": {},
  724. "outputs": [
  725. {
  726. "data": {
  727. "text/plain": [
  728. "Pipeline(memory=None,\n",
  729. " steps=[('scaler',\n",
  730. " StandardScaler(copy=True, with_mean=True, with_std=True)),\n",
  731. " ('3NN',\n",
  732. " KNeighborsClassifier(algorithm='auto', leaf_size=30,\n",
  733. " metric='minkowski', metric_params=None,\n",
  734. " n_jobs=None, n_neighbors=3, p=2,\n",
  735. " weights='uniform'))],\n",
  736. " verbose=False)"
  737. ]
  738. },
  739. "execution_count": 22,
  740. "metadata": {},
  741. "output_type": "execute_result"
  742. }
  743. ],
  744. "source": [
  745. "ppl2.fit(trainX,trainY)"
  746. ]
  747. },
  748. {
  749. "cell_type": "code",
  750. "execution_count": 23,
  751. "metadata": {},
  752. "outputs": [
  753. {
  754. "data": {
  755. "text/plain": [
  756. "0.49056603773584906"
  757. ]
  758. },
  759. "execution_count": 23,
  760. "metadata": {},
  761. "output_type": "execute_result"
  762. }
  763. ],
  764. "source": [
  765. "ppl2.score(testX,testY)"
  766. ]
  767. },
  768. {
  769. "cell_type": "markdown",
  770. "metadata": {},
  771. "source": [
  772. "Quais as previsões erradas? (Vamos ver onde erramos)"
  773. ]
  774. },
  775. {
  776. "cell_type": "code",
  777. "execution_count": 24,
  778. "metadata": {},
  779. "outputs": [
  780. {
  781. "data": {
  782. "text/plain": [
  783. "array([0, 3, 0, 3, 2], dtype=int64)"
  784. ]
  785. },
  786. "execution_count": 24,
  787. "metadata": {},
  788. "output_type": "execute_result"
  789. }
  790. ],
  791. "source": [
  792. "preds=ppl.predict(testX)\n",
  793. "preds[:5]"
  794. ]
  795. },
  796. {
  797. "cell_type": "code",
  798. "execution_count": 25,
  799. "metadata": {},
  800. "outputs": [
  801. {
  802. "data": {
  803. "text/plain": [
  804. "Id\n",
  805. "30 False\n",
  806. "40 True\n",
  807. "0 False\n",
  808. "213 False\n",
  809. "174 True\n",
  810. "Name: 13, dtype: bool"
  811. ]
  812. },
  813. "execution_count": 25,
  814. "metadata": {},
  815. "output_type": "execute_result"
  816. }
  817. ],
  818. "source": [
  819. "errado=(preds !=testY)\n",
  820. "errado.head()\n",
  821. "#Para identificarmos os objetos em que erramos a previsão\n",
  822. "#True é porque está errado e false é onde está certo"
  823. ]
  824. },
  825. {
  826. "cell_type": "code",
  827. "execution_count": 26,
  828. "metadata": {},
  829. "outputs": [
  830. {
  831. "name": "stderr",
  832. "output_type": "stream",
  833. "text": [
  834. "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
  835. "A value is trying to be set on a copy of a slice from a DataFrame\n",
  836. "\n",
  837. "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
  838. " \n",
  839. "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py:8682: SettingWithCopyWarning: \n",
  840. "A value is trying to be set on a copy of a slice from a DataFrame\n",
  841. "\n",
  842. "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
  843. " self._update_inplace(new_data)\n",
  844. "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3325: SettingWithCopyWarning: \n",
  845. "A value is trying to be set on a copy of a slice from a DataFrame\n",
  846. "\n",
  847. "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
  848. " exec(code_obj, self.user_global_ns, self.user_ns)\n"
  849. ]
  850. },
  851. {
  852. "data": {
  853. "text/plain": [
  854. "Id\n",
  855. "30 0\n",
  856. "40 FAIL\n",
  857. "0 0\n",
  858. "213 3\n",
  859. "174 FAIL\n",
  860. "298 1\n",
  861. "280 2\n",
  862. "19 0\n",
  863. "177 1\n",
  864. "240 0\n",
  865. "Name: 13, dtype: object"
  866. ]
  867. },
  868. "execution_count": 26,
  869. "metadata": {},
  870. "output_type": "execute_result"
  871. }
  872. ],
  873. "source": [
  874. "colorseries=testY\n",
  875. "colorseries[errado==True]=\"FAIL\"\n",
  876. "colorseries.head(10)"
  877. ]
  878. },
  879. {
  880. "cell_type": "code",
  881. "execution_count": null,
  882. "metadata": {},
  883. "outputs": [],
  884. "source": []
  885. }
  886. ],
  887. "metadata": {
  888. "kernelspec": {
  889. "display_name": "Python 3",
  890. "language": "python",
  891. "name": "python3"
  892. },
  893. "language_info": {
  894. "codemirror_mode": {
  895. "name": "ipython",
  896. "version": 3
  897. },
  898. "file_extension": ".py",
  899. "mimetype": "text/x-python",
  900. "name": "python",
  901. "nbconvert_exporter": "python",
  902. "pygments_lexer": "ipython3",
  903. "version": "3.7.3"
  904. }
  905. },
  906. "nbformat": 4,
  907. "nbformat_minor": 2
  908. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement