Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Trabalho (traindata)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Grupo 7 | 1110299 Marta Ferreira | 1180167 Mariana Oliveira | 1180149 Ana Lima"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "#Passos que temos que fazer: (Para retirar daqui depois)\n",
- "#\n",
- "#Carregar dados - e tratar os dados! temos de usar mais do que o que estamos a dar nas aulas\n",
- "#Split (treino/Teste)\n",
- "#criar standardscaler (standardizar, média 0 e desvio padrão 1)\n",
- "#treinar standardscaler\n",
- "#Transformar dados treino\n",
- "#treinar KNN\n",
- "#Transformar testset\n",
- "#Score/predict\n",
- "#No trabalho da Iris fizemos batota, porque normalizámos os dados antes de os dividir (confirmar)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "import pandas as pd\n",
- "\n",
- "from sklearn import preprocessing as pp\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.preprocessing import (\n",
- " StandardScaler,\n",
- " MinMaxScaler,\n",
- " RobustScaler)\n",
- "\n",
- "from sklearn.neighbors import KNeighborsClassifier\n",
- "from sklearn.pipeline import make_pipeline\n",
- "from sklearn.pipeline import Pipeline"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>0</th>\n",
- " <th>1</th>\n",
- " <th>2</th>\n",
- " <th>3</th>\n",
- " <th>4</th>\n",
- " <th>5</th>\n",
- " <th>6</th>\n",
- " <th>7</th>\n",
- " <th>8</th>\n",
- " <th>9</th>\n",
- " <th>10</th>\n",
- " <th>11</th>\n",
- " <th>12</th>\n",
- " <th>13</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>Id</th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>63.0</td>\n",
- " <td>1.0</td>\n",
- " <td>1.0</td>\n",
- " <td>145.0</td>\n",
- " <td>233.0</td>\n",
- " <td>1.0</td>\n",
- " <td>2.0</td>\n",
- " <td>150.0</td>\n",
- " <td>0.0</td>\n",
- " <td>2.3</td>\n",
- " <td>3.0</td>\n",
- " <td>0.0</td>\n",
- " <td>6.0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>67.0</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " <td>160.0</td>\n",
- " <td>286.0</td>\n",
- " <td>0.0</td>\n",
- " <td>2.0</td>\n",
- " <td>108.0</td>\n",
- " <td>1.0</td>\n",
- " <td>1.5</td>\n",
- " <td>2.0</td>\n",
- " <td>3.0</td>\n",
- " <td>3.0</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>67.0</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " <td>120.0</td>\n",
- " <td>229.0</td>\n",
- " <td>0.0</td>\n",
- " <td>2.0</td>\n",
- " <td>129.0</td>\n",
- " <td>1.0</td>\n",
- " <td>2.6</td>\n",
- " <td>2.0</td>\n",
- " <td>2.0</td>\n",
- " <td>7.0</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>37.0</td>\n",
- " <td>1.0</td>\n",
- " <td>3.0</td>\n",
- " <td>130.0</td>\n",
- " <td>250.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>187.0</td>\n",
- " <td>0.0</td>\n",
- " <td>3.5</td>\n",
- " <td>3.0</td>\n",
- " <td>0.0</td>\n",
- " <td>3.0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>41.0</td>\n",
- " <td>0.0</td>\n",
- " <td>2.0</td>\n",
- " <td>130.0</td>\n",
- " <td>204.0</td>\n",
- " <td>0.0</td>\n",
- " <td>2.0</td>\n",
- " <td>172.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.4</td>\n",
- " <td>1.0</td>\n",
- " <td>0.0</td>\n",
- " <td>3.0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " 0 1 2 3 4 5 6 7 8 9 10 11 12 13\n",
- "Id \n",
- "0 63.0 1.0 1.0 145.0 233.0 1.0 2.0 150.0 0.0 2.3 3.0 0.0 6.0 0\n",
- "1 67.0 1.0 4.0 160.0 286.0 0.0 2.0 108.0 1.0 1.5 2.0 3.0 3.0 2\n",
- "2 67.0 1.0 4.0 120.0 229.0 0.0 2.0 129.0 1.0 2.6 2.0 2.0 7.0 1\n",
- "3 37.0 1.0 3.0 130.0 250.0 0.0 0.0 187.0 0.0 3.5 3.0 0.0 3.0 0\n",
- "4 41.0 0.0 2.0 130.0 204.0 0.0 2.0 172.0 0.0 1.4 1.0 0.0 3.0 0"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Importação dos dados do ficheiro CSV (traindata.csv)\n",
- "traindata = pd.read_csv(\"traindata.csv\", header=0, index_col=0) \n",
- "traindata.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(216, 14)"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "traindata.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>0</th>\n",
- " <th>1</th>\n",
- " <th>2</th>\n",
- " <th>3</th>\n",
- " <th>4</th>\n",
- " <th>5</th>\n",
- " <th>6</th>\n",
- " <th>7</th>\n",
- " <th>8</th>\n",
- " <th>9</th>\n",
- " <th>10</th>\n",
- " <th>11</th>\n",
- " <th>12</th>\n",
- " <th>13</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>Id</th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>63.0</td>\n",
- " <td>1.0</td>\n",
- " <td>1.0</td>\n",
- " <td>145.0</td>\n",
- " <td>233.0</td>\n",
- " <td>1.0</td>\n",
- " <td>2.0</td>\n",
- " <td>150.0</td>\n",
- " <td>0.0</td>\n",
- " <td>2.3</td>\n",
- " <td>3.0</td>\n",
- " <td>0.0</td>\n",
- " <td>6.0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>67.0</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " <td>160.0</td>\n",
- " <td>286.0</td>\n",
- " <td>0.0</td>\n",
- " <td>2.0</td>\n",
- " <td>108.0</td>\n",
- " <td>1.0</td>\n",
- " <td>1.5</td>\n",
- " <td>2.0</td>\n",
- " <td>3.0</td>\n",
- " <td>3.0</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>67.0</td>\n",
- " <td>1.0</td>\n",
- " <td>4.0</td>\n",
- " <td>120.0</td>\n",
- " <td>229.0</td>\n",
- " <td>0.0</td>\n",
- " <td>2.0</td>\n",
- " <td>129.0</td>\n",
- " <td>1.0</td>\n",
- " <td>2.6</td>\n",
- " <td>2.0</td>\n",
- " <td>2.0</td>\n",
- " <td>7.0</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>37.0</td>\n",
- " <td>1.0</td>\n",
- " <td>3.0</td>\n",
- " <td>130.0</td>\n",
- " <td>250.0</td>\n",
- " <td>0.0</td>\n",
- " <td>0.0</td>\n",
- " <td>187.0</td>\n",
- " <td>0.0</td>\n",
- " <td>3.5</td>\n",
- " <td>3.0</td>\n",
- " <td>0.0</td>\n",
- " <td>3.0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4</th>\n",
- " <td>41.0</td>\n",
- " <td>0.0</td>\n",
- " <td>2.0</td>\n",
- " <td>130.0</td>\n",
- " <td>204.0</td>\n",
- " <td>0.0</td>\n",
- " <td>2.0</td>\n",
- " <td>172.0</td>\n",
- " <td>0.0</td>\n",
- " <td>1.4</td>\n",
- " <td>1.0</td>\n",
- " <td>0.0</td>\n",
- " <td>3.0</td>\n",
- " <td>0</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " 0 1 2 3 4 5 6 7 8 9 10 11 12 13\n",
- "Id \n",
- "0 63.0 1.0 1.0 145.0 233.0 1.0 2.0 150.0 0.0 2.3 3.0 0.0 6.0 0\n",
- "1 67.0 1.0 4.0 160.0 286.0 0.0 2.0 108.0 1.0 1.5 2.0 3.0 3.0 2\n",
- "2 67.0 1.0 4.0 120.0 229.0 0.0 2.0 129.0 1.0 2.6 2.0 2.0 7.0 1\n",
- "3 37.0 1.0 3.0 130.0 250.0 0.0 0.0 187.0 0.0 3.5 3.0 0.0 3.0 0\n",
- "4 41.0 0.0 2.0 130.0 204.0 0.0 2.0 172.0 0.0 1.4 1.0 0.0 3.0 0"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df=pd.DataFrame(traindata)\n",
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "traindata.replace({'?':np.nan}, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "traindata.dropna(axis=0, how='any', thresh=None, subset=None, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "trainset, testset=train_test_split(traindata)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 93\n",
- "1 26\n",
- "3 19\n",
- "2 19\n",
- "4 2\n",
- "Name: 13, dtype: int64"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#Contar os valores trainset. Fazer para a coluna 13, neste caso.\n",
- "trainset[\"13\"].value_counts()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0 22\n",
- "1 12\n",
- "4 7\n",
- "3 6\n",
- "2 6\n",
- "Name: 13, dtype: int64"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#(No testset vamos ter que ver se os valores estão balanceados)\n",
- "testset[\"13\"].value_counts()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Função Standardscaler (dados já divididos)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Função StandardScaler - transforma os dados de forma a que a distribuição de cada coluna \n",
- "# tenha um valor médio igual a 0 e um desvio padrão de 1."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([[-1.51112929, 0.73735291, -0.15951158, -0.150565 , -0.56516362,\n",
- " -0.3683942 , 1.04842767, 0.77092891, -0.63802794, 0.80364581,\n",
- " 0.61441966, -0.67320658, -0.80632771],\n",
- " [-1.7414932 , -1.35620268, -0.15951158, 0.31769583, -0.44273717,\n",
- " -0.3683942 , -0.97217839, 0.06973365, -0.63802794, -0.90921859,\n",
- " 0.61441966, -0.67320658, -0.80632771],\n",
- " [ 1.94432934, -1.35620268, 0.89725263, -1.20415186, -1.89145018,\n",
- " -0.3683942 , -0.97217839, -1.11353336, -0.63802794, 0.46107293,\n",
- " 0.61441966, -0.67320658, -0.80632771],\n",
- " [ 0.33178198, 0.73735291, 0.89725263, 1.02008707, 0.69990971,\n",
- " -0.3683942 , 1.04842767, -1.68325451, 1.56732949, -0.39535927,\n",
- " 0.61441966, 0.50305546, 0.75716139],\n",
- " [-0.93521952, -1.35620268, 0.89725263, 0.31769583, 0.02656423,\n",
- " -0.3683942 , 1.04842767, 0.06973365, 1.56732949, -0.90921859,\n",
- " 0.61441966, -0.67320658, -0.80632771],\n",
- " [-0.4744917 , -1.35620268, -1.21627579, -0.73589103, 0.04696864,\n",
- " -0.3683942 , -0.97217839, 0.50798069, -0.63802794, 0.03285683,\n",
- " -0.91202919, -0.67320658, -0.80632771],\n",
- " [ 0.33178198, 0.73735291, -1.21627579, 1.25421748, -0.19788426,\n",
- " -0.3683942 , 1.04842767, 0.5956301 , -0.63802794, -0.90921859,\n",
- " -0.91202919, 0.50305546, -0.80632771],\n",
- " [-1.9718571 , 0.73735291, -0.15951158, -0.150565 , 0.16939509,\n",
- " -0.3683942 , -0.97217839, 1.60359829, -0.63802794, 2.08829412,\n",
- " 2.14086851, -0.67320658, -0.80632771],\n",
- " [-0.58967365, -1.35620268, -1.21627579, 0.08356541, 0.59788767,\n",
- " -0.3683942 , -0.97217839, 0.50798069, -0.63802794, -0.90921859,\n",
- " 0.61441966, -0.67320658, -0.80632771],\n",
- " [ 0.44696394, 0.73735291, 0.89725263, 1.02008707, 0.57748326,\n",
- " -0.3683942 , 1.04842767, -1.72707921, 1.56732949, -0.22407283,\n",
- " -0.91202919, -0.67320658, 1.27832442],\n",
- " [ 0.67732784, 0.73735291, 0.89725263, -0.150565 , 0.23060832,\n",
- " -0.3683942 , -0.97217839, -0.28086398, 1.56732949, 0.28978649,\n",
- " -0.91202919, 0.50305546, 1.27832442],\n",
- " [-1.05040147, 0.73735291, 0.89725263, -1.67241268, -0.68759007,\n",
- " -0.3683942 , 1.04842767, -0.10556517, 1.56732949, 1.66007802,\n",
- " 0.61441966, -0.67320658, -0.80632771]])"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "scaler=StandardScaler()\n",
- "scaler.fit(trainset.iloc[:,:-1])\n",
- "trainX=scaler.transform(trainset.iloc[:,:-1])#Aqui o fit e o transform são feitos em separado; scaler.fit_transform(...) faria os dois de uma só vez\n",
- "trainX[:12] #Queremos normalizar tudo menos a última coluna"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [],
- "source": [
- "trainY=trainset[\"13\"] #isto é o que eu quero prever"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# KNN (Determinação dos vizinhos mais próximos)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
- " metric_params=None, n_jobs=None, n_neighbors=3, p=2,\n",
- " weights='uniform')"
- ]
- },
- "execution_count": 27,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "knnmodel=KNeighborsClassifier(n_neighbors=3)#ver os vizinhos porque são 3??\n",
- "knnmodel.fit(trainX, trainY) #temos de fazer o fit"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [],
- "source": [
- "testX=scaler.transform(testset.iloc[:,:-1])\n",
- "testY=testset[\"13\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.4716981132075472"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "knnmodel.score(testX,testY) #normalização feita com base nos dados de treino. \n",
- "#O testY é o que eu quero prever. \n",
- "#O score mede a qualidade do resultado: é a fração de previsões certas no testset (na execução guardada, cerca de 47 em cada 100)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [],
- "source": [
- "#knnmodel.predict(testset.iloc[:,:-1]) (ver o que colocar aqui)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [],
- "source": [
- "ppl=make_pipeline(StandardScaler(),KNeighborsClassifier(n_neighbors=1)) #ver isto dos vizinhos"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [],
- "source": [
- "ppl2=Pipeline(steps=[(\"scaler\",StandardScaler()),(\"3NN\",KNeighborsClassifier(n_neighbors=3))])\n",
- "#outra forma de se resolver o ppl2. ver a questão dos vizinhos como se faz"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Pipeline(memory=None,\n",
- " steps=[('standardscaler',\n",
- " StandardScaler(copy=True, with_mean=True, with_std=True)),\n",
- " ('kneighborsclassifier',\n",
- " KNeighborsClassifier(algorithm='auto', leaf_size=30,\n",
- " metric='minkowski', metric_params=None,\n",
- " n_jobs=None, n_neighbors=1, p=2,\n",
- " weights='uniform'))],\n",
- " verbose=False)"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ppl.fit(trainX,trainY) \n",
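- "#o pipeline é uma cadeia de passos: no fit, faz o fit_transform de cada transformador (aqui o StandardScaler) \n",
- "#e depois o fit do modelo final (KNN); no predict/score aplica o transform antes de prever. A confirmar: trainX já foi normalizado acima, por isso é normalizado duas vezes"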
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.5283018867924528"
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ppl.score(testX,testY)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Pipeline(memory=None,\n",
- " steps=[('scaler',\n",
- " StandardScaler(copy=True, with_mean=True, with_std=True)),\n",
- " ('3NN',\n",
- " KNeighborsClassifier(algorithm='auto', leaf_size=30,\n",
- " metric='minkowski', metric_params=None,\n",
- " n_jobs=None, n_neighbors=3, p=2,\n",
- " weights='uniform'))],\n",
- " verbose=False)"
- ]
- },
- "execution_count": 22,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ppl2.fit(trainX,trainY)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.49056603773584906"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ppl2.score(testX,testY)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Quais as previsões erradas? (Vamos ver onde errámos)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([0, 3, 0, 3, 2], dtype=int64)"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "preds=ppl.predict(testX)\n",
- "preds[:5]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Id\n",
- "30 False\n",
- "40 True\n",
- "0 False\n",
- "213 False\n",
- "174 True\n",
- "Name: 13, dtype: bool"
- ]
- },
- "execution_count": 25,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "errado=(preds !=testY)\n",
- "errado.head()\n",
- "#Para identificarmos os objetos em que errámos a previsão\n",
- "#True indica uma previsão errada e False uma previsão certa"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
- " \n",
- "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py:8682: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
- " self._update_inplace(new_data)\n",
- "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3325: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
- "\n",
- "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
- " exec(code_obj, self.user_global_ns, self.user_ns)\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "Id\n",
- "30 0\n",
- "40 FAIL\n",
- "0 0\n",
- "213 3\n",
- "174 FAIL\n",
- "298 1\n",
- "280 2\n",
- "19 0\n",
- "177 1\n",
- "240 0\n",
- "Name: 13, dtype: object"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "colorseries=testY\n",
- "colorseries[errado==True]=\"FAIL\"\n",
- "colorseries.head(10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement