Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 36,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "%matplotlib inline"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Vamos utilizar o KNN e o train_test_split\n",
- "# Importe os módulos necessários\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "colunas = ['buying',\n",
- "'maint',\n",
- "'doors',\n",
- "'persons',\n",
- "'lug_boot',\n",
- "'safety',\n",
- "'y']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Leia o dataset\n",
- "data = _____('data_car.csv', names=colunas)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Veja os primeiros 5 dados\n",
- "data.____"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Verifique se há dados nulos\n",
- "data.___.___"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Transforme os dados categóricos em numéricos\n",
- "y_mapping = __________\n",
- "\n",
- "data[___] = data[___].____"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Crie o conjunto de rótulos e o conjunto de features\n",
- "y = ______\n",
- "\n",
- "X = ______"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Construa o conjunto de treino e teste usando:\n",
- "# - 0.2 (20%) para o tamanho do conjunto de teste\n",
- "# - 42 para o estado aleatório\n",
- "# - Estratifique a divisão com o conjunto de rótulos \n",
- "\n",
- "X_train, X_test, y_train, y_test = ______(___, ___, test_size=___, random_state=___, stratify=___)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Gerando um gráfico com a acurácia do modelo nos conjuntos de treino e de teste, para ver qual quantidade de vizinhos dá o melhor resultado e quais valores causam overfitting"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Verifiquem o gráfico com o número de vizinhos indo até 100 e depois\n",
- "# façam outro gráfico com o número de vizinhos indo até 30\n",
- "\n",
- "neighbors = np.arange(1, 100)\n",
- "train_accuracy = np.empty(len(neighbors))\n",
- "test_accuracy = np.empty(len(neighbors))\n",
- "\n",
- "# Defina o número de vizinhos do modelo KNN igual a k\n",
- "for i, k in enumerate(neighbors):\n",
- " knn = _____\n",
- " knn.fit(___, ___)\n",
- " train_accuracy[i] = knn.score(___, ___)\n",
- " test_accuracy[i] = knn.score(___, ___)\n",
- "\n",
- "# Gerando o gráfico\n",
- "plt.title('k-NN: Número de vizinhos')\n",
- "plt.plot(neighbors, test_accuracy, label = 'Accuracia teste')\n",
- "plt.plot(neighbors, train_accuracy, label = 'Accuracia treino')\n",
- "plt.legend()\n",
- "plt.xlabel('Número de vizinhos')\n",
- "plt.ylabel('Accuracia')\n",
- "plt.show()"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.5"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement