Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Case Study 1 - SVMs
# ------- Importing and preparing the dataset -------
import pandas as pd

# Load the Iris dataset (expects iris.csv in the working directory)
df = pd.read_csv('iris.csv')

# Drop the ID column -- it carries no predictive information
df = df.drop(['Id'], axis=1)

# Target variable (flower species)
target = df['Species']

# Distinct class labels; set() collects them directly instead of a manual
# add-in-a-loop (order is arbitrary because sets are unordered)
s = list(set(target))

# Keep only the first two classes for a binary problem: rows 100-149
# (the third species) are discarded, leaving rows 0-99
rows = list(range(100, 150))
df = df.drop(df.index[rows])
# ------- Plotting the data -------
import matplotlib.pyplot as plt

# Input (sepal length) and output (petal length) features
x = df['SepalLengthCm']
y = df['PetalLengthCm']

# The first 50 rows are Iris-setosa, the remaining 50 Iris-versicolor
setosa_x, setosa_y = x[:50], y[:50]
versicolor_x, versicolor_y = x[50:], y[50:]

# Scatter plot with one marker/colour per class
plt.figure(figsize=(8, 6))
plt.scatter(setosa_x, setosa_y, marker='+', color='green')
plt.scatter(versicolor_x, versicolor_y, marker='_', color='red')
plt.show()
# ------- SVM algorithm -------
from sklearn.utils import shuffle
# BUG FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection
from sklearn.model_selection import train_test_split
import numpy as np

# Discard the remaining feature columns; keep the two used for training
df = df.drop(['SepalWidthCm', 'PetalWidthCm'], axis=1)

# Encode the binary target: Iris-setosa -> -1, everything else -> +1
target = df['Species']
Y = [-1 if val == 'Iris-setosa' else 1 for val in target]

# Features as a plain list of [sepal length, petal length] rows
df = df.drop(['Species'], axis=1)
X = df.values.tolist()

# Shuffle jointly, then split into 90% train / 10% test
# (train_test_split returns fresh lists, so no pre-initialization needed)
X, Y = shuffle(X, Y)
x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.9)
# ------- Training -------
# Convert everything to NumPy arrays
x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)

# Reshape the labels into column vectors; -1 lets NumPy infer the row
# count instead of hard-coding the 90/10 split sizes
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Training features as column vectors: feature 1 (sepal length) and
# feature 2 (petal length)
train_f1 = x_train[:, 0].reshape(-1, 1)
train_f2 = x_train[:, 1].reshape(-1, 1)

# One weight per training sample (this is the tutorial's per-sample
# formulation), initialized to zero and sized from the actual data
n_train = len(x_train)
w1 = np.zeros((n_train, 1))
w2 = np.zeros((n_train, 1))

# Hyperparameters: epoch counter and learning rate
epochs = 1
alpha = 0.0001

# Gradient descent with a hinge-loss-style update; the regularization
# term decays as 1/epochs over the run
while epochs < 10000:
    y = w1 * train_f1 + w2 * train_f2
    prod = y * y_train
    print(epochs)
    for count, val in enumerate(prod):
        if val >= 1:
            # Correctly classified with margin: apply only the shrinkage
            w1 = w1 - alpha * (2 * 1 / epochs * w1)
            w2 = w2 - alpha * (2 * 1 / epochs * w2)
        else:
            # Margin violation: push the weights toward this sample
            w1 = w1 + alpha * (train_f1[count] * y_train[count] - 2 * 1 / epochs * w1)
            w2 = w2 + alpha * (train_f2[count] * y_train[count] - 2 * 1 / epochs * w2)
    epochs += 1
# ------- Model evaluation -------
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

# The weight vectors were sized for the training set; keep only as many
# entries as there are test points (generalized from the hard-coded
# 10/90 sizes so a different train_size still works)
n_test = len(x_test)
index = list(range(n_test, len(w1)))
w1 = np.delete(w1, index)
w2 = np.delete(w2, index)

# Back to column vectors
w1 = w1.reshape(-1, 1)
w2 = w2.reshape(-1, 1)

# Test-set features as column vectors
test_f1 = x_test[:, 0].reshape(-1, 1)
test_f2 = x_test[:, 1].reshape(-1, 1)

# Predictions: linear score thresholded at 1 -> class +1, else -1
y_pred = w1 * test_f1 + w2 * test_f2
predictions = [1 if val > 1 else -1 for val in y_pred]

print("\nAcurácia nos Dados de Teste (Algoritmo Personalizado)", accuracy_score(y_test, predictions))

# Benchmark against scikit-learn's ready-made linear SVM
clf = SVC(kernel='linear')
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print("\nAcurácia nos Dados de Teste (Algoritmo Pronto do Scikit-Learn)", accuracy_score(y_test, y_pred))
print("\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement