SHARE
TWEET

Untitled

a guest Apr 18th, 2019 80 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Case Study 1 - SVMs

####### Importing and Preparing the Dataset #######

import pandas as pd

# Read the CSV and discard its 'Id' column.
df = pd.read_csv('iris.csv').drop(['Id'], axis=1)

# Target variable: the 'Species' label of every row (taken before any rows
# are removed below).
target = df['Species']

# Distinct values found in the target, as a list (element order is arbitrary
# because it comes from a set).
s = list(set(target))

# Drop the rows at positions 100-149 so only the first 100 rows remain.
# NOTE(review): presumably those rows hold a third species, leaving a
# two-class problem -- confirm against the ordering of iris.csv.
rows = [*range(100, 150)]
df = df.drop(df.index[rows])

  ####### Plotting the Data #######

import matplotlib.pyplot as plt

# Horizontal and vertical coordinates for the scatter plot.
x, y = df['SepalLengthCm'], df['PetalLengthCm']

# The first 50 rows form one group of flowers and the remaining rows the
# other. Variable names are kept from the original script; which species
# each slice really holds depends on the row order of the CSV.
setosa_x, versicolor_x = x[:50], x[50:]
setosa_y, versicolor_y = y[:50], y[50:]

# One scatter series per group: green '+' for the first, red '_' for the second.
plt.figure(figsize=(8, 6))
for xs, ys, marker, color in (
    (setosa_x, setosa_y, '+', 'green'),
    (versicolor_x, versicolor_y, '_', 'red'),
):
    plt.scatter(xs, ys, marker=marker, color=color)
plt.show()

  ####### SVM Algorithm #######

from sklearn.utils import shuffle
try:
    # Current location of train_test_split.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Very old scikit-learn releases only provide the legacy module, which
    # newer releases have removed.
    from sklearn.cross_validation import train_test_split
import numpy as np

# Drop the two width columns; once 'Species' is removed below, the columns
# that remain are the model inputs.
df = df.drop(['SepalWidthCm', 'PetalWidthCm'], axis=1)

# Encode the labels: -1 for 'Iris-setosa', +1 for any other species.
target = df['Species']
Y = [-1 if val == 'Iris-setosa' else 1 for val in target]

# Feature matrix as a plain list of rows.
df = df.drop(['Species'], axis=1)
X = df.values.tolist()

# Shuffle features and labels together, then hold out 10% for testing.
X, Y = shuffle(X, Y)
x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.9)

# Convert everything to NumPy arrays; labels become column vectors whose
# length follows the actual split instead of being hard-coded.
x_train = np.array(x_train)
x_test = np.array(x_test)
y_train = np.array(y_train).reshape(-1, 1)
y_test = np.array(y_test).reshape(-1, 1)

  # Training-set feature columns, each reshaped to a column vector.
train_f1 = x_train[:, 0].reshape(-1, 1)
train_f2 = x_train[:, 1].reshape(-1, 1)
n_train = train_f1.shape[0]

# Weight vectors with one entry per training row. They start at zero and
# every update below is either an elementwise rescale or the addition of a
# single broadcast value, so all entries of a vector stay equal: each vector
# behaves as one scalar weight replicated n_train times.
w1 = np.zeros((n_train, 1))
w2 = np.zeros((n_train, 1))

# Hyperparameters
max_epochs = 10000   # epochs run from 1 to max_epochs - 1
alpha = 0.0001       # learning rate

# Training: hinge-loss sub-gradient updates with a regularisation factor
# that shrinks as 2 / epoch.
for epoch in range(1, max_epochs):

    # Scores and signed margins are computed once per epoch, from the weights
    # as they stood at the start of that epoch.
    scores = w1 * train_f1 + w2 * train_f2
    margins = scores * y_train

    # Progress output: one line per epoch.
    print(epoch)

    decay = 2 * 1 / epoch

    for f1_i, f2_i, label, margin in zip(train_f1, train_f2, y_train, margins):
        if margin >= 1:
            # Sample is on the correct side of the margin: only regularise.
            w1 = w1 - alpha * (decay * w1)
            w2 = w2 - alpha * (decay * w2)
        else:
            # Margin violated: move towards the sample and regularise.
            w1 = w1 + alpha * (f1_i * label - decay * w1)
            w2 = w2 + alpha * (f2_i * label - decay * w2)

  ####### Evaluating the Model #######

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")


# Test-set feature columns, each reshaped to a column vector.
test_f1 = x_test[:, 0].reshape(-1, 1)
test_f2 = x_test[:, 1].reshape(-1, 1)
n_test = test_f1.shape[0]

# The trained weight vectors have one entry per training row, while the test
# set is smaller. Keep as many leading entries as there are test rows so the
# elementwise products below line up (size taken from the data rather than
# hard-coded).
w1 = w1[:n_test].reshape(-1, 1)
w2 = w2[:n_test].reshape(-1, 1)

# Predictions: the class is the sign of the decision value. Thresholding at
# the margin value 1 would label positive samples that fall inside the margin
# as negative, so the boundary is placed at 0.
y_pred = w1 * test_f1 + w2 * test_f2
predictions = [1 if score > 0 else -1 for score in y_pred.ravel()]

print("\nAcurácia nos Dados de Teste (Algoritmo Personalizado)", accuracy_score(y_test, predictions))


# Same data with scikit-learn's ready-made linear SVM. fit() expects a 1-D
# label vector, so the column vector is flattened first.
clf = SVC(kernel='linear')
clf.fit(x_train, y_train.ravel())
y_pred = clf.predict(x_test)
print("\nAcurácia nos Dados de Teste (Algoritmo Pronto do Scikit-Learn)",accuracy_score(y_test, y_pred))
print("\n")
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top