Advertisement
11eimilia11

ML P01 atual

Jan 3rd, 2019
141
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 12.41 KB | None | 0 0
  1. import pandas as pd
  2. from sklearn.model_selection import KFold
  3. from sklearn.neighbors import KNeighborsClassifier
  4. from sklearn.tree import DecisionTreeClassifier
  5. from sklearn.naive_bayes import GaussianNB
  6. from sklearn.linear_model import LogisticRegression
  7. from sklearn.svm import SVC
  8. from Lista02 import FuncoesML as fun
  9. from scipy import stats
  10. import numpy as np
  11. import time
  12.  
  13.  
  14. #Criando a classe receita que irá conter nome, classe e vetor de ingredientes
  15. class Receita:
  16.     Name = None
  17.     Class = 0
  18.     ingredientes = []
  19.     ingredientesnorm = []
  20.  
  21.  
  22. #Método que retorna o vetor de ingredientes
  23.     def getingrednorm(self):
  24.         return self.ingredientesnorm
  25.  
  26. #Construtor da classe receita
  27.     def __init__(self, name, Class, ingredientes):
  28.         self.Name = name
  29.         self.Class = Class
  30.         self.ingredientes = ingredientes
  31.  
  32. #Método que adiciona ingredientes no vetor de ingredientes
  33.     def adicionaringrediente(self, ingrediente):
  34.         self.ingredientes.append(ingrediente)
  35.  
  36. #abrindo o arquivo com a base de dados
  37. reshipe = open("C:/Users/Auricelia/Desktop/DatasetsML/ReshibaseQ.txt", "rt", encoding="utf8")
  38.  
  39. #criando o vetor de receitas
  40. receitas = []
  41.  
  42. # preenchendo o vetor de receitas
  43. for receita in reshipe:
  44.     dividido = receita.split(sep=',')
  45.     dividido[(len(dividido) - 1)] = dividido[(len(dividido) - 1)].replace('\n', '')
  46.     ingredientes = []
  47.  
  48.     for x in range(2, len(dividido)):
  49.         ingredientes.append(dividido[x])
  50.  
  51.     receitas.append(Receita(dividido[1], dividido[0], ingredientes))
  52.  
  53. #vetor que irá receber todos os ingredientes sem repetição para fazer os vetores binários
  54. todosingredientes = []
  55.  
  56. #preenchendo o vetor 'todosingredientes' sem repetição
  57. for rec in receitas:
  58.     for ingrediente in rec.ingredientes:
  59.  
  60.         if todosingredientes.__contains__(ingrediente) == False:
  61.             todosingredientes.append(ingrediente)
  62. #ordenando o vetor
  63. todosingredientes = sorted(todosingredientes)
  64.  
  65. # preenchendo nos objetos receita o vetor binário com 0
  66.  
  67. for rec in receitas:
  68.     norm = []
  69.     for y in range(0, len(todosingredientes)):
  70.         norm.append(0)
  71.     rec.ingredientesnorm = norm
  72.  
  73. # Colocando 1 na posição em que existe o ingrediente
  74.  
  75. for rec in receitas:
  76.     for y in rec.ingredientes:
  77.         pos = todosingredientes.index(y)
  78.         rec.ingredientesnorm[pos] = 1
  79.  
  80. # Vetor que irá receber os vetores binários de ingreientes de cada receita
  81. arrayingredientesnorm = []
  82.  
  83. # Preenchendo o vetor com os ingredientes normalizados
  84.  
  85. for rec in receitas:
  86.     arrayingredientesnorm.append(rec.ingredientesnorm)
  87.  
  88. # Vetor que irá receber as classes de cada receita
  89. arrayclasse = []
  90.  
  91. # preenchendo o vetor com as classes de cada receita
  92. for rec in receitas:
  93.     arrayclasse.append(rec.Class)
  94.  
  95. # criando o dataframe que irá armazenar os ingredientes
  96. df = pd.DataFrame(arrayingredientesnorm)
  97.  
  98. #adicionando a classe ao dataframe
  99. df['Class'] = arrayclasse
  100. '''
  101. #print(df)
  102.  
  103. print('Entrada',fun.quantidade_por_classe(reshipe, 'Class', 1))
  104. print('Prato principal',fun.quantidade_por_classe(reshipe, 'Class', 2))
  105. print('Acompanhamento',fun.quantidade_por_classe(reshipe, 'Class', 3))
  106. print('Sobremesa',fun.quantidade_por_classe(reshipe, 'Class', 4))
  107.  
  108. '''
  109. df.to_csv('C:/Users/Auricelia/Desktop/DataSetsML/df_norm.csv')
  110.  
  111. #instanciando o kfold com k = 10
  112. kfold = KFold(10, True, 1)
  113.  
  114. #instanciando os aloritmos usados
  115.  
  116. #KNN K = 3, K = 5, K = 7
  117.  
  118. K_3 = KNeighborsClassifier(n_neighbors=3, metric='euclidean')
  119. acertoK_3 = [] # vetor que irá conter as acuráricas do algoritmo em cada um dos testes
  120. k_3time = [] # vetor que irá conter os tempos de duração de cada algoritmo em cada um dos testes
  121.  
  122. K_5 = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
  123. acertoK_5 = []
  124. k_5time = []
  125.  
  126. K_7 = KNeighborsClassifier(n_neighbors=7, metric='euclidean')
  127. acertoK_7 = []
  128. k_7time = []
  129.  
  130. # KNN Ponderado K = 3, K = 5, K = 7
  131.  
  132. KP_3 = KNeighborsClassifier(n_neighbors=3, weights='distance',metric='euclidean')
  133. acertoKP_3 = []
  134. kp3time = []
  135.  
  136. KP_5 = KNeighborsClassifier(n_neighbors=5, weights='distance', metric='euclidean')
  137. acertoKP_5 = []
  138. kp5time = []
  139.  
  140. KP_7 = KNeighborsClassifier(n_neighbors=7, weights='distance', metric='euclidean')
  141. acertoKP_7 = []
  142. kp7time = []
  143.  
  144. #Naive Bayes
  145.  
  146. naiveBayes = GaussianNB()
  147. acertonaiveBayes = []
  148. naiveBayestime = []
  149.  
  150. #Árvore de decisão
  151.  
  152. ArvoreDecisao = DecisionTreeClassifier()
  153. acertoArvoreDecisao= []
  154. arvoreDecisaotime = []
  155.  
  156. #SVM linear
  157.  
  158. SVMlinear = SVC(kernel='linear')
  159. acertoSVMLinear = []
  160. svmlineartime = []
  161.  
  162.  
  163. #SVM RBF
  164.  
  165. SVMrbf = SVC(kernel='rbf', gamma='scale')
  166. acertoSVMrbf= []
  167. svmrbftime = []
  168.  
  169. #Regressão Logística
  170.  
  171. logisticRegr = LogisticRegression()
  172. logisticRarray = []
  173. logistictime = []
  174.  
  175. # variável que irá servir para calcular o tempo total de execução dos algoritmos
  176. tempoinicial = time.time()
  177.  
  178.  
  179. for x in range(0, 5):
  180.  
  181.     tempo1 = time.time()
  182.     cols = list(df.columns)
  183.     cols.remove('Class')
  184.  
  185.     # separando os dataframes um com classe outro sem classe
  186.     df_noclass = df[cols]
  187.     df_class = df['Class']
  188.  
  189.     # início do kfold
  190.     c = kfold.split(df)
  191.  
  192.     for train_index, test_index in c:
  193.  
  194.         noclass_train, noclass_test = df_noclass.iloc[train_index], df_noclass.iloc[test_index]
  195.         class_train, class_test = df_class.iloc[train_index], df_class.iloc[test_index]
  196.  
  197.         K_3start = time.time()
  198.         K_3.fit(noclass_train, class_train)
  199.         acertoK_3.append(K_3.score(noclass_test, class_test))
  200.         K_3end = time.time()
  201.         k_3time.append(K_3end - K_3start)
  202.  
  203.         K_5start = time.time()
  204.         K_5.fit(noclass_train, class_train)
  205.         acertoK_5.append(K_5.score(noclass_test, class_test))
  206.         K_5end = time.time()
  207.         k_5time.append(K_5end - K_5start)
  208.  
  209.         K_7start = time.time()
  210.         K_7.fit(noclass_train, class_train)
  211.         acertoK_7.append(K_7.score(noclass_test, class_test))
  212.         K_7end = time.time()
  213.         k_7time.append(K_7end - K_7start)
  214.  
  215.         naivestart = time.time()
  216.         naiveBayes.fit(noclass_train, class_train)
  217.         acertonaiveBayes.append(naiveBayes.score(noclass_test, class_test))
  218.         naiveend = time.time()
  219.         naiveBayestime.append(naiveend - naivestart)
  220.  
  221.         arvorestart = time.time()
  222.         ArvoreDecisao.fit(noclass_train, class_train)
  223.         acertoArvoreDecisao.append(ArvoreDecisao.score(noclass_test, class_test))
  224.         treeend = time.time()
  225.         arvoreDecisaotime.append(treeend - arvorestart)
  226.  
  227.         kp3start = time.time()
  228.         KP_3.fit(noclass_train, class_train)
  229.         acertoKP_3.append(KP_3.score(noclass_test, class_test))
  230.         kp3end = time.time()
  231.         kp3time.append(kp3end - kp3start)
  232.  
  233.         kp7start = time.time()
  234.         KP_7.fit(noclass_train, class_train)
  235.         acertoKP_7.append(KP_7.score(noclass_test, class_test))
  236.         kp7end = time.time()
  237.         kp7time.append(kp7end - kp7start)
  238.  
  239.         kp5start = time.time()
  240.         KP_5.fit(noclass_train, class_train)
  241.         acertoKP_5.append(KP_5.score(noclass_test, class_test))
  242.         kp5end = time.time()
  243.         kp5time.append(kp5end - kp5start)
  244.  
  245.         svmlinearstart = time.time()
  246.         SVMlinear.fit(noclass_train, class_train)
  247.         acertoSVMLinear.append(SVMlinear.score(noclass_test, class_test))
  248.         svmlinearend = time.time()
  249.         svmlineartime.append(svmlinearend - svmlinearstart)
  250.  
  251.         svmrbfstart = time.time()
  252.         SVMrbf.fit(noclass_train, class_train)
  253.         acertoSVMrbf.append(SVMrbf.score(noclass_test, class_test))
  254.         svmrbfend = time.time()
  255.         svmrbftime.append(svmrbfend - svmrbfstart)
  256.  
  257.         logisticstart = time.time()
  258.         logisticRegr.fit(noclass_train, class_train)
  259.         logisticRarray.append(logisticRegr.score(noclass_test, class_test))
  260.         logisticend = time.time()
  261.         logistictime.append(logisticend - logisticstart)
  262.  
  263.  
  264.     df = df.sample(frac=1)
  265.     print("Terminou a ", x)
  266.     tempo2 = time.time()
  267.     print("Tempo da rodada ", x, (tempo2 - tempo1) / 60)
  268.  
  269. tempofinal = time.time()
  270.  
  271. mediaknn3 = np.mean(acertoK_3)
  272. medianaknn3 = np.median(acertoK_3)
  273. stdknn3 = np.std(acertoK_3)
  274. timeknn3 = np.mean(acertoK_3)
  275.  
  276. mediaknn5 = np.mean(acertoK_5)
  277. medianaknn5 = np.median(acertoK_5)
  278. stdknn5 = np.std(acertoK_5)
  279. timeknn5 = np.mean(acertoK_5)
  280.  
  281. mediaknn7 = np.mean(acertoK_7)
  282. medianaknn7 = np.median(acertoK_7)
  283. stdknn7 = np.std(acertoK_7)
  284. timeknn7 = np.mean(acertoK_7)
  285.  
  286.  
  287. print('________________________________________________\n')
  288. print("KNN")
  289. print("Media:\nK = 3: ", mediaknn3, " K = 5: ", mediaknn5, " K = 7: ", mediaknn7)
  290. print("Mediana:\nK = 3: ", medianaknn3, " K = 5: ", medianaknn5, " K = 7: ", medianaknn7)
  291. print("Desvio Padrão:\nK = 3: ", stdknn3, " K = 5: ", stdknn5, " K = 7: ", stdknn7)
  292. print("Tempo médio:\nK = 3: ", timeknn3, " K = 5: ", timeknn5, " K = 7: ", timeknn7)
  293. print("_______________________________________________")
  294.  
  295. mediaknnpounded3 = np.mean(acertoKP_3)
  296. medianaknnpounded3 = np.median(acertoKP_3)
  297. stdknnpounded3 = np.std(acertoKP_3)
  298. timewknn3 = np.mean(acertoKP_3)
  299.  
  300. mediaknnpounded5 = np.mean(acertoKP_5)
  301. medianaknnpounded5 = np.median(acertoKP_5)
  302. stdknnpounded5 = np.std(acertoKP_5)
  303. timewknn5 = np.mean(acertoKP_5)
  304.  
  305. mediaknnpounded7 = np.mean(acertoKP_7)
  306. medianaknnpounded7 = np.median(acertoKP_7)
  307. stdknnpounded7 = np.std(acertoKP_7)
  308. timewknn7 = np.mean(acertoKP_7)
  309.  
  310. print("_______________________________________________")
  311. print("KNN Ponderado ")
  312. print("Media:\nk = 1: ", mediaknnpounded3, " k = 3: ", mediaknnpounded5, " k = 5: ", mediaknnpounded7)
  313. print("Mediana:\nk = 1: ", medianaknnpounded3, " k = 3: ", medianaknnpounded5, " k = 5: ", medianaknnpounded7)
  314. print("Desvio padrão:\nk = 1: ", stdknnpounded3, " k = 3: ", stdknnpounded5, " k = 5: ", stdknnpounded7)
  315. print("Tempo médio:\nk = 1: ", timewknn3, " k = 3: ", timewknn5, " k = 5: ", timewknn7)
  316. print("_______________________________________________")
  317.  
  318. medianaive = np.mean(acertonaiveBayes)
  319. mediananaive = np.median(acertonaiveBayes)
  320. stdnaive = np.std(acertonaiveBayes)
  321. timenaive = np.mean(acertonaiveBayes)
  322.  
  323. print("_______________________________________________")
  324. print("Naïve Bayes")
  325. print("Media: ", medianaive)
  326. print("Mediana: ", mediananaive)
  327. print("Desvio padrão: ", stdnaive)
  328. print("Tempo médio: ", timenaive)
  329. print("_______________________________________________")
  330.  
  331. mediatree = np.mean(acertoArvoreDecisao)
  332. medianatree = np.median(acertoArvoreDecisao)
  333. stdtree = np.std(acertoArvoreDecisao)
  334. timetree = np.mean(acertoArvoreDecisao)
  335.  
  336. print("_______________________________________________")
  337. print("Árvore de decisão")
  338. print("Media: ", mediatree)
  339. print("Mediana: ", medianatree)
  340. print("Desvio padrão: ", stdtree)
  341. print("Tempo médio: ", timetree)
  342. print("_______________________________________________")
  343.  
  344. mediasvmlinear = np.mean(acertoSVMLinear)
  345. medianasvmlinear = np.median(acertoSVMLinear)
  346. stdsvmlinear = np.std(acertoSVMLinear)
  347. timesvmlinear = np.mean(acertoSVMLinear)
  348.  
  349. print("_______________________________________________")
  350. print("SVM kernel linear")
  351. print("Media: ", mediasvmlinear)
  352. print("Mediana: ", medianasvmlinear)
  353. print("Desvio padrão: ", stdsvmlinear)
  354. print("Tempo médio: ", timesvmlinear)
  355. print("_______________________________________________")
  356.  
  357. mediasvmrbf = np.mean(acertoSVMrbf)
  358. medianasvmrbf = np.median(acertoSVMrbf)
  359. stdsvmrbf = np.std(acertoSVMrbf)
  360. timesvmrbf = np.mean(acertoSVMrbf)
  361.  
  362. print("_______________________________________________")
  363. print("SVM kernel rbf")
  364. print("Media: ", mediasvmrbf)
  365. print("Mediana: ", medianasvmrbf)
  366. print("Desvio padrão: ", stdsvmrbf)
  367. print("Tempo médio: ", timesvmrbf)
  368. print("_______________________________________________")
  369.  
  370. medialogistic = np.mean(logisticRarray)
  371. medianalogistic = np.median(logisticRarray)
  372. stdslogistic = np.std(logisticRarray)
  373. timelogistic = np.mean(logistictime)
  374.  
  375. print("_______________________________________________")
  376. print("Regressao Logistica")
  377. print("Media: ", medialogistic)
  378. print("Mediana: ", medianalogistic)
  379. print("Desvio padrão: ", stdslogistic)
  380. print("Tempo médio: ", timelogistic)
  381. print("_______________________________________________")
  382.  
  383.  
  384. print("Tempo total: ", (tempofinal - tempoinicial) / 60)
  385.  
  386.  
  387. # implementando o teste de Friedman para todos os algoritmos usados
  388. friedman = stats.friedmanchisquare(acertoK_3,acertoK_5,acertoK_7,acertoKP_3,acertoKP_5,acertoKP_7,acertonaiveBayes,acertoArvoreDecisao,acertoSVMrbf,acertoSVMLinear,logisticRarray)
  389.  
  390. print(friedman)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement