Advertisement
Guest User

Untitled

a guest
Nov 17th, 2019
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.09 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. from sklearn.feature_selection import SelectKBest, f_classif
  4. from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
  5. from sklearn.model_selection import RepeatedStratifiedKFold
  6. from sklearn.neural_network import MLPClassifier
  7.  
  8.  
  9. def getInputAttributes():
  10. attributes = []
  11. with open("data/allhypo.names", "r") as data:
  12. for line in data.readlines():
  13. attributes.append(line)
  14. # cechy sa w linijkach od 11 do 40
  15. attributes = attributes[11:40]
  16. for index, param in enumerate(attributes):
  17. # param to jedna cecha, index to jej index w pliku
  18. attributes[index] = param.split(":")[0]
  19. return attributes
  20.  
  21.  
  22. def getInputData():
  23. thyroidData = pd.read_csv("data/allhypo.data")
  24. columns = getInputAttributes()
  25. # brakujaca nazwa kolumny z klasami
  26. columns.append('class')
  27. thyroidData.columns = columns
  28. for columnName in columns:
  29. thyroidData[columnName] = thyroidData[columnName].replace(['?'], '0')
  30. thyroidData[columnName] = thyroidData[columnName].replace(['F'], '0')
  31. thyroidData[columnName] = thyroidData[columnName].replace(['M'], '1')
  32. thyroidData[columnName] = thyroidData[columnName].replace(['t'], '1')
  33. thyroidData[columnName] = thyroidData[columnName].replace(['f'], '0')
  34. thyroidData[columnName] = thyroidData[columnName].replace(['WEST'], '0')
  35. thyroidData[columnName] = thyroidData[columnName].replace(['STMW'], '1')
  36. thyroidData[columnName] = thyroidData[columnName].replace(['SVHC'], '2')
  37. thyroidData[columnName] = thyroidData[columnName].replace(['SVI'], '3')
  38. thyroidData[columnName] = thyroidData[columnName].replace(['SVHD'], '4')
  39. thyroidData[columnName] = thyroidData[columnName].replace(['other'], '5')
  40. thyroidData.loc[thyroidData['class'].str.contains('negative', case=False), 'class'] = '0'
  41. thyroidData.loc[thyroidData['class'].str.contains('primary hypothyroid', case=False), 'class'] = '2'
  42. thyroidData.loc[thyroidData['class'].str.contains('compensated hypothyroid', case=False), 'class'] = '3'
  43. thyroidData.loc[thyroidData['class'].str.contains('secondary hypothyroid', case=False), 'class'] = '4'
  44. thyroidData.loc[thyroidData['class'].str.contains('hypothyroid', case=False), 'class'] = '1'
  45. return thyroidData
  46.  
  47.  
  48. def getParsedData(data, ranking, numberOfAttributes):
  49. attributesToConsider = []
  50. parsedData = data
  51. for attribute in range(numberOfAttributes):
  52. attributesToConsider.append(ranking[attribute].get("attribute"))
  53. for column in data:
  54. if (not column in attributesToConsider):
  55. parsedData = parsedData.drop(column, axis=1)
  56. return parsedData
  57.  
  58.  
  59. def runNeutralNetworkClassifer(X_train, X_test, y_train, y_test, perceptronCount):
  60. mlpc = MLPClassifier(hidden_layer_sizes=perceptronCount, max_iter=500)
  61. mlpc.fit(X_train, y_train)
  62. pred_mlpc = mlpc.predict(X_test)
  63. print(classification_report(y_test, pred_mlpc))
  64. print(confusion_matrix(y_test, pred_mlpc))
  65. print(accuracy_score(y_test, pred_mlpc))
  66.  
  67.  
  68. def runNestedCrossValidation(X, y, numberOfTrials):
  69. rskf = RepeatedStratifiedKFold(2, numberOfTrials, random_state=123)
  70. mlpScores = []
  71. confusionMatrix = []
  72. bestScore = 0
  73. it = 1
  74. for train_index, test_index in rskf.split(X, y):
  75. it = it + 1
  76. X_train, X_test = X.iloc[train_index], X.iloc[test_index]
  77. y_train, y_test = y.iloc[train_index], y.iloc[test_index]
  78. mlpc = MLPClassifier(hidden_layer_sizes=11, max_iter=2000)
  79. mlpc.fit(X_train, y_train)
  80. pred_mlpc = mlpc.predict(X_test)
  81. score = accuracy_score(y_test, pred_mlpc)
  82. mlpScores.append(score)
  83. if score > bestScore:
  84. bestScore = score
  85. confusionMatrix = confusion_matrix(y_test, pred_mlpc)
  86.  
  87. return np.mean(mlpScores), confusionMatrix
  88.  
  89.  
  90. def runBackpropagation(X, y, perceptronCount, momentum_, numberOfTrials):
  91. rskf = RepeatedStratifiedKFold(2, numberOfTrials, random_state=123)
  92. bpScores = []
  93. confusionMatrix = []
  94. bestScore = 0
  95. for train_index, test_index in rskf.split(X, y):
  96. X_train, X_test = X.iloc[train_index], X.iloc[test_index]
  97. y_train, y_test = y.iloc[train_index], y.iloc[test_index]
  98. nn = MLPClassifier(activation='logistic', solver='sgd', hidden_layer_sizes=perceptronCount, max_iter=2000,
  99. random_state=1,
  100. momentum=momentum_)
  101. nn.fit(X_train, y_train)
  102. pred = nn.predict(X_test)
  103. score = accuracy_score(y_test, pred)
  104. bpScores.append(score)
  105. if score > bestScore:
  106. bestScore = score
  107. confusionMatrix = confusion_matrix(y_test, pred)
  108.  
  109. return np.mean(bpScores), confusionMatrix
  110.  
  111.  
  112. def runAllExperiments(X_, y, features):
  113. numberOfNeuronsArray = [11, 17, 23]
  114. numberOfTrials = 5
  115. bestResult = 0
  116. confusion_matrix = []
  117. f = open("experimentResults.txt", "w+")
  118. f.write("-----WYNIKI EKSPERYMENTOW DLA SIECI NEURONOWEJ DLA PRZYPADKU NIEDOCZYNNOSCI TARCZYCY-----\n")
  119. print("-----WYNIKI EKSPERYMENTOW DLA SIECI NEURONOWEJ DLA PRZYPADKU NIEDOCZYNNOSCI TARCZYCY-----")
  120. for it in range(1, 8):
  121. X = getParsedData(X_, features, it)
  122. f.write(str(it) + " Wyniki eksperymentow dla " + str(it) + " cech(y)\n")
  123. print(str(it) + " Wyniki eksperymentow dla " + str(it) + " cech(y)")
  124. for j in range(0, 3):
  125. numberOfNeurons = numberOfNeuronsArray[j]
  126. f.write(str(it) + "." + str(j + 1) + " Wyniki dla liczby neuronow w warstwie ukrytej rownej " + str(
  127. numberOfNeurons) + "\n")
  128. f.write("Siec jednokierunkowa: \n" )
  129. print(str(it) + "." + str(j + 1) + " Wyniki dla liczby neuronow w warstwie ukrytej rownej " + str(
  130. numberOfNeurons))
  131. print("Siec jednokierunkowa: ")
  132. result, confMatrix = runNestedCrossValidation(X, y, numberOfTrials)
  133. f.write(str(result) + "\n")
  134. print(str(result))
  135. if (result > bestResult):
  136. bestResult = result
  137. confusionMatrix = confMatrix
  138.  
  139. for k in range(0, 2):
  140. f.write("Propagacja wsteczna z momentum rownym " + str(k) + ": \n")
  141. print("Propagacja wsteczna z momentum rownym " + str(k) + ": ")
  142. result, confMatrix = runBackpropagation(X, y, numberOfNeurons, k, numberOfTrials)
  143. f.write(str(result) + "\n")
  144. print(result)
  145. if (result > bestResult):
  146. bestResult = result
  147. confusionMatrix = confMatrix
  148. f.write("Najlepszy uzyskany wynik: \n")
  149. f.write(str(bestResult) + "\n")
  150. print("Najlepszy uzyskany wynik: ")
  151. print(bestResult)
  152. f.write("Najlepsza macierz konfuzji: \n")
  153. f.write(confusionMatrix)
  154. print("Najlepsza macierz konfuzji: ")
  155. print(confusionMatrix)
  156. f.close()
  157.  
  158. def runExperiments(X_, y, features, numberOfFeatures):
  159. numberOfNeuronsArray = [11, 17, 23]
  160. numberOfTrials = 5
  161. bestResult = 0
  162. confusionMatrix = []
  163. f = open("experimentResults.txt", "w+")
  164. f.write("-----WYNIKI EKSPERYMENTOW DLA SIECI NEURONOWEJ DLA PRZYPADKU NIEDOCZYNNOSCI TARCZYCY-----\n")
  165. print("-----WYNIKI EKSPERYMENTOW DLA SIECI NEURONOWEJ DLA PRZYPADKU NIEDOCZYNNOSCI TARCZYCY-----")
  166.  
  167. X = getParsedData(X_, features, numberOfFeatures)
  168. f.write("Wyniki eksperymentow dla " + str(numberOfFeatures) + " cech\n")
  169. print("Wyniki eksperymentow dla " + str(numberOfFeatures) + " cech")
  170. for j in range(0, 3):
  171. numberOfNeurons = numberOfNeuronsArray[j]
  172. f.write("Wyniki dla liczby neuronow w warstwie ukrytej rownej " + str(numberOfNeurons) + "\n")
  173. f.write("Siec jednokierunkowa: \n")
  174. print("Wyniki dla liczby neuronow w warstwie ukrytej rownej " + str(numberOfNeurons))
  175. print("Siec jednokierunkowa: ")
  176. result, confMatrix = runNestedCrossValidation(X, y, numberOfTrials)
  177. f.write(str(result) + "\n")
  178. print(str(result))
  179. if (result > bestResult):
  180. bestResult = result
  181. confusionMatrix = confMatrix
  182.  
  183. for k in range(0, 2):
  184. f.write("Propagacja wsteczna z momentum rownym " + str(k) + ": \n")
  185. print("Propagacja wsteczna z momentum rownym " + str(k) + ": ")
  186. result, confMatrix = runBackpropagation(X, y, numberOfNeurons, k, numberOfTrials)
  187. f.write(str(result) + "\n")
  188. print(result)
  189. if (result > bestResult):
  190. bestResult = result
  191. confusionMatrix = confMatrix
  192.  
  193. f.write("Najlepszy uzyskany wynik: \n")
  194. f.write(str(bestResult) + "\n")
  195. print("Najlepszy uzyskany wynik: ")
  196. print(bestResult)
  197. f.write("Najlepsza macierz konfuzji: \n")
  198. f.write(str(confusionMatrix))
  199. print("Najlepsza macierz konfuzji: ")
  200. print(str(confusionMatrix))
  201. f.close()
  202.  
  203.  
  204. def main():
  205. # funkcja zwraca macierz ilosci probek i wartosci cech
  206. objects = getInputData()
  207. columns = objects.columns
  208. # znajduje stałą w 26 i 27. do usuniecia
  209. to_delete = [columns[26], columns[27]]
  210. objects = objects.drop(to_delete[0], axis=1)
  211. objects = objects.drop(to_delete[1], axis=1)
  212. # x co techy i wartosci
  213. X = objects.drop('class', axis=1)
  214. # y to tylko wartosci class
  215. y = objects['class']
  216. # ilosc cech
  217. new_features = X.shape[1]
  218. k_best_selector = SelectKBest(score_func=f_classif, k=new_features)
  219. k_best_selector.fit(X, y)
  220. scores = k_best_selector.scores_
  221. column_scores = [{'attribute': name, 'result': round(score, 2)} for name, score in zip(X.columns, scores)]
  222. features = sorted(column_scores, key=lambda x: x['result'], reverse=True)
  223. print("Ranking cech:")
  224. for row in features:
  225. print(row)
  226. numberOfAttributes = 7 # przy 6 przestaje rosnąć
  227. numberOfNeurons = 11
  228. numberOfTrials = 5
  229.  
  230. runExperiments(X, y, features, 6)
  231.  
  232.  
  233. if __name__ == "__main__":
  234. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement