Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- from sklearn.feature_selection import SelectKBest, f_classif
- from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
- from sklearn.model_selection import RepeatedStratifiedKFold
- from sklearn.neural_network import MLPClassifier
def getInputAttributes():
    """Read the attribute names from ``data/allhypo.names``.

    The lines at zero-based indices 11 through 39 of the file are treated as
    the attribute declarations. For each of them, the text before the first
    ``:`` is taken as the attribute name.

    Returns:
        list: Attribute names in file order (at most 29 entries; fewer if the
        file is shorter than 40 lines).

    Raises:
        OSError: If ``data/allhypo.names`` cannot be opened.
    """
    with open("data/allhypo.names", "r") as namesFile:
        lines = namesFile.readlines()
    # The attribute declarations occupy lines 11..39 (zero-based) of the file.
    return [line.split(":")[0] for line in lines[11:40]]
def getInputData():
    """Load ``data/allhypo.data`` and encode its categorical values.

    Column names come from ``getInputAttributes()`` plus a trailing
    ``'class'`` column. Selected cell values are then replaced by digit
    strings, and the class column is collapsed to the codes ``'0'``-``'4'``.

    Returns:
        pandas.DataFrame: One row per record of the data file, with the
        attribute columns followed by ``'class'``.

    Raises:
        OSError: If one of the input files cannot be opened.
        ValueError: If the number of columns in the data file does not match
            the number of names.
    """
    columns = getInputAttributes()
    # The names list has no entry for the class column, so add it here.
    columns.append('class')

    # header=None keeps the first line of the file as a data row. Without it
    # pandas uses that line as the header, which is then overwritten below,
    # silently dropping one record.
    # NOTE(review): assumes the data file has no header row of its own
    # (the column names are taken from the .names file) - confirm.
    thyroidData = pd.read_csv("data/allhypo.data", header=None)
    thyroidData.columns = columns

    # Whole-cell replacements, applied to every column. Keys and replacement
    # values do not overlap, so a single pass is equivalent to applying the
    # replacements one after another.
    valueEncoding = {
        '?': '0',
        'F': '0',
        'M': '1',
        't': '1',
        'f': '0',
        'WEST': '0',
        'STMW': '1',
        'SVHC': '2',
        'SVI': '3',
        'SVHD': '4',
        'other': '5',
    }
    thyroidData = thyroidData.replace(valueEncoding)

    # Substring matches on the class label (case-insensitive). Order matters:
    # the bare 'hypothyroid' pattern must come last so that it only catches
    # rows that were not already recoded by one of the more specific labels.
    classEncoding = [
        ('negative', '0'),
        ('primary hypothyroid', '2'),
        ('compensated hypothyroid', '3'),
        ('secondary hypothyroid', '4'),
        ('hypothyroid', '1'),
    ]
    for label, code in classEncoding:
        matches = thyroidData['class'].str.contains(label, case=False)
        thyroidData.loc[matches, 'class'] = code
    return thyroidData
def getParsedData(data, ranking, numberOfAttributes):
    """Keep only the columns named by the top entries of a feature ranking.

    Args:
        data: pandas DataFrame holding the candidate columns.
        ranking: Sequence of dicts, each with an ``"attribute"`` key naming a
            column; the first ``numberOfAttributes`` entries are used.
        numberOfAttributes: How many leading entries of ``ranking`` to keep.

    Returns:
        pandas.DataFrame: A new frame with the selected columns, in the same
        order as in ``data``. ``data`` itself is not modified.

    Raises:
        IndexError: If ``numberOfAttributes`` exceeds ``len(ranking)``.
    """
    # A set gives O(1) membership tests while scanning the columns.
    attributesToConsider = {
        ranking[position].get("attribute")
        for position in range(numberOfAttributes)
    }
    # Drop all unwanted columns in one call instead of copying the frame
    # once per dropped column.
    unwanted = [column for column in data if column not in attributesToConsider]
    return data.drop(unwanted, axis=1)
def runNeutralNetworkClassifer(X_train, X_test, y_train, y_test, perceptronCount):
    """Train an MLP on one train/test split and print its test metrics.

    Args:
        X_train: Training samples.
        X_test: Test samples.
        y_train: Training labels.
        y_test: Test labels.
        perceptronCount: Passed to ``MLPClassifier`` as ``hidden_layer_sizes``.

    Prints the classification report, the confusion matrix and the accuracy
    score for the test split, in that order. Returns nothing.
    """
    classifier = MLPClassifier(max_iter=500, hidden_layer_sizes=perceptronCount)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    for metric in (classification_report, confusion_matrix, accuracy_score):
        print(metric(y_test, predictions))
def runNestedCrossValidation(X, y, numberOfTrials, perceptronCount=11):
    """Evaluate a default-solver MLP with repeated stratified 2-fold CV.

    Args:
        X: pandas DataFrame of samples (indexed positionally via ``iloc``).
        y: pandas Series of class labels aligned with ``X``.
        numberOfTrials: Number of repetitions of the 2-fold split.
        perceptronCount: Passed to ``MLPClassifier`` as
            ``hidden_layer_sizes``. Defaults to 11, the value that was
            previously hard-coded.

    Returns:
        tuple: ``(meanAccuracy, confusionMatrix)`` where ``meanAccuracy`` is
        the mean accuracy over all folds and ``confusionMatrix`` is the
        confusion matrix of the fold with the highest accuracy (the first
        such fold on ties), or ``[]`` if no fold scored above 0.
    """
    # Keyword arguments: recent scikit-learn releases declare n_splits and
    # n_repeats as keyword-only, so the positional form raises TypeError.
    rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=numberOfTrials, random_state=123)
    mlpScores = []
    confusionMatrix = []
    bestScore = 0
    for train_index, test_index in rskf.split(X, y):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        # No random_state is set on the classifier, so results vary per run.
        mlpc = MLPClassifier(hidden_layer_sizes=perceptronCount, max_iter=2000)
        mlpc.fit(X_train, y_train)
        pred_mlpc = mlpc.predict(X_test)
        score = accuracy_score(y_test, pred_mlpc)
        mlpScores.append(score)
        if score > bestScore:
            bestScore = score
            confusionMatrix = confusion_matrix(y_test, pred_mlpc)
    return np.mean(mlpScores), confusionMatrix
def runBackpropagation(X, y, perceptronCount, momentum_, numberOfTrials):
    """Evaluate an SGD-trained logistic MLP with repeated stratified 2-fold CV.

    Args:
        X: pandas DataFrame of samples (indexed positionally via ``iloc``).
        y: pandas Series of class labels aligned with ``X``.
        perceptronCount: Passed to ``MLPClassifier`` as ``hidden_layer_sizes``.
        momentum_: Passed to ``MLPClassifier`` as ``momentum``.
        numberOfTrials: Number of repetitions of the 2-fold split.

    Returns:
        tuple: ``(meanAccuracy, confusionMatrix)`` where ``meanAccuracy`` is
        the mean accuracy over all folds and ``confusionMatrix`` is the
        confusion matrix of the fold with the highest accuracy (the first
        such fold on ties), or ``[]`` if no fold scored above 0.
    """
    # Keyword arguments: recent scikit-learn releases declare n_splits and
    # n_repeats as keyword-only, so the positional form raises TypeError.
    rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=numberOfTrials, random_state=123)
    bpScores = []
    confusionMatrix = []
    bestScore = 0
    for train_index, test_index in rskf.split(X, y):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        # Both the splitter and the classifier use a fixed random_state.
        nn = MLPClassifier(
            activation='logistic',
            solver='sgd',
            hidden_layer_sizes=perceptronCount,
            max_iter=2000,
            random_state=1,
            momentum=momentum_,
        )
        nn.fit(X_train, y_train)
        pred = nn.predict(X_test)
        score = accuracy_score(y_test, pred)
        bpScores.append(score)
        if score > bestScore:
            bestScore = score
            confusionMatrix = confusion_matrix(y_test, pred)
    return np.mean(bpScores), confusionMatrix
def runAllExperiments(X_, y, features):
    """Run every experiment for 1 to 7 top-ranked features and log the results.

    For each feature count and each hidden-layer size in ``[11, 17, 23]`` this
    runs ``runNestedCrossValidation`` once and ``runBackpropagation`` with
    momentum 0 and 1. Every result is written to ``experimentResults.txt``
    (the file is truncated first) and printed. The best mean accuracy and its
    confusion matrix are reported at the end.

    Args:
        X_: pandas DataFrame with all candidate feature columns.
        y: pandas Series of class labels.
        features: Feature ranking, as expected by ``getParsedData``.

    Returns:
        None.
    """
    numberOfNeuronsArray = [11, 17, 23]
    numberOfTrials = 5
    bestResult = 0
    # Was misspelled "confusion_matrix", which left confusionMatrix undefined
    # if no result ever exceeded bestResult.
    confusionMatrix = []

    # "with" guarantees the file is closed even if an experiment raises.
    with open("experimentResults.txt", "w+") as f:

        def report(line):
            # Mirror one line to the results file and to stdout.
            f.write(line + "\n")
            print(line)

        report("-----WYNIKI EKSPERYMENTOW DLA SIECI NEURONOWEJ DLA PRZYPADKU NIEDOCZYNNOSCI TARCZYCY-----")
        for it in range(1, 8):
            X = getParsedData(X_, features, it)
            report(str(it) + " Wyniki eksperymentow dla " + str(it) + " cech(y)")
            for j, numberOfNeurons in enumerate(numberOfNeuronsArray):
                report(str(it) + "." + str(j + 1)
                       + " Wyniki dla liczby neuronow w warstwie ukrytej rownej "
                       + str(numberOfNeurons))
                report("Siec jednokierunkowa: ")
                # NOTE(review): numberOfNeurons is not passed to this call, so
                # the feed-forward run does not depend on it - confirm whether
                # that is intended.
                result, confMatrix = runNestedCrossValidation(X, y, numberOfTrials)
                report(str(result))
                if result > bestResult:
                    bestResult = result
                    confusionMatrix = confMatrix
                for k in range(0, 2):
                    report("Propagacja wsteczna z momentum rownym " + str(k) + ": ")
                    result, confMatrix = runBackpropagation(X, y, numberOfNeurons, k, numberOfTrials)
                    report(str(result))
                    if result > bestResult:
                        bestResult = result
                        confusionMatrix = confMatrix

        # NOTE(review): the pasted source lost its indentation; the summary is
        # placed after all loops, mirroring runExperiments - confirm.
        report("Najlepszy uzyskany wynik: ")
        report(str(bestResult))
        report("Najlepsza macierz konfuzji: ")
        # write() only accepts str; passing the matrix itself raised TypeError.
        f.write(str(confusionMatrix))
        print(confusionMatrix)
def runExperiments(X_, y, features, numberOfFeatures):
    """Run the experiments for a fixed number of top-ranked features.

    For each hidden-layer size in ``[11, 17, 23]`` this runs
    ``runNestedCrossValidation`` once and ``runBackpropagation`` with momentum
    0 and 1. Every result is written to ``experimentResults.txt`` (the file is
    truncated first) and printed. The best mean accuracy and its confusion
    matrix are reported at the end.

    Args:
        X_: pandas DataFrame with all candidate feature columns.
        y: pandas Series of class labels.
        features: Feature ranking, as expected by ``getParsedData``.
        numberOfFeatures: How many top-ranked features to keep.

    Returns:
        None.
    """
    numberOfNeuronsArray = [11, 17, 23]
    numberOfTrials = 5
    bestResult = 0
    confusionMatrix = []

    # "with" guarantees the file is closed even if an experiment raises.
    with open("experimentResults.txt", "w+") as f:

        def report(line):
            # Mirror one line to the results file and to stdout.
            f.write(line + "\n")
            print(line)

        report("-----WYNIKI EKSPERYMENTOW DLA SIECI NEURONOWEJ DLA PRZYPADKU NIEDOCZYNNOSCI TARCZYCY-----")
        X = getParsedData(X_, features, numberOfFeatures)
        report("Wyniki eksperymentow dla " + str(numberOfFeatures) + " cech")
        for numberOfNeurons in numberOfNeuronsArray:
            report("Wyniki dla liczby neuronow w warstwie ukrytej rownej " + str(numberOfNeurons))
            report("Siec jednokierunkowa: ")
            # NOTE(review): numberOfNeurons is not passed to this call, so the
            # feed-forward run does not depend on it - confirm whether that is
            # intended.
            result, confMatrix = runNestedCrossValidation(X, y, numberOfTrials)
            report(str(result))
            if result > bestResult:
                bestResult = result
                confusionMatrix = confMatrix
            for k in range(0, 2):
                report("Propagacja wsteczna z momentum rownym " + str(k) + ": ")
                result, confMatrix = runBackpropagation(X, y, numberOfNeurons, k, numberOfTrials)
                report(str(result))
                if result > bestResult:
                    bestResult = result
                    confusionMatrix = confMatrix

        report("Najlepszy uzyskany wynik: ")
        report(str(bestResult))
        report("Najlepsza macierz konfuzji: ")
        # The matrix is written without a trailing newline, as before.
        f.write(str(confusionMatrix))
        print(str(confusionMatrix))
def main():
    """Load the data, rank the features and run the experiments.

    Loads the encoded data set, removes the columns at positions 26 and 27,
    scores every remaining feature with the ANOVA F-test (``f_classif``),
    prints the ranking in descending score order and calls
    ``runExperiments`` with the 6 best-ranked features.
    """
    # Samples in rows, attribute values plus the 'class' column in columns.
    objects = getInputData()
    columns = objects.columns
    # Original author's note: columns 26 and 27 hold a constant and are
    # therefore removed.
    objects = objects.drop([columns[26], columns[27]], axis=1)

    # X holds the feature columns, y only the class labels.
    X = objects.drop('class', axis=1)
    y = objects['class']

    # k equals the number of features, so nothing is filtered out here; the
    # selector is only used to obtain one score per feature.
    k_best_selector = SelectKBest(score_func=f_classif, k=X.shape[1])
    k_best_selector.fit(X, y)
    column_scores = [
        {'attribute': name, 'result': round(score, 2)}
        for name, score in zip(X.columns, k_best_selector.scores_)
    ]
    features = sorted(column_scores, key=lambda x: x['result'], reverse=True)

    print("Ranking cech:")
    for row in features:
        print(row)

    # Original author's note: the result stops growing at 6 features.
    runExperiments(X, y, features, 6)
# Run the experiment pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement