Advertisement
cyphric

challengeML

Mar 16th, 2022
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.44 KB | None | 0 0
  1. import pandas as pd
  2. import csv
  3. from tqdm import tqdm
  4. from sklearn.model_selection import train_test_split
  5. import numpy as np
  6. from sklearn.neural_network import MLPClassifier
  7.  
# Load the training and evaluation datasets (CSV files expected in the cwd).
df_train = pd.read_csv('TrainOnMe-4.csv')
df_eval = pd.read_csv('EvaluateOnMe-4.csv')
  10.  
  11.  
  12. def dataManagement(dataSet):
  13. train, test = train_test_split(dataSet, test_size=0.3)
  14. del(train["Unnamed: 0"],test["Unnamed: 0"])
  15. labels = train["y"]
  16. test_label = test["y"]
  17. y_dic = {
  18. "C": 1,
  19. "L": 0
  20. }
  21. labels = labels.replace(y_dic)
  22. test_label = test_label.replace(y_dic).to_list()
  23. del(train["y"],test["y"])
  24. #Get character encodings for ET with ord()
  25. et_dic = {
  26. "B": ord('B'),
  27. "W": ord('W'),
  28. "I": ord('I'),
  29. "A": ord('A')
  30. }
  31. train["et"] = train["et"].replace(et_dic)
  32. test["et"] = test["et"].replace(et_dic)
  33. #Lastly normalize the test and training sets
  34. train = (train-train.mean())/train.std()
  35. test = (test-test.mean())/test.std()
  36.  
  37. return labels, train, test_label, test
  38.  
  39.  
  40. def dataEval(dataSet):
  41.  
  42. del(dataSet["Unnamed: 0"])
  43. et_dic = {
  44. "B": ord('B'),
  45. "W": ord('W'),
  46. "I": ord('I'),
  47. "A": ord('A')
  48. }
  49. dataSet["et"] = dataSet["et"].replace(et_dic)
  50. dataSet = (dataSet-dataSet.mean())/dataSet.std()
  51. return dataSet
  52.  
  53.  
  54. def dataMod(prediction):
  55. dic_pred = {
  56. 0: 'L',
  57. 1: 'C'
  58. }
  59. out = [dic_pred[letter] for letter in prediction]
  60. out = ''.join(out)
  61. return out
  62.  
  63. def writeToTxt(prediction):
  64. with open('prediction.txt', 'w') as f:
  65. for line in prediction:
  66. f.write(line)
  67. f.write('\n')
  68. return 0
  69.  
  70. #Best result I could get was 61.5% accuracy on the validation set but that was with 360 nodes and 12 layers, which isn't feasible to run on a regular pc.
  71. if __name__=="__main__":
  72. trRes = []
  73. treeRes = []
  74. y_train,X_train,y_test,X_test = dataManagement(df_train)
  75. X_eval = dataEval(df_eval)
  76. clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(64,8), random_state=1, max_iter=1000) #To validate might wanna reduce no. of layers, takes a while even on my PC.
  77. clf.fit(X_train,y_train)
  78. #trRes.append(clf.score(X_test,y_test))
  79.  
  80. y_predict = clf.predict(X_eval)
  81. y_pred_clean = dataMod(y_predict)
  82.  
  83. writeToTxt(y_pred_clean)
  84. #print(trRes[-1])
  85. #y_predict = clf.predict(X_eval)
  86.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement