import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

df_train = pd.read_csv('TrainOnMe-4.csv')
df_eval = pd.read_csv('EvaluateOnMe-4.csv')
def dataManagement(dataSet):
    """Split the data, encode labels and the 'et' column, and standardize both splits."""
    train, test = train_test_split(dataSet, test_size=0.3)
    # Drop the index column left over from the CSV export
    del train["Unnamed: 0"], test["Unnamed: 0"]
    labels = train["y"]
    test_label = test["y"]
    # Map the class letters to integers for the classifier
    y_dic = {
        "C": 1,
        "L": 0
    }
    labels = labels.replace(y_dic)
    test_label = test_label.replace(y_dic).to_list()
    del train["y"], test["y"]
    # Get character encodings for 'et' with ord()
    et_dic = {
        "B": ord('B'),
        "W": ord('W'),
        "I": ord('I'),
        "A": ord('A')
    }
    train["et"] = train["et"].replace(et_dic)
    test["et"] = test["et"].replace(et_dic)
    # Lastly, standardize the training and test sets (each split uses its own
    # mean/std here; a variant that reuses training statistics is sketched
    # after this function)
    train = (train - train.mean()) / train.std()
    test = (test - test.mean()) / test.std()
    return labels, train, test_label, test
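# Optional sketch, not called anywhere above: an alternative standardization that
# reuses the training split's mean and std for the test split instead of letting
# each split use its own statistics. The helper name normalizeWithTrainStats is
# my own addition for illustration, not part of the original script.
def normalizeWithTrainStats(train, test):
    mu, sigma = train.mean(), train.std()
    return (train - mu) / sigma, (test - mu) / sigma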
def dataEval(dataSet):
    """Prepare the evaluation set the same way: drop the index column, encode 'et', standardize."""
    del dataSet["Unnamed: 0"]
    # Same ord() encoding as in dataManagement (a one-hot alternative is
    # sketched after this function)
    et_dic = {
        "B": ord('B'),
        "W": ord('W'),
        "I": ord('I'),
        "A": ord('A')
    }
    dataSet["et"] = dataSet["et"].replace(et_dic)
    dataSet = (dataSet - dataSet.mean()) / dataSet.std()
    return dataSet
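# Optional sketch, also not called anywhere: the 'et' letters could be one-hot
# encoded with pd.get_dummies instead of mapped to ord() codes. oneHotEt is a
# hypothetical helper added purely for illustration.
def oneHotEt(frame):
    return pd.get_dummies(frame, columns=["et"])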
def dataMod(prediction):
    """Map the integer predictions back to their class letters."""
    dic_pred = {
        0: 'L',
        1: 'C'
    }
    # One letter per prediction; kept as a sequence so writeToTxt can write one per line
    return [dic_pred[letter] for letter in prediction]
def writeToTxt(prediction):
    """Write one predicted label per line to prediction.txt."""
    with open('prediction.txt', 'w') as f:
        for line in prediction:
            f.write(line)
            f.write('\n')
# Best result I could get was 61.5% accuracy on the validation set, but that was with
# 360 nodes and 12 layers, which isn't feasible to run on a regular PC (see the
# cross-validation sketch below for a cheaper way to compare layer sizes).
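# Hedged sketch of how different hidden-layer sizes could be compared without one
# huge fit: 5-fold cross-validation on the training data. crossValidateSizes is a
# hypothetical helper and the candidate sizes are examples only, not the settings
# used for the 61.5% result.
from sklearn.model_selection import cross_val_score

def crossValidateSizes(X, y, sizes=((64, 8), (128, 16), (64, 32, 8))):
    for size in sizes:
        candidate = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=size,
                                  random_state=1, max_iter=1000)
        scores = cross_val_score(candidate, X, y, cv=5)
        print(size, scores.mean())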
if __name__ == "__main__":
    trRes = []
    # Prepare training/validation splits and the evaluation set
    y_train, X_train, y_test, X_test = dataManagement(df_train)
    X_eval = dataEval(df_eval)
    # For validation you may want to reduce the number of layers; even this
    # configuration takes a while on a regular PC.
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(64, 8),
                        random_state=1, max_iter=1000)
    clf.fit(X_train, y_train)
    #trRes.append(clf.score(X_test, y_test))
    y_predict = clf.predict(X_eval)
    y_pred_clean = dataMod(y_predict)
    writeToTxt(y_pred_clean)
    #print(trRes[-1])
    #y_predict = clf.predict(X_eval)