Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import sklearn as sk
- import numpy as np
- from sklearn.linear_model import LogisticRegression
- from sklearn.neural_network import MLPClassifier
- from sklearn.tree import DecisionTreeClassifier
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.naive_bayes import GaussianNB
- from sklearn.model_selection import train_test_split
- from sklearn import svm, preprocessing
- from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
- import pickle
- from joblib import dump
- # ---------------------------------------------------------------------------------------------------------------
- # This file trains a model on a random 90% split of train.txt, evaluates it on the held-out 10%, and saves the model as model.joblib
- # ---------------------------------------------------------------------------------------------------------------
# Train an MLP classifier on train.txt, report hold-out metrics, and save the
# fitted model to model.joblib.

# The input is space-separated with no header row.
df = pd.read_csv('train.txt', sep=" ", header=None)
# Discard the first column, then name the remaining eleven columns.
df = df.drop(df.columns[0], axis=1)
df.columns = [
    "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "target",
]

# `columns=` instead of a positional axis: pandas 2.x only accepts the axis
# argument of DataFrame.drop by keyword.
X = df.drop(columns=['target'])
y = df['target']

# Random 90/10 split; no random_state is set, so the split differs per run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# Standardise with statistics learned from the training split only, then apply
# the same transform to the hold-out split. Scaling each split independently
# would feed the model test features on a different scale than it was
# trained on.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Baseline: accuracy of always predicting the majority class.
# NOTE(review): this formula assumes `target` is binary and encoded as 0/1 -
# confirm against the data.
positive_rate = y_test.mean()
print("Null accuracy: ", max(positive_rate, 1 - positive_rate))

# One hidden layer of 500 units; `(500,)` is an actual one-element tuple,
# whereas `(500)` is just the int 500.
model = MLPClassifier(
    solver='lbfgs',
    shuffle=True,
    hidden_layer_sizes=(500,),
    random_state=1,
)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))

# Persist only the classifier, as before. It was trained on standardised
# features, so whoever loads model.joblib must standardise inputs the same way.
dump(model, 'model.joblib')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement