Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Train a random forest on the training CSVs and write test-set predictions.

The script reads a feature table and its labels, adds one derived feature,
evaluates a RandomForestClassifier on a 70/30 hold-out split, prints the
feature importances and the hold-out/train error rates, and finally writes
one predicted label per line for the separate test feature table.
"""
import pandas as pd
from sklearn import tree
from sklearn import ensemble
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

data = pd.read_csv("C:/users/vova/desktop/files/x_train.csv")
# The label file has no header row, so the first line is read as data.
ans = pd.read_csv("C:/users/vova/desktop/files/y_train.csv", header=None)
predictions = pd.read_csv("C:/users/vova/desktop/files/x_test.csv")

# Add the same derived feature to both tables so that the model is fitted
# and applied on identical columns.
# NOTE(review): rows with zero played days would produce inf/NaN here --
# confirm the input data excludes them.
for frame in (data, predictions):
    frame["AttmptsPerDay"] = (
        frame["totalNumOfAttempts"] / frame["numberOfDaysActuallyPlayed"]
    )

X_train, X_test, y_train, y_test = train_test_split(
    data, ans, test_size=0.3, random_state=11
)

rf = ensemble.RandomForestClassifier(n_estimators=150, random_state=11)
# The labels are held in a DataFrame; flatten them to the 1-D array that
# fit() expects.
rf.fit(X_train, y_train.values.ravel())

y_train_predict = rf.predict(X_train)
y_test_predict = rf.predict(X_test)

importances = rf.feature_importances_
# Feature positions sorted from most to least important (not used further in
# the visible part of the script).
indices = np.argsort(importances)[::-1]
print("Feature importances:", importances)

# Misclassification rates. The comparison is done on flat label arrays:
# comparing the label DataFrame directly against a list makes pandas align
# the list with the frame's columns instead of comparing row by row.
print(np.mean(y_test.values.ravel() != y_test_predict))
print(np.mean(y_train.values.ravel() != y_train_predict))

answer = rf.predict(predictions)
# The context manager guarantees the file is flushed and closed, even if a
# write fails part-way through.
with open("C:/users/vova/desktop/y_test.csv", "w") as f:
    f.writelines(str(label) + '\n' for label in answer)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement