Advertisement
Guest User

Untitled

a guest
Feb 19th, 2017
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.32 KB | None | 0 0
  1. import pandas as pd
  2. from sklearn import tree
  3. from sklearn import ensemble
  4. from sklearn.model_selection import train_test_split
  5. import numpy as np
  6. from sklearn.neighbors import KNeighborsClassifier
  7. from sklearn.model_selection import GridSearchCV
  8.  
  9. data = pd.read_csv("C:/users/vova/desktop/files/x_train.csv")
  10. ans = pd.read_csv("C:/users/vova/desktop/files/y_train.csv", header = None)
  11. predictions = pd.read_csv("C:/users/vova/desktop/files/x_test.csv")
  12.  
  13.  
  14. data["AttmptsPerDay"] = data["totalNumOfAttempts"]/data["numberOfDaysActuallyPlayed"]
  15. predictions["AttmptsPerDay"] = predictions["totalNumOfAttempts"]/predictions["numberOfDaysActuallyPlayed"]
  16.  
  17.  
  18. #print(data.head())
  19. X_train, X_test, y_train, y_test = train_test_split(data, ans, test_size = 0.3, random_state = 11)
  20.  
  21. rf = ensemble.RandomForestClassifier(n_estimators=150, random_state=11)
  22. rf.fit(X_train, y_train.values.ravel())
  23.  
  24. y_train_predict = rf.predict(X_train)
  25. y_test_predict = rf.predict(X_test)
  26.  
  27. importances = rf.feature_importances_
  28. indices = np.argsort(importances)[::-1]
  29.  
  30. print("Feature importances:", importances)
  31.  
  32. print(np.mean(y_test != list(y_test_predict)))
  33. print(np.mean(y_train != list(y_train_predict)))
  34.  
  35. answer = rf.predict(predictions)
  36. f = open("C:/users/vova/desktop/y_test.csv", "w")
  37. for i in answer:
  38.     f.write(str(i) + '\n')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement