Advertisement
Guest User

Untitled

a guest
May 20th, 2019
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.16 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. from sklearn.utils import shuffle
  4. from sklearn.model_selection import GridSearchCV
  5. from sklearn.decomposition import PCA
  6. from sklearn.preprocessing import LabelEncoder , StandardScaler
  7. from sklearn.metrics import brier_score_loss, make_scorer
  8. from sklearn.decomposition import PCA
  9. from sklearn.preprocessing import StandardScaler
  10. from sklearn.ensemble import RandomForestClassifier
  11. import datetime as datetime
  12. from sklearn.svm import SVC
  13.  
  14.  
  # Scorer wrapping brier_score_loss for use with model-selection tools.
  # needs_proba=True asks for predicted probabilities rather than class labels;
  # greater_is_better=False marks the metric as a loss (lower is better).
  # It is not referenced anywhere else in the visible part of this file.
  brier = make_scorer(
      brier_score_loss,
      needs_proba=True,
      greater_is_better=False,
  )
  16.  
  17.  
  18.  
  def test(year):
      """Fit a random forest on one year's data and export its test predictions.

      Reads ``yty_train_<year>.csv`` and ``yty_test_<year>.csv`` from the
      current working directory, fits a ``RandomForestClassifier`` on the
      feature columns the two files have in common, prints timing information
      and the Brier score on the test file, and writes
      ``<year>results_full_rf.csv`` with three columns: ``predicted_prob``,
      ``true_prob`` and ``adjusted``.

      Args:
          year: Value interpolated (via ``str``) into the input and output
              file names.

      Returns:
          None. Results are printed and written to disk.
      """
      print("-------------------------------------------------------")
      print(year)
      print(datetime.datetime.now())
      print()
      print()

      full_training_df = pd.read_csv("yty_train_" + str(year) + ".csv")
      full_testing_df = pd.read_csv("yty_test_" + str(year) + ".csv")

      # The label column is named "y_test" in both the training and test files.
      X_train = full_training_df.drop("y_test", axis=1)
      y_train = full_training_df["y_test"]
      X_test = full_testing_df.drop("y_test", axis=1)
      y_test = full_testing_df["y_test"]

      # Keep only the features present in both files, in one shared order, so
      # the model is fitted and evaluated on identical columns.
      # Index.intersection replaces `Index & Index`, which no longer performs
      # a set intersection in current pandas.
      common_columns = X_train.columns.intersection(X_test.columns)
      X_train = X_train[common_columns]
      X_test = X_test[common_columns]

      rf_clf = RandomForestClassifier(n_estimators=5000, max_depth=35, n_jobs=-1)

      t1 = datetime.datetime.now()
      rf_clf.fit(X_train, y_train)
      t2 = datetime.datetime.now()
      print("time to fit:", t2 - t1)

      t1 = datetime.datetime.now()
      # Column 1 of predict_proba is taken as the positive-class probability.
      # NOTE(review): assumes binary labels whose second class is the positive
      # one (e.g. 0/1) - confirm against the data.
      predicted_probs = rf_clf.predict_proba(X_test)[:, 1]
      t2 = datetime.datetime.now()
      print("time to test:", t2 - t1)

      print("test score:", brier_score_loss(y_test, predicted_probs))

      # Build the result frame from raw values so rows pair up by position and
      # the label column really is called "true_prob".
      joined_df = pd.DataFrame(
          {
              "predicted_prob": predicted_probs,
              "true_prob": y_test.values,
          },
          columns=["predicted_prob", "true_prob"],
      )

      # Rescale the predicted probabilities with beta = positives / negatives
      # counted on the test labels.
      # NOTE(review): the formula has the shape of a prior-shift correction
      # for a model trained on re-sampled classes; confirm that the test-set
      # class ratio is the intended source of beta.
      N_plus = joined_df["true_prob"].sum()
      N_minus = joined_df.shape[0] - N_plus
      beta = N_plus / N_minus
      scaled = beta * joined_df["predicted_prob"]
      joined_df["adjusted"] = scaled / (scaled - joined_df["predicted_prob"] + 1)

      joined_df.to_csv(str(year) + "results_full_rf.csv", index=False)
  75.  
  76.  
  # Run the fit / evaluate / export routine once per year suffix, in order.
  for _year in (15, 16, 17):
      test(_year)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement