Guest User

Untitled

a guest
Jan 21st, 2018
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.91 KB | None | 0 0
  # Evaluate a random forest on an imbalanced data set with stratified
  # k-fold cross-validation. Within each fold the *training* part only is
  # re-balanced with SMOTE followed by Edited Nearest Neighbours (SMOTEENN);
  # recall and ROC AUC are then measured on the untouched test part and stored
  # in `scores` (one row per fold, one column per measure).
  #
  # Assumes pd, np, StratifiedKFold, RandomForestClassifier, SMOTE, SMOTEENN
  # and recall_score are already imported by the surrounding script.
  from sklearn.metrics import roc_auc_score

  df = pd.read_csv("Imbalanced_data.csv")  # Load the data set
  X = df.iloc[:, 0:64].values  # columns 0..63 are used as the features
  y = df.iloc[:, 64].values  # column 64 is used as the target

  n_splits = 2
  n_measures = 2  # Recall and AUC
  RECALL_COL, AUC_COL = 0, 1  # column layout of `scores`

  # Stratified so that every fold keeps the class proportions of `y`.
  kf = StratifiedKFold(n_splits=n_splits)
  clf_rf = RandomForestClassifier(n_estimators=25, random_state=1)

  # The sampler configuration does not depend on the fold, so build it once.
  # NOTE(review): newer imbalanced-learn releases appear to rename `ratio` to
  # `sampling_strategy` and `fit_sample` to `fit_resample` -- confirm against
  # the installed version before upgrading.
  sm = SMOTE(ratio='auto', k_neighbors=5, n_jobs=-1)
  smote_enn = SMOTEENN(smote=sm)

  scores = np.zeros((n_splits, n_measures))

  for fold, (train_index, test_index) in enumerate(kf.split(X, y)):
      print("TRAIN:", train_index, "TEST:", test_index)
      X_train, X_test = X[train_index], X[test_index]
      y_train, y_test = y[train_index], y[test_index]

      # Resample the training data only; the test fold must stay untouched so
      # the reported metrics reflect the original class distribution.
      x_train_res, y_train_res = smote_enn.fit_sample(X_train, y_train)
      clf_rf.fit(x_train_res, y_train_res)

      # predict() takes the feature matrix only (labels are not an argument).
      y_pred = clf_rf.predict(X_test)
      # ROC AUC is computed from the predicted probability of the second class
      # in clf_rf.classes_ -- assumes a binary target whose positive class is
      # the larger label (the same assumption recall_score's defaults make).
      y_score = clf_rf.predict_proba(X_test)[:, 1]

      # Rows are indexed by fold number, not by the test sample indices.
      scores[fold, RECALL_COL] = recall_score(y_test, y_pred)
      scores[fold, AUC_COL] = roc_auc_score(y_test, y_score)
Add Comment
Please, Sign In to add comment