Not a member of Pastebin yet?
Sign Up —
it unlocks many cool features!
# Evaluate a RandomForest on an imbalanced dataset with stratified K-fold CV.
# Each training fold is rebalanced with SMOTE followed by Edited Nearest
# Neighbours cleaning (SMOTEENN); the test fold keeps its natural imbalance
# so the reported metrics reflect real-world class ratios.
df = pd.read_csv("Imbalanced_data.csv")  # load the data set
X = df.iloc[:, 0:64].values  # first 64 columns: features
y = df.iloc[:, 64].values    # 65th column: class label

n_splits = 2
n_measures = 2  # recall and ROC AUC
kf = StratifiedKFold(n_splits=n_splits)  # stratified: preserve class ratio per fold
clf_rf = RandomForestClassifier(n_estimators=25, random_state=1)

# scores[fold, 0] = recall, scores[fold, 1] = ROC AUC
scores = np.zeros((n_splits, n_measures))
# enumerate gives the fold number, which is the correct row index into
# `scores` (the original indexed rows by `test_index`, an array of sample
# indices, and used columns 1/2 — column 2 is out of bounds for width 2).
for fold, (train_index, test_index) in enumerate(kf.split(X, y)):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Resample ONLY the training fold — resampling the test fold would leak
    # synthetic samples into evaluation.
    # NOTE(review): `ratio=` and `fit_sample` are the pre-0.4 imbalanced-learn
    # API (now `sampling_strategy=` / `fit_resample`); kept to match the
    # library version this file evidently runs against.
    sm = SMOTE(ratio='auto', k_neighbors=5, n_jobs=-1)
    smote_enn = SMOTEENN(smote=sm)
    X_train_res, y_train_res = smote_enn.fit_sample(X_train, y_train)
    clf_rf.fit(X_train_res, y_train_res)
    y_pred = clf_rf.predict(X_test)  # predict() takes features only, never labels
    scores[fold, 0] = recall_score(y_test, y_pred)
    # roc_auc_score, not auc(): auc() integrates an (x, y) curve and is wrong
    # for (y_true, y_pred) arguments.
    scores[fold, 1] = roc_auc_score(y_test, y_pred)
Add Comment
Please, Sign In to add comment