Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sklearn.neural_network import MLPClassifier
- from sklearn.model_selection import GridSearchCV
- from sklearn.metrics import classification_report
- from sklearn.svm import SVC
- from sklearn.metrics import confusion_matrix
- from sklearn.metrics import roc_auc_score
- from sklearn.metrics import roc_curve
# RANDOM FOREST CLASSIFICATION
# Fit a random forest on x_train, print its test-set classification report,
# show the confusion matrix, and keep the ROC data (rfc_auc, rfc_fpr, rfc_tpr)
# so it can be compared with the other models later in the script.
from sklearn.ensemble import RandomForestClassifier

# Hyper-parameters match the result recorded in the commented-out grid search
# further down in this file.
# NOTE(review): no random_state is passed, so the fitted forest (and every
# score derived from it) can differ from run to run.
rfc = RandomForestClassifier(max_depth=19, n_estimators=45)
rfc.fit(x_train, y_train_class)

pred_y = rfc.predict(test_x)
print(
    classification_report(
        y_true=real_y_class,
        y_pred=pred_y,
        labels=[0, 1],
        target_names=['not popular', 'popular'],
    )
)
plot_confusion_matrix(real_y_class, pred_y)
plt.show()

# auroc score
# Column 1 of predict_proba is used as the positive-class score
# (presumably label 1, 'popular' -- confirm against rfc.classes_).
probs = rfc.predict_proba(test_x)[:, 1]
rfc_auc = roc_auc_score(real_y_class, probs)
print(rfc_auc)
rfc_fpr, rfc_tpr, _ = roc_curve(real_y_class, probs)
# NEURAL NET CLASSIFICATION
# Fit a multi-layer perceptron with a single hidden layer of 22 units on
# x_train, print its test-set classification report, show the confusion
# matrix, and keep the ROC data (mlpc_auc, mlpc_fpr, mlpc_tpr) for later plots.
#
# NOTE(review): scikit-learn documents `learning_rate` as only used when
# solver='sgd'; no solver is passed here, so with the default solver the
# 'adaptive' setting presumably has no effect -- confirm this is intended.
mlpc = MLPClassifier(
    hidden_layer_sizes=(22,),
    activation='relu',
    alpha=.001,
    learning_rate='adaptive',
    max_iter=200,
)
mlpc.fit(x_train, y_train_class)

y_pred = mlpc.predict(test_x)
print(
    classification_report(
        y_true=real_y_class,
        y_pred=y_pred,
        labels=[0, 1],
        target_names=['not popular', 'popular'],
    )
)
plot_confusion_matrix(real_y_class, y_pred)
plt.show()

# auroc score
# Column 1 of predict_proba is used as the positive-class score
# (presumably label 1, 'popular' -- confirm against mlpc.classes_).
probs = mlpc.predict_proba(test_x)[:, 1]
mlpc_auc = roc_auc_score(real_y_class, probs)
print(mlpc_auc)
mlpc_fpr, mlpc_tpr, _ = roc_curve(real_y_class, probs)
# RANDOM FOREST ON projDataTrain_6comp / projDataTest_6comp
# (presumably the 6-component PCA projections of the train/test features,
# judging by the 'RF_PCA' label used for this model -- confirm where they
# are built).
# Fit a smaller forest on the projected data, print its test-set
# classification report, show the confusion matrix, and keep the ROC data
# (rfc_pca_auc, pca_fpr, pca_tpr) for the comparison plots.
#
# NOTE: this rebinds `rfc`, `pred_y` and `probs`, so the full-feature forest
# fitted earlier is no longer reachable through `rfc` after this point.
# NOTE(review): no random_state is passed, so results can vary between runs.
from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(max_depth=5, n_estimators=9)
rfc.fit(projDataTrain_6comp, y_train_class)

pred_y = rfc.predict(projDataTest_6comp)
print(
    classification_report(
        y_true=real_y_class,
        y_pred=pred_y,
        labels=[0, 1],
        target_names=['not popular', 'popular'],
    )
)
plot_confusion_matrix(real_y_class, pred_y)
plt.show()

# auroc score
probs = rfc.predict_proba(projDataTest_6comp)[:, 1]
rfc_pca_auc = roc_auc_score(real_y_class, probs)
print(rfc_pca_auc)
pca_fpr, pca_tpr, _ = roc_curve(real_y_class, probs)
# ROC CURVES
# Overlay the ROC curve of each model on one set of axes; the curves are
# drawn in the order RF, NN, RF_PCA, which is also the legend order.
for _fpr, _tpr, _curve_label in (
    (rfc_fpr, rfc_tpr, 'RF'),
    (mlpc_fpr, mlpc_tpr, 'NN'),
    (pca_fpr, pca_tpr, 'RF_PCA'),
):
    plt.plot(_fpr, _tpr, label=_curve_label)

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves')
plt.legend()
plt.show()
# AUROC COMPARISON
# One bar per model, in the same order as the ROC plot (RF, NN, RF_PCA).
labels = ['RF', 'NN', 'RF_PCA']
scores = [rfc_auc, mlpc_auc, rfc_pca_auc]

# log=True puts the y axis on a logarithmic scale.
plt.bar(labels, scores, log=True)
plt.title('AUROC Scores by Model')
plt.xlabel('Model')
plt.ylabel('AUROC Score')
plt.show()
# GRID SEARCH FOR RFC ON projDataTrain_6comp -> optimal parameters are: {'max_depth': 5, 'n_estimators': 9}
# rfc = RandomForestClassifier()
# parameter_space = {
#     'n_estimators': [8, 9, 10, 11],
#     'max_depth': [5, 6, 7, 8],
# }
# clf = GridSearchCV(rfc, parameter_space, n_jobs=-1, cv=10)
# %time clf.fit(projDataTrain_6comp, y_train_class)
# print('Best parameters found: \n', clf.best_params_)
# pred_y = clf.predict(projDataTest_6comp)
# print('Results on the test set:')
# print(classification_report(y_true=real_y_class, y_pred=pred_y, labels=[0,1], target_names=['Not Popular', 'Popular']))
# GRID SEARCH FOR MLPC -> optimal parameters are: {'alpha': 0.001, 'hidden_layer_sizes': (22,), 'learning_rate': 'adaptive'}
# mlpc = MLPClassifier(learning_rate='adaptive')
# First parameter space:
# parameter_space = {
#     'hidden_layer_sizes': [(10,), (15,), (20,), (25,)],
#     'alpha': [.001],
#     'learning_rate': ['constant', 'adaptive'],
# }
# Second, narrower parameter space over the hidden layer size only:
# parameter_space = {
#     'hidden_layer_sizes': [(19,), (20,), (21,), (22,), (23,)]
# }
# GRID SEARCH FOR RFC ON x_train -> optimal parameters are: {'max_depth': 19, 'n_estimators': 45}
# rfc = RandomForestClassifier()
# parameter_space = {
#     'n_estimators': [43, 44, 45, 46, 47],
#     'max_depth': [18, 19, 20, 21, 22],
# }
# clf = GridSearchCV(rfc, parameter_space, n_jobs=-1, cv=10)
# %time clf.fit(x_train, y_train_class)
# print('Best parameters found: \n', clf.best_params_)
# pred_y = clf.predict(test_x)
# print('Results on the test set:')
# print(classification_report(y_true=real_y_class, y_pred=pred_y, labels=[0,1], target_names=['Not Popular', 'Popular']))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement