Advertisement
Guest User

Untitled

a guest
Nov 17th, 2019
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.99 KB | None | 0 0
  1. from sklearn.neural_network import MLPClassifier
  2. from sklearn.model_selection import GridSearchCV
  3. from sklearn.metrics import classification_report
  4. from sklearn.svm import SVC
  5. from sklearn.metrics import confusion_matrix
  6. from sklearn.metrics import roc_auc_score
  7. from sklearn.metrics import roc_curve
  8.  
  9. # RANDOM FOREST CLASSIFICATION
  10. from sklearn.ensemble import RandomForestClassifier
  11. rfc = RandomForestClassifier(n_estimators=45, max_depth=19)
  12. rfc.fit(x_train, y_train_class)
  13. pred_y = rfc.predict(test_x)
  14. print(classification_report(y_true=real_y_class, y_pred=pred_y,labels=[0,1], target_names=['not popular', 'popular']))
  15. plot_confusion_matrix(real_y_class, pred_y)
  16. plt.show()
  17.  
  18. #auroc score
  19. probs = rfc.predict_proba(test_x)[:, 1]
  20. rfc_auc = roc_auc_score(real_y_class, probs)
  21. print(rfc_auc)
  22. rfc_fpr, rfc_tpr, _ = roc_curve(real_y_class, probs)
  23.  
  24.  
  25.  
  26. # NEURAL NET CLASSIFICATION
  27. mlpc = MLPClassifier(max_iter=200, hidden_layer_sizes=(22,),activation='relu', alpha=.001, learning_rate='adaptive')
  28. mlpc.fit(x_train, y_train_class)
  29. y_pred = mlpc.predict(test_x)
  30.  
  31. print(classification_report(y_true=real_y_class, y_pred=y_pred,labels=[0,1], target_names=['not popular', 'popular']))
  32. plot_confusion_matrix(real_y_class, y_pred)
  33. plt.show()
  34.  
  35. #auroc score
  36. probs = mlpc.predict_proba(test_x)[:, 1]
  37. mlpc_auc = roc_auc_score(real_y_class, probs)
  38. print(mlpc_auc)
  39.  
  40. mlpc_fpr, mlpc_tpr, _ = roc_curve(real_y_class, probs)
  41.  
  42. # projDataTest_6comp
  43. # projDataTrain_6comp
  44. from sklearn.ensemble import RandomForestClassifier
  45. rfc = RandomForestClassifier(n_estimators=9, max_depth=5)
  46. rfc.fit(projDataTrain_6comp, y_train_class)
  47. pred_y = rfc.predict(projDataTest_6comp)
  48. print(classification_report(y_true=real_y_class, y_pred=pred_y,labels=[0,1], target_names=['not popular', 'popular']))
  49. plot_confusion_matrix(real_y_class, pred_y)
  50. plt.show()
  51.  
  52. #auroc score
  53. probs = rfc.predict_proba(projDataTest_6comp)[:, 1]
  54. rfc_pca_auc = roc_auc_score(real_y_class, probs)
  55. print(rfc_pca_auc)
  56.  
  57. pca_fpr, pca_tpr, _ = roc_curve(real_y_class, probs)
  58. plt.plot(rfc_fpr, rfc_tpr, label='RF')
  59. plt.plot(mlpc_fpr, mlpc_tpr, label='NN')
  60. plt.plot(pca_fpr, pca_tpr, label='RF_PCA')
  61. plt.xlabel('False Positive Rate')
  62. plt.ylabel('True Positive Rate')
  63. plt.title('ROC Curves')
  64. plt.legend()
  65. plt.show()
  66.  
  67. scores = [rfc_auc, mlpc_auc, rfc_pca_auc]
  68. labels = ['RF', 'NN', 'RF_PCA']
  69. plt.bar(labels, scores, log=True)
  70. plt.xlabel('Model')
  71. plt.ylabel('AUROC Score')
  72. plt.title('AUROC Scores by Model')
  73. plt.show()
  74. # rfc = RandomForestClassifier() --> {'max_depth': 5, 'n_estimators': 9}
  75. # parameter_space = {
  76. # 'n_estimators': [8, 9, 10, 11],
  77. # 'max_depth': [5, 6, 7, 8],
  78. # }
  79.  
  80. # clf = GridSearchCV(rfc, parameter_space, n_jobs=-1, cv=10)
  81. # %time clf.fit(projDataTrain_6comp, y_train_class)
  82. # print('Best parameters found: \n', clf.best_params_)
  83.  
  84. # pred_y = clf.predict(projDataTest_6comp)
  85. # print('Results on the test set:')
  86. # print(classification_report(y_true=real_y_class, y_pred=pred_y, labels=[0,1], target_names=['Not Popular', 'Popular']))
  87.  
  88. # grid search for optimal parameters -> {'alpha': 0.001, 'hidden_layer_sizes': (22,), 'learning_rate': 'adaptive'}
  89. # mlpc = MLPClassifier(learning_rate='adaptive')
  90. # parameter_space = {
  91. # 'hidden_layer_sizes': [(10,), (15,), (20,), (25,)],
  92. # 'alpha': [.001],
  93. # 'learning_rate': ['constant','adaptive'],
  94. # }
  95. # parameter_space = {
  96. # 'hidden_layer_sizes': [(19,), (20,), (21,), (22,), (23,)]
  97. # }
  98.  
  99. # GRID SEARCH FOR RFC -> optimal parameters are : {'max_depth': 19, 'n_estimators': 45}
  100. # rfc = RandomForestClassifier()
  101. # parameter_space = {
  102. # 'n_estimators': [43, 44, 45, 46, 47],
  103. # 'max_depth': [18, 19, 20, 21, 22],
  104. # }
  105.  
  106. # clf = GridSearchCV(rfc, parameter_space, n_jobs=-1, cv=10)
  107. # %time clf.fit(x_train, y_train_class)
  108. # print('Best parameters found: \n', clf.best_params_)
  109.  
  110. # pred_y = clf.predict(test_x)
  111. # print('Results on the test set:')
  112. # print(classification_report(y_true=real_y_class, y_pred=pred_y, labels=[0,1], target_names=['Not Popular', 'Popular']))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement