Advertisement
Guest User

Untitled

a guest
Nov 17th, 2019
111
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.99 KB | None | 0 0
  1. from sklearn.neural_network import MLPClassifier
  2. from sklearn.model_selection import GridSearchCV
  3. from sklearn.metrics import classification_report
  4. from sklearn.svm import SVC
  5. from sklearn.metrics import confusion_matrix
  6. from sklearn.metrics import roc_auc_score
  7. from sklearn.metrics import roc_curve
  8.  
  9. # RANDOM FOREST CLASSIFICATION
  10. from sklearn.ensemble import RandomForestClassifier
  11. rfc = RandomForestClassifier(n_estimators=45, max_depth=19)
  12. rfc.fit(x_train, y_train_class)
  13. pred_y = rfc.predict(test_x)
  14. print(classification_report(y_true=real_y_class, y_pred=pred_y,labels=[0,1], target_names=['not popular', 'popular']))
  15. plot_confusion_matrix(real_y_class, pred_y)
  16. plt.show()
  17.  
  18. #auroc score
  19. probs = rfc.predict_proba(test_x)[:, 1]
  20. rfc_auc = roc_auc_score(real_y_class, probs)
  21. print(rfc_auc)
  22. rfc_fpr, rfc_tpr, _ = roc_curve(real_y_class, probs)
  23.  
  24.  
  25.  
  26. # NEURAL NET CLASSIFICATION
  27. mlpc = MLPClassifier(max_iter=200, hidden_layer_sizes=(22,),activation='relu', alpha=.001, learning_rate='adaptive')
  28. mlpc.fit(x_train, y_train_class)
  29. y_pred = mlpc.predict(test_x)
  30.  
  31. print(classification_report(y_true=real_y_class, y_pred=y_pred,labels=[0,1], target_names=['not popular', 'popular']))
  32. plot_confusion_matrix(real_y_class, y_pred)
  33. plt.show()
  34.  
  35. #auroc score
  36. probs = mlpc.predict_proba(test_x)[:, 1]
  37. mlpc_auc = roc_auc_score(real_y_class, probs)
  38. print(mlpc_auc)
  39.  
  40. mlpc_fpr, mlpc_tpr, _ = roc_curve(real_y_class, probs)
  41.  
  42. # projDataTest_6comp
  43. # projDataTrain_6comp
  44. from sklearn.ensemble import RandomForestClassifier
  45. rfc = RandomForestClassifier(n_estimators=9, max_depth=5)
  46. rfc.fit(projDataTrain_6comp, y_train_class)
  47. pred_y = rfc.predict(projDataTest_6comp)
  48. print(classification_report(y_true=real_y_class, y_pred=pred_y,labels=[0,1], target_names=['not popular', 'popular']))
  49. plot_confusion_matrix(real_y_class, pred_y)
  50. plt.show()
  51.  
  52. #auroc score
  53. probs = rfc.predict_proba(projDataTest_6comp)[:, 1]
  54. rfc_pca_auc = roc_auc_score(real_y_class, probs)
  55. print(rfc_pca_auc)
  56.  
  57. pca_fpr, pca_tpr, _ = roc_curve(real_y_class, probs)
  58. plt.plot(rfc_fpr, rfc_tpr, label='RF')
  59. plt.plot(mlpc_fpr, mlpc_tpr, label='NN')
  60. plt.plot(pca_fpr, pca_tpr, label='RF_PCA')
  61. plt.xlabel('False Positive Rate')
  62. plt.ylabel('True Positive Rate')
  63. plt.title('ROC Curves')
  64. plt.legend()
  65. plt.show()
  66.  
  67. scores = [rfc_auc, mlpc_auc, rfc_pca_auc]
  68. labels = ['RF', 'NN', 'RF_PCA']
  69. plt.bar(labels, scores, log=True)
  70. plt.xlabel('Model')
  71. plt.ylabel('AUROC Score')
  72. plt.title('AUROC Scores by Model')
  73. plt.show()
  74. # rfc = RandomForestClassifier() --> {'max_depth': 5, 'n_estimators': 9}
  75. # parameter_space = {
  76. # 'n_estimators': [8, 9, 10, 11],
  77. # 'max_depth': [5, 6, 7, 8],
  78. # }
  79.  
  80. # clf = GridSearchCV(rfc, parameter_space, n_jobs=-1, cv=10)
  81. # %time clf.fit(projDataTrain_6comp, y_train_class)
  82. # print('Best parameters found: \n', clf.best_params_)
  83.  
  84. # pred_y = clf.predict(projDataTest_6comp)
  85. # print('Results on the test set:')
  86. # print(classification_report(y_true=real_y_class, y_pred=pred_y, labels=[0,1], target_names=['Not Popular', 'Popular']))
  87.  
  88. # grid search for optimal parameters -> {'alpha': 0.001, 'hidden_layer_sizes': (22,), 'learning_rate': 'adaptive'}
  89. # mlpc = MLPClassifier(learning_rate='adaptive')
  90. # parameter_space = {
  91. # 'hidden_layer_sizes': [(10,), (15,), (20,), (25,)],
  92. # 'alpha': [.001],
  93. # 'learning_rate': ['constant','adaptive'],
  94. # }
  95. # parameter_space = {
  96. # 'hidden_layer_sizes': [(19,), (20,), (21,), (22,), (23,)]
  97. # }
  98.  
  99. # GRID SEARCH FOR RFC -> optimal parameters are : {'max_depth': 19, 'n_estimators': 45}
  100. # rfc = RandomForestClassifier()
  101. # parameter_space = {
  102. # 'n_estimators': [43, 44, 45, 46, 47],
  103. # 'max_depth': [18, 19, 20, 21, 22],
  104. # }
  105.  
  106. # clf = GridSearchCV(rfc, parameter_space, n_jobs=-1, cv=10)
  107. # %time clf.fit(x_train, y_train_class)
  108. # print('Best parameters found: \n', clf.best_params_)
  109.  
  110. # pred_y = clf.predict(test_x)
  111. # print('Results on the test set:')
  112. # print(classification_report(y_true=real_y_class, y_pred=pred_y, labels=[0,1], target_names=['Not Popular', 'Popular']))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement