Guest User

Untitled

a guest
Mar 17th, 2018
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.86 KB | None | 0 0
  1. import matplotlib.pyplot as plt
  2. from time import time
  3.  
  4. from sklearn.calibration import CalibratedClassifierCV, calibration_curve
  5. from sklearn.metrics import (brier_score_loss, precision_score, recall_score,
  6. f1_score, log_loss)
  7.  
  8. from sklearn.linear_model import LogisticRegression
  9.  
  10. def plot_calibration_curve(est, name, X_train, X_test, y_train, y_test, plot=True):
  11. """Plot calibration curve for est w/o and with calibration. """
  12. # Calibrated with isotonic calibration
  13. isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')
  14.  
  15. # Calibrated with sigmoid calibration
  16. sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')
  17.  
  18. # Logistic regression with no calibration as baseline
  19. lr = LogisticRegression(C=1., solver='lbfgs')
  20.  
  21. if plot:
  22. fig = plt.figure(1, figsize=(10, 10))
  23. ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
  24. ax2 = plt.subplot2grid((3, 1), (2, 0))
  25. ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
  26. best_logloss = 100
  27.  
  28. for clf, name in [(lr, 'Logistic'),
  29. (est, name),
  30. (isotonic, name + ' + Isotonic'),
  31. (sigmoid, name + ' + Sigmoid')]:
  32. t0 = time()
  33. clf.fit(X_train, y_train)
  34. y_pred = clf.predict(X_test)
  35. t = time()
  36. if hasattr(clf, "predict_proba"):
  37. prob_pos = clf.predict_proba(X_test)[:, 1]
  38. else: # use decision function
  39. prob_pos = clf.decision_function(X_test)
  40. prob_pos = \
  41. (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
  42.  
  43. clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max())
  44. print("%s:" % name)
  45. print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
  46. print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
  47. logloss = log_loss(y_test, prob_pos)
  48. print("\tLog_loss: %1.3f" % logloss)
  49. print("\tTime: %1.2f\n" % (t-t0))
  50.  
  51. if logloss < best_logloss:
  52. best_logloss = logloss
  53. best_clf = name
  54. best_time = t-t0
  55.  
  56. fraction_of_positives, mean_predicted_value = \
  57. calibration_curve(y_test, prob_pos, n_bins=10)
  58.  
  59. if plot:
  60. ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
  61. label="%s (%1.3f)" % (name, clf_score))
  62.  
  63. ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
  64. histtype="step", lw=2)
  65.  
  66. if plot:
  67. ax1.set_ylabel("Fraction of positives")
  68. ax1.set_ylim([-0.05, 1.05])
  69. ax1.legend(loc="lower right")
  70. ax1.set_title('Calibration plots (reliability curve)')
  71.  
  72. ax2.set_xlabel("Mean predicted value")
  73. ax2.set_ylabel("Count")
  74. ax2.legend(loc="upper center", ncol=2)
  75.  
  76. plt.tight_layout()
  77.  
  78. return best_clf, best_logloss, t-t0
Add Comment
Please, Sign In to add comment