Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from time import time

import matplotlib.pyplot as plt
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (brier_score_loss, precision_score, recall_score,
                             f1_score, log_loss)
def plot_calibration_curve(est, name, X_train, X_test, y_train, y_test,
                           plot=True):
    """Compare ``est`` with and without probability calibration.

    Four classifiers are fitted on the training split and evaluated on the
    test split: a logistic-regression baseline, ``est`` itself, and ``est``
    wrapped in ``CalibratedClassifierCV`` (cv=2) with isotonic and with
    sigmoid calibration. Precision, recall, log loss and the fit+predict
    wall-clock time of each one are printed.

    When ``plot`` is true, a reliability curve (labelled with the Brier
    score) and a histogram of predicted probabilities are drawn for every
    classifier on matplotlib figure 1.

    Args:
        est: Classifier to evaluate. It is fitted in place by this function.
        name: Display name for ``est`` in the printed report and the legend.
        X_train, y_train: Training data passed to each ``fit`` call.
        X_test, y_test: Held-out data used for every metric. ``y_test`` must
            expose ``.max()`` (the largest label is treated as the positive
            class for the Brier score).
        plot: Whether to draw the calibration figure.

    Returns:
        Tuple ``(best_name, best_logloss, best_time)`` describing the
        classifier with the lowest test log loss: its display name, that
        log loss, and the seconds it spent in fit + predict. The name and
        time are ``None`` if no classifier produced a comparable log loss.
    """
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')
    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')
    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1., solver='lbfgs')

    if plot:
        plt.figure(1, figsize=(10, 10))
        ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
        ax2 = plt.subplot2grid((3, 1), (2, 0))
        ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")

    # Start from +inf (not a magic constant) so the first finite log loss
    # always wins, and pre-bind the winners so the return can never hit an
    # unbound local.
    best_clf = None
    best_logloss = float("inf")
    best_time = None

    candidates = [
        (lr, 'Logistic'),
        (est, name),
        (isotonic, name + ' + Isotonic'),
        (sigmoid, name + ' + Sigmoid'),
    ]
    # ``label`` is deliberately distinct from the ``name`` parameter so the
    # loop does not overwrite the caller-supplied display name.
    for clf, label in candidates:
        t0 = time()
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        elapsed = time() - t0  # covers fit + predict only

        if hasattr(clf, "predict_proba"):
            prob_pos = clf.predict_proba(X_test)[:, 1]
        else:  # use decision function
            # Min-max scale the raw scores into [0, 1] so they can be used
            # as pseudo-probabilities.
            # NOTE(review): a constant decision function gives a zero span
            # here; presumably never the case for a fitted classifier.
            prob_pos = clf.decision_function(X_test)
            prob_pos = \
                (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        print("%s:" % label)
        print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        logloss = log_loss(y_test, prob_pos)
        print("\tLog_loss: %1.3f" % logloss)
        print("\tTime: %1.2f\n" % elapsed)

        if logloss < best_logloss:
            best_logloss = logloss
            best_clf = label
            best_time = elapsed

        if plot:
            # The Brier score and the calibration curve are only consumed
            # by the figure, so they are skipped when plotting is off.
            # The positive label comes from y_test: the function has no
            # variable named ``y`` in scope.
            clf_score = brier_score_loss(y_test, prob_pos,
                                         pos_label=y_test.max())
            fraction_of_positives, mean_predicted_value = \
                calibration_curve(y_test, prob_pos, n_bins=10)
            ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                     label="%s (%1.3f)" % (label, clf_score))
            ax2.hist(prob_pos, range=(0, 1), bins=10, label=label,
                     histtype="step", lw=2)

    if plot:
        ax1.set_ylabel("Fraction of positives")
        ax1.set_ylim([-0.05, 1.05])
        ax1.legend(loc="lower right")
        ax1.set_title('Calibration plots (reliability curve)')

        ax2.set_xlabel("Mean predicted value")
        ax2.set_ylabel("Count")
        ax2.legend(loc="upper center", ncol=2)

        plt.tight_layout()

    # Return the timing of the best classifier, not of whichever one
    # happened to run last.
    return best_clf, best_logloss, best_time
Add Comment
Please, Sign In to add comment