daily pastebin goal
89%
SHARE
TWEET

Untitled

a guest Jan 17th, 2019 55 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import numpy as np
  2. from sklearn.datasets import make_multilabel_classification
  3. from sklearn.ensemble import GradientBoostingClassifier
  4. from sklearn.model_selection import train_test_split
  5. from sklearn.metrics import confusion_matrix
  6.  
  7. # Dataset init
  8. x, y = make_multilabel_classification(n_samples=1000, n_features=10, n_classes=3, n_labels=1, random_state=0)
  9. y = y.sum(axis=1)
  10. x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0, test_size=0.33)
  11.  
  12. # Classification
  13. classifier = GradientBoostingClassifier()
  14. classifier.fit(x_train, y_train)
  15. y_score = classifier.predict(x_test)
  16. cm = confusion_matrix(y_test, y_score)
  17.  
  18. def calculate_tpr_tnr(cm):
  19.    
  20.     """
  21.     Sensitivity (TPR) and specificity (TNR) calculation
  22.     per class for scikit-learn machine learning algorithms.
  23.    
  24.     -------
  25.     cm : ndarray
  26.         Confusion matrix obtained with `sklearn.metrics.confusion_matrix`
  27.         method.
  28.    
  29.     Returns
  30.     -------
  31.     sensitivities : ndarray
  32.         Array of sensitivity values per each class.
  33.    
  34.     specificities : ndarray
  35.         Array of specificity values per each class.
  36.     """
  37.     # Sensitivity = TP/(TP + FN)
  38.     # TP of a class is a diagonal element
  39.     # Sum of all values in a row is TP + FN
  40.     # So, we can vectorize it this way:
  41.     sensitivities = np.diag(cm) / np.sum(cm, axis=1)
  42.  
  43.     # Specificity = TN/(TN + FP)
  44.     # FP is the sum of all values in a column excluding TP (diagonal element)
  45.     # TN of a class is the sum of all cols and rows excluding this class' col and row
  46.     # A bit harder case...
  47.     # TN + FP
  48.     cm_sp = np.tile(cm, (cm.shape[0], 1, 1))
  49.     z = np.zeros(cm_sp.shape)
  50.     ids = np.arange(cm_sp.shape[0])
  51.  
  52.     # Placing a row mask
  53.     # That will be our TN + FP vectorized calculation
  54.     z[ids, ids, :] = 1
  55.     tnfp = np.ma.array(cm_sp, mask=z).sum(axis=(1, 2))
  56.  
  57.     # TN
  58.     # Now adding a column mask
  59.     z[ids, :, ids] = 1
  60.     tn = np.ma.array(cm_sp, mask=z).sum(axis=(1, 2))
  61.  
  62.     # Finally, calculating specificities per each class
  63.     specificities = (tn / tnfp).filled()
  64.    
  65.     return sensitivities, specificities
  66.  
  67. calculate_tpr_tnr(cm)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top