#!/usr/bin/env python3
import math
from collections import defaultdict
from concurrent.futures import ProcessPoolExecutor  # kept for the disabled multiprocessing path in run()

import pandas as pd
import numpy as np
from numba import jit
from numba import types
from numba.typed import Dict, List

# @jit(nopython=True)
def confusion_matrix(y_true, y_pred, labels, n_labels):
    # rows are true classes, columns are predicted classes;
    # `labels` is accepted for symmetry with the caller but only n_labels is used
    result = np.zeros((n_labels, n_labels), dtype=np.int32)

    for i in range(len(y_true)):
        result[y_true[i]][y_pred[i]] += 1

    return result

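# For reference, a toy call (example values assumed, not from the original paste):
#   confusion_matrix(np.array([0, 1, 1]), np.array([0, 0, 1]), labels=None, n_labels=2)
#   -> [[1, 0],
#       [1, 1]]
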
# value type of the outer dict (a per-class dict of metric name -> value);
# numba needs this declared outside the jitted function
per_class_metrics_type = types.DictType(types.unicode_type, types.float64)

@jit(nopython=True)
def metrics_from_confusion_matrix(cm, classes):
    metrics = Dict.empty(
        key_type=types.unicode_type,
        value_type=per_class_metrics_type
    )

    for i in range(len(classes)):
        tp = cm[i, i]

        # FN = row i - TP
        fn = np.sum(cm[i, :]) - tp

        # FP = column i - TP
        fp = np.sum(cm[:, i]) - tp

        # TN = everything - TP - FN - FP (not needed for precision/recall)
        # tn = np.sum(cm) - tp - fn - fp

        # TPR / recall = TP/(TP+FN); guard the denominator because a class
        # can be missing from a bootstrap resample
        tpr = tp / (tp + fn) if (tp + fn) > 0 else 0.0

        # FPR = FP/(FP+TN)
        # fpr = fp / (fp + tn)

        # precision = TP/(TP+FP)
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0

        class_metrics = Dict.empty(
            key_type=types.unicode_type,
            value_type=types.float64
        )
        class_metrics["precision"] = precision
        class_metrics["recall"] = tpr
        metrics[classes[i]] = class_metrics

    return metrics

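# Worked example (assumed 2x2 matrix, rows = true, columns = predicted):
#   cm = [[3, 1],
#         [2, 4]]
#   class 0: TP = 3, FN = 1, FP = 2  ->  recall = 3/4 = 0.75, precision = 3/5 = 0.60
#   class 1: TP = 4, FN = 2, FP = 1  ->  recall = 4/6 ~= 0.67, precision = 4/5 = 0.80
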
def reporter(iteration):
    # draw one bootstrap resample (with replacement) of the encoded (true, predicted) pairs
    df_local = df_encoded_np[np.random.choice(df_length, size=df_length, replace=True), :]
    matrix = confusion_matrix(df_local[:, 0], df_local[:, 1], labels=labels, n_labels=n_labels)
    report = metrics_from_confusion_matrix(matrix, labels_typed)
    # flatten the nested dict [class_name][metric_name] into '<class>_<metric>' keys
    results = dict()
    for key, val in report.items():
        for metric_name, metric_val in val.items():
            if metric_name in ['precision', 'recall']:
                results[key + '_' + metric_name] = metric_val
    return results

def interval_calculator(key, values):
    # return 95% confidence interval of values
    sorted_values = sorted(values)
    n_values = len(sorted_values)
    start_index = math.floor(n_values * 0.025)
    end_index = math.floor(n_values * 0.975)
    return [key, sorted_values[start_index], sorted_values[end_index]]

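# Worked example: with n_values = 1000 bootstrap resamples, start_index = floor(25.0) = 25
# and end_index = floor(975.0) = 975, so the interval spans roughly the 2.5th and 97.5th
# percentiles of the resampled metric.
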
def run():
    # disabled multiprocessing path: map `reporter` over n bootstrap iterations with a
    # ProcessPoolExecutor, then reduce each metric's samples to a confidence interval
    """with ProcessPoolExecutor(8) as executor:
       results = list(executor.map(reporter, range(n)))

       all_results = defaultdict(list)
       for result in results:
           # each result is a dict
           for key, val in result.items():
               all_results[key].append(val)
       results = list(executor.map(interval_calculator, *zip(*all_results.items())))
       print(results)"""
    # TODO: use numba parallel here instead
    results = reporter(1)
    print(results)

estimates = defaultdict(lambda: defaultdict(list))
intervals = defaultdict(lambda: defaultdict(list))

# toy data: 10 rows with a true label and a predicted label per id
df = pd.DataFrame({'id': list(range(10)),
                   'label': ['A', 'B', 'C', 'D', 'C', 'A', 'B', 'B', 'A', 'D'],
                   'predicted': ['B', 'A', 'D', 'A', 'B', 'C', 'C', 'B', 'D', 'A']})
df_length = len(df)
sample_weight = np.ones(df_length, dtype=np.int32)

labels = np.unique(df['label'])
n_labels = len(labels)

# numba's nopython mode cannot consume the object-dtype label array directly,
# so copy the class names into a typed list of plain Python strings
labels_typed = List()
for label in labels:
    labels_typed.append(str(label))

# integer-encode the true and predicted labels and keep them as a 2-column numpy array
df_subset = df[['id', 'label', 'predicted']]
mapping_df = pd.DataFrame({'class': labels, 'value': range(len(labels))})
df_label_encoded = df_subset.merge(mapping_df, left_on='label', right_on='class')
df_predicted_encoded = df_subset.merge(mapping_df, left_on='predicted', right_on='class')
df_encoded = df_label_encoded.merge(df_predicted_encoded, on='id', suffixes=['_label', '_predicted'])
df_encoded_np = df_encoded[['value_label', 'value_predicted']].to_numpy()

# number of bootstrap iterations
n = 1000

run()

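# A minimal serial sketch of the full bootstrap that the disabled ProcessPoolExecutor
# block (and the "numba parallel" TODO) in run() is aiming at: call `reporter` n times,
# collect each metric's resampled values, and reduce them with `interval_calculator`.
# The helper name `bootstrap_intervals` is not from the original paste; it is added
# here purely as an illustration and is not called by default.
def bootstrap_intervals(n_iterations):
    all_results = defaultdict(list)
    for i in range(n_iterations):
        for key, val in reporter(i).items():
            all_results[key].append(val)
    # one [metric_name, lower_bound, upper_bound] triple per '<class>_<metric>' key
    return [interval_calculator(key, values) for key, values in all_results.items()]

# print(bootstrap_intervals(n))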