import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from copy import deepcopy

from collections import namedtuple

# Evaluation metric for Innoplexus NER Challenge

def collect_named_entities(tokens):  # Helper function for score calculation
    """
    Creates a list of Entity named-tuples, storing the entity type and the start and end
    offsets of the entity.

    :param tokens: a list of labels
    :return: a list of Entity named-tuples
    """
    Entity = namedtuple("Entity", "e_type start_offset end_offset")
    named_entities = []
    start_offset = None
    end_offset = None
    ent_type = None

    for offset, token_tag in enumerate(tokens):

        if token_tag == 'O':
            # an 'O' tag closes any entity that is currently open
            if ent_type is not None and start_offset is not None:
                end_offset = offset - 1
                named_entities.append(Entity(ent_type, start_offset, end_offset))
                start_offset = None
                end_offset = None
                ent_type = None

        elif ent_type is None:
            # first token of a new entity
            ent_type = token_tag[2:]
            start_offset = offset

        elif ent_type != token_tag[2:] or (ent_type == token_tag[2:] and token_tag[:1] == 'B'):
            # the entity type changes, or a 'B-' tag starts a new entity of the same type
            end_offset = offset - 1
            named_entities.append(Entity(ent_type, start_offset, end_offset))

            # start of a new entity
            ent_type = token_tag[2:]
            start_offset = offset
            end_offset = None

    # catches an entity that goes up until the last token
    # (compare against None so that an entity starting at offset 0 is not dropped)
    if ent_type is not None and start_offset is not None and end_offset is None:
        named_entities.append(Entity(ent_type, start_offset, len(tokens) - 1))

    return named_entities

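# Illustration (hypothetical tag sequence; 'indications' is the entity type used in this challenge):
#   collect_named_entities(['O', 'B-indications', 'I-indications', 'O'])
#   -> [Entity(e_type='indications', start_offset=1, end_offset=2)]
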
def compute_metrics(true_named_entities, pred_named_entities):  # Helper function for score calculation
    eval_metrics = {'correct': 0, 'partial': 0, 'missed': 0, 'spurious': 0}
    target_tags_no_schema = ['indications']

    # overall results
    evaluation = {'partial': deepcopy(eval_metrics)}

    true_which_overlapped_with_pred = []  # keep track of true entities that overlapped with a prediction

    # go through each predicted named-entity
    for pred in pred_named_entities:
        found_overlap = False

        # exact match, i.e. boundary and entity type both match
        if pred in true_named_entities:
            true_which_overlapped_with_pred.append(pred)
            evaluation['partial']['correct'] += 1

        else:
            # otherwise check for a partial overlap with any of the true entities
            for true in true_named_entities:

                # overlap, i.e. boundaries intersect but are not an exact match
                if pred.start_offset <= true.end_offset and true.start_offset <= pred.end_offset:
                    true_which_overlapped_with_pred.append(true)
                    evaluation['partial']['partial'] += 1
                    found_overlap = True
                    break

            # count spurious (i.e. false positive) entities
            if not found_overlap:
                evaluation['partial']['spurious'] += 1

    # count missed entities (i.e. false negatives)
    for true in true_named_entities:
        if true not in true_which_overlapped_with_pred:
            evaluation['partial']['missed'] += 1

    # compute 'possible' and 'actual'
    for eval_type in ['partial']:
        correct = evaluation[eval_type]['correct']
        partial = evaluation[eval_type]['partial']
        missed = evaluation[eval_type]['missed']
        spurious = evaluation[eval_type]['spurious']

        # possible: number of gold-standard annotations that contribute to the final score
        evaluation[eval_type]['possible'] = correct + partial + missed

        # actual: number of annotations produced by the NER system
        evaluation[eval_type]['actual'] = correct + partial + spurious

    return evaluation

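# Illustration of the per-sentence metrics (hypothetical tags; a partial boundary overlap is
# counted separately from an exact match and later weighted by 0.5):
#   true_ents = collect_named_entities(['O', 'B-indications', 'I-indications', 'O'])
#   pred_ents = collect_named_entities(['O', 'B-indications', 'I-indications', 'I-indications'])
#   compute_metrics(true_ents, pred_ents)
#   -> {'partial': {'correct': 0, 'partial': 1, 'missed': 0, 'spurious': 0,
#                   'possible': 1, 'actual': 1}}
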
def list_converter(df):  # Helper function for score calculation
    # groups the tag column by sentence id and returns one list of tags per sentence
    keys, values = df.sort_values('Sent_ID_x').values.T
    ukeys, index = np.unique(keys, True)
    lists = [list(array) for array in np.split(values, index[1:])]
    return lists

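# Illustration (made-up two-sentence frame; the column names match the merged dataframe below):
#   df = pd.DataFrame({'Sent_ID_x': [1, 1, 2, 2],
#                      'tag_x': ['O', 'B-indications', 'O', 'O']})
#   list_converter(df)
#   -> [['O', 'B-indications'], ['O', 'O']]
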
# ideal and pred are dataframes containing, respectively, the actual labels and the predictions
# for the set of sentences in the test data. Both have the same format as the sample
# submission (id, Sent_ID, tag).

def calculate_score(ideal, pred):  # Calculates the final F1 score

    merged = ideal.merge(pred, on="id", how="inner").drop(['Sent_ID_y'], axis=1)

    # The scores are calculated sentence-wise and then aggregated into the overall score.
    # list_converter groups the labels by sentence to generate a list of lists, with each
    # inner list representing a sentence in sequence.
    ideal_ = list_converter(merged.drop(['id', 'tag_y'], axis=1))
    pred_ = list_converter(merged.drop(['id', 'tag_x'], axis=1))

    metrics_results = {'correct': 0, 'partial': 0,
                       'missed': 0, 'spurious': 0, 'possible': 0, 'actual': 0}

    results = {'partial': deepcopy(metrics_results)}

    for true_ents, pred_ents in zip(ideal_, pred_):
        # compute results for one sentence
        tmp_results = compute_metrics(collect_named_entities(true_ents),
                                      collect_named_entities(pred_ents))

        # aggregate overall results
        for eval_schema in results.keys():
            for metric in metrics_results.keys():
                results[eval_schema][metric] += tmp_results[eval_schema][metric]

    correct = results['partial']['correct']
    partial = results['partial']['partial']
    missed = results['partial']['missed']
    spurious = results['partial']['spurious']
    actual = results['partial']['actual']
    possible = results['partial']['possible']

    # partial matches count for half in both precision and recall
    precision = (correct + 0.5 * partial) / actual if actual > 0 else 0
    recall = (correct + 0.5 * partial) / possible if possible > 0 else 0

    # final score: F1, the harmonic mean of precision and recall
    score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return score

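# A minimal usage sketch with made-up data (two sentences) in the sample-submission
# format (id, Sent_ID, tag); a real run would pass the full test-set dataframes.
if __name__ == '__main__':
    sent_ids = [1] * 4 + [2] * 4
    tags_true = ['O', 'B-indications', 'I-indications', 'O',
                 'O', 'O', 'B-indications', 'O']
    tags_pred = ['O', 'B-indications', 'I-indications', 'I-indications',
                 'O', 'O', 'O', 'O']
    ideal = pd.DataFrame({'id': range(8), 'Sent_ID': sent_ids, 'tag': tags_true})
    pred = pd.DataFrame({'id': range(8), 'Sent_ID': sent_ids, 'tag': tags_pred})

    # sentence 1 yields a partial overlap (half credit), sentence 2 a missed entity,
    # so precision = 0.5, recall = 0.25, and the final F1 score is 1/3
    print(calculate_score(ideal, pred))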