Advertisement
Guest User

Untitled

a guest
May 25th, 2015
228
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.88 KB | None | 0 0
  1. import sys
  2.  
  3. FILENAMES = {
  4. 'first': [ 'shuffled_review_with_tag_{0}'.format(i) for i in range(0, 8) ],
  5. 'second': [ 'shuffled_review_without_tag_{0}'.format(i) for i in range(0, 8) ],
  6. }
  7.  
  8. NUMBER_OF_CATEGORIES = 8 + 1 # including neutral
  9.  
  10. def read_entry(input_file):
  11. key = input_file.readline().strip()
  12. if not key:
  13. return None
  14.  
  15. content = input_file.readline().strip()
  16. tag = input_file.readline().strip()
  17. if tag:
  18. tag = int(tag)
  19. else:
  20. tag = -1
  21.  
  22. return (key, content, tag)
  23.  
  24. def compare_entries(first_file, second_file):
  25. counts = [ [ 0 for i in range(0, NUMBER_OF_CATEGORIES) ] for i in range(0, NUMBER_OF_CATEGORIES) ]
  26. not_matched_entries = []
  27.  
  28. while True:
  29. first_entry = read_entry(first_file)
  30. second_entry = read_entry(second_file)
  31. if (not first_entry) or (not second_entry):
  32. if (not first_entry) and (not second_entry):
  33. break
  34. else:
  35. sys.exit('size of entry is not matched')
  36.  
  37. if first_entry[0] != second_entry[0]:
  38. print 'key1: {0}\nkey2: {1}'.format(first_entry[0], second_entry[0])
  39. sys.exit('key is not matched')
  40.  
  41. counts[first_entry[2]][second_entry[2]] += 1
  42.  
  43. if first_entry[2] != second_entry[2]:
  44. not_matched_entries.append((first_entry, second_entry))
  45.  
  46. return (counts, not_matched_entries)
  47.  
  48. def calculate_kappa(counts):
  49. total = sum(map(lambda x: sum(x), counts))
  50. p = map(lambda x: map(lambda y: float(y) / total, x), counts)
  51.  
  52. p_a = 0.0
  53. for i in range(0, NUMBER_OF_CATEGORIES):
  54. p_a += p[i][i]
  55.  
  56. p_e = 0.0
  57. for i in range(0, NUMBER_OF_CATEGORIES):
  58. for j in range(0, NUMBER_OF_CATEGORIES):
  59. p_e += p[i][j] * p[j][i]
  60.  
  61. return (p_a - p_e) / (1.0 - p_e)
  62.  
  63.  
  64. while True:
  65. for (first_filename, second_filename) in zip(FILENAMES['first'], FILENAMES['second']):
  66. with open(first_filename) as first_file:
  67. with open (second_filename) as second_file:
  68. (counts, not_matched_entries) = compare_entries(first_file, second_file)
  69. kappa = calculate_kappa(counts)
  70.  
  71. print(kappa)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement