Guest User

Untitled

a guest
Jul 23rd, 2018
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.46 KB | None | 0 0
  1. #!/usr/bin/python2
  2. import csv
  3. import sys
  4. import math
  5.  
  6. if (len(sys.argv) < 3):
  7.     print "USAGE: python " + sys.argv[0] + " file.csv username"
  8.     sys.exit(-1)
  9. else:
  10.     # input file
  11.     INPUT_FILE = sys.argv[1]
  12.     # user to compare
  13.     USER = sys.argv[2]
  14.  
  15. # read in csv file
  16. reader = csv.reader(open(INPUT_FILE, 'r'), delimiter=',', quotechar='"')
  17. header = reader.next()
  18.  
  19. # build a csv_list (screw iterators!)
  20. csv_list = []
  21. for row in reader:
  22.     csv_list.append({'article': row[0], 'category': row[1], 'annotator': row[2]})
  23.  
  24. # Users
  25. users = set([x['annotator'] for x in csv_list])
  26.  
  27. if USER not in users:
  28.     print "ERROR: User " + USER + " does not exist!"
  29.     exit(-1)
  30.  
  31. # My annotations
  32. my_annotations = [x for x in csv_list if x['annotator'] == USER]
  33. my_annotations_dict = dict()
  34. for x in my_annotations:
  35.     # build a dictionary to add
  36.     tuple_dict = {'category': x['category']}
  37.     my_annotations_dict[x['article']] = tuple_dict
  38.  
  39. # Articles with category counts
  40. articles = [[x['article'], None] for x in csv_list]
  41. article_category_counts = dict(articles)
  42.  
  43. articles = dict()
  44. for x in csv_list:
  45.     # build a dictionary to add
  46.     tuple_dict = {'category': x['category'], 'annotator': x['annotator']}
  47.     if x['article'] in articles:
  48.         articles[x['article']].append(tuple_dict)
  49.     else:
  50.         articles[x['article']] = [tuple_dict]
  51.  
  52. for article in article_category_counts:
  53.     # build our articles and sum annotations into a dictionary
  54.     article_focus = [str(cat['category']) for cat in articles[article]]
  55.     cat_counts = dict( [ (i, article_focus.count(i)) for i in set(article_focus) ] )
  56.     # save our counted dictionary into the dictionary of articles
  57.     article_category_counts[article] = cat_counts
  58.     # get our leader key, let python do the tiebreaking (because its a dictionary, its unordered hence random)
  59.     max_key = max(cat_counts, key=cat_counts.get)
  60.     # set a 'correct' key in each of user's annotations for easy reference
  61.     if article not in my_annotations_dict:
  62.         my_annotations_dict[article]['correct'] = False
  63.     else:
  64.         my_annotations_dict[article]['correct'] = (max_key == my_annotations_dict[article]['category'])
  65.  
  66. total_correct = len([x for x in my_annotations_dict if my_annotations_dict[x]['correct'] == True])
  67. print "Total Annotations: " + str(len(my_annotations)) + ", Total Correct: " + str(total_correct) + " (" + str(int(float(total_correct) / float(len(my_annotations)) * 100)) + "%)"
Add Comment
Please, Sign In to add comment