Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python2
"""Score one annotator's category choices against the per-article majority.

Usage: python <script> file.csv username

The CSV is read with a header row followed by rows whose first three columns
are article, category and annotator. For every article the most frequently
chosen category is taken as the reference answer, and the given user's
annotations are counted as correct when they match it.
"""
import csv
import sys
import math


def read_annotations(path):
    """Load annotation rows from the CSV file at *path*.

    The first row is treated as a header and discarded. Rows with fewer than
    three fields (for example blank lines) are skipped instead of raising
    IndexError.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A list of dicts with the keys 'article', 'category' and 'annotator',
        in file order.
    """
    # The context manager guarantees the file handle is closed.
    with open(path, 'r') as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='"')
        # next() with a default tolerates an empty file and works on both
        # Python 2 and Python 3 (reader.next() exists only on Python 2).
        next(reader, None)
        return [
            {'article': row[0], 'category': row[1], 'annotator': row[2]}
            for row in reader
            if len(row) >= 3
        ]


def majority_categories(rows):
    """Return the most frequently chosen category for each article.

    Args:
        rows: Iterable of dicts with 'article' and 'category' keys.

    Returns:
        A dict mapping each article to its leading category (as a str).
        When several categories tie, the winner is whichever one max()
        meets first while iterating the tally dict.
    """
    tallies = {}
    for row in rows:
        tally = tallies.setdefault(row['article'], {})
        category = str(row['category'])
        tally[category] = tally.get(category, 0) + 1
    return {
        article: max(tally, key=tally.get)
        for article, tally in tallies.items()
    }


def score_annotations(rows, user):
    """Count how many of *user*'s annotations agree with the majority.

    Only articles the user actually annotated are scored; articles annotated
    solely by other people are ignored (the original script raised KeyError
    on them).

    Args:
        rows: List of dicts with 'article', 'category' and 'annotator' keys.
        user: Annotator name to score.

    Returns:
        A (total, correct) tuple: the number of rows written by *user* and
        the number of the user's distinct articles whose category equals the
        article's leading category.
    """
    own_rows = [row for row in rows if row['annotator'] == user]

    # If the user annotated an article more than once, the last row wins.
    own_choice = {}
    for row in own_rows:
        own_choice[row['article']] = row['category']

    leaders = majority_categories(rows)
    correct = sum(
        1 for article, category in own_choice.items()
        if leaders[article] == category
    )
    return len(own_rows), correct


def main(argv=None):
    """Run the command-line tool.

    Args:
        argv: Argument list shaped like sys.argv; defaults to sys.argv.

    Returns:
        0 on success, -1 when arguments are missing or the user has no rows
        in the file.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        print("USAGE: python " + argv[0] + " file.csv username")
        return -1

    input_file = argv[1]
    user = argv[2]

    rows = read_annotations(input_file)
    total, total_correct = score_annotations(rows, user)
    if total == 0:
        # No row carries this annotator name.
        print("ERROR: User " + user + " does not exist!")
        return -1

    percent = int(float(total_correct) / float(total) * 100)
    print("Total Annotations: " + str(total) + ", Total Correct: " + str(total_correct) + " (" + str(percent) + "%)")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Add Comment
Please, Sign In to add comment