Advertisement
Guest User

mefi_analysis

a guest
Feb 21st, 2015
301
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.48 KB | None | 0 0
  1. import csv
  2. import operator
  3. import matplotlib.pyplot as plt
  4.  
  5. def rape_analysis():
  6. rape_related_tags = { #python set is nonperformant,
  7. #although I don't know how much that matters
  8. "rape": 1,
  9. "anti-rape": 1,
  10. "rapeculture": 1,
  11. "sexualassault": 1,
  12. "consent": 1
  13. }
  14. related_posts = {}
  15. with open("/home/curuinor/data/mefi_dump/tagdata_mefi.txt", "r") as mefi_tags_file:
  16. reader = csv.reader(mefi_tags_file, delimiter="\t")
  17. for row in reader:
  18. if row[3] in rape_related_tags:
  19. related_posts[int(row[1])] = set()
  20. with open("/home/curuinor/data/mefi_dump/commentdata_mefi.txt", "r") as mefi_comment_file:
  21. reader = csv.reader(mefi_comment_file, delimiter="\t")
  22. for row in reader:
  23. if row[1] == "postid":
  24. continue
  25. if int(row[1]) in related_posts: #row[1] is post id
  26. related_posts[int(row[1])].add(row[2])
  27. return related_posts.items()
  28.  
  29. if __name__ == "__main__":
  30. num_users_per_rape_post = sorted(rape_analysis(), key=operator.itemgetter(0))
  31. xs = map(lambda x: x[0], num_users_per_rape_post)
  32. print xs
  33. ys = map(lambda x: len(x[1]), num_users_per_rape_post)
  34. print ys
  35. plt.plot(xs, ys)
  36. plt.title("postid of rape-related discussions vs users involved")
  37. plt.xlabel("postid")
  38. plt.ylabel("number of users involved")
  39. #plt.show()
  40. plt.savefig("postid_versus_users_involved")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement