Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import operator
- import matplotlib.pyplot as plt
def rape_analysis(
        tags_path="/home/curuinor/data/mefi_dump/tagdata_mefi.txt",
        comments_path="/home/curuinor/data/mefi_dump/commentdata_mefi.txt",
        tags=None):
    """Collect the distinct commenters on every post carrying a tracked tag.

    Two tab-separated files are read:

    * ``tags_path``: column 1 is used as the post id and column 3 is
      compared against the tracked tags.
    * ``comments_path``: column 1 is the post id and column 2 is the value
      collected per post (presumably the commenting user's id -- confirm
      against the dump's schema).

    Args:
        tags_path: Path of the tag dump. Defaults to the original
            hard-coded location.
        comments_path: Path of the comment dump. Defaults to the original
            hard-coded location.
        tags: Iterable of tag strings to track. ``None`` selects the
            original rape-related tag list.

    Returns:
        The ``items()`` of a dict mapping each tagged post id (int) to the
        set of column-2 values (str) seen on that post's comments. Posts
        that are tagged but have no comments map to an empty set.

    Raises:
        IOError/OSError: If either file cannot be opened.
        ValueError: If a post id field that must be parsed is not an
            integer (other than the ``postid`` header of the comment file).
    """
    if tags is None:
        tags = ("rape", "anti-rape", "rapeculture", "sexualassault", "consent")
    # Membership is the only operation needed, so a frozenset is the
    # natural container here.
    wanted_tags = frozenset(tags)

    related_posts = {}
    with open(tags_path, "r") as mefi_tags_file:
        for row in csv.reader(mefi_tags_file, delimiter="\t"):
            # Blank or truncated lines have no tag column; skip them instead
            # of failing with IndexError.
            if len(row) < 4:
                continue
            if row[3] in wanted_tags:
                # A post can carry several tracked tags; keep a single entry.
                related_posts.setdefault(int(row[1]), set())

    with open(comments_path, "r") as mefi_comment_file:
        for row in csv.reader(mefi_comment_file, delimiter="\t"):
            # Skip short rows and the header row, whose post id column holds
            # the literal column name.
            if len(row) < 3 or row[1] == "postid":
                continue
            post_id = int(row[1])
            if post_id in related_posts:
                related_posts[post_id].add(row[2])

    return related_posts.items()
if __name__ == "__main__":
    # Script entry point: plot, for every tagged post, how many distinct
    # users commented on it, ordered by post id.
    posts_by_id = sorted(rape_analysis(), key=operator.itemgetter(0))

    # Real lists (not lazy map objects) so the values can be both printed
    # and plotted, on Python 2 and Python 3 alike.
    xs = [post_id for post_id, _ in posts_by_id]
    print(xs)
    ys = [len(users) for _, users in posts_by_id]
    print(ys)

    plt.plot(xs, ys)
    plt.title("postid of rape-related discussions vs users involved")
    plt.xlabel("postid")
    plt.ylabel("number of users involved")
    # The figure is written to disk rather than shown interactively.
    plt.savefig("postid_versus_users_involved")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement