Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python
"""Mapper that reports body length per question/answer forum record.

Input (standard input): tab-separated records, one per line, preceded by a
single header line.  Only records with exactly 19 fields are considered.
According to the unpacking that was left commented out in the original
script, the fields are, in order:

    userid1, title, tagnames, author_id, body, node_type, parent_id,
    abs_parent_id, added_at, score, state_string, last_edited_id,
    last_activity_by_id, last_activity_at, active_revision_id, extra,
    extra_ref_id, extra_count, marked

NOTE(review): those names come from a comment only; confirm them against
the real data set.

Output (standard output): one line per record whose node type is "answer"
or "question", formatted as ``node_type<TAB>body_length<TAB>parent_id``.
"""
import csv
import sys

# Records with any other number of fields are silently skipped.
EXPECTED_FIELD_COUNT = 19

# Positions of the fields this script actually reads.
BODY_INDEX = 4
NODE_TYPE_INDEX = 5
PARENT_ID_INDEX = 6

# Only these node types produce output; every other type is dropped.
REPORTED_NODE_TYPES = ("answer", "question")


def _iter_post_summaries(rows):
    """Yield ``(node_type, body_length, parent_id)`` for reportable rows.

    ``rows`` is any iterable of field lists (e.g. a ``csv.reader``).  Rows
    that do not have exactly ``EXPECTED_FIELD_COUNT`` fields, or whose node
    type is not in ``REPORTED_NODE_TYPES``, are skipped.  ``body_length`` is
    ``len()`` of the body field as parsed (characters on Python 3, bytes on
    Python 2).
    """
    for fields in rows:
        if len(fields) != EXPECTED_FIELD_COUNT:
            continue
        node_type = fields[NODE_TYPE_INDEX]
        if node_type not in REPORTED_NODE_TYPES:
            continue
        yield node_type, len(fields[BODY_INDEX]), fields[PARENT_ID_INDEX]


def main(infile=None, outfile=None):
    """Run the mapper from ``infile`` to ``outfile``.

    Both arguments default to the process's standard streams, so calling
    ``main()`` with no arguments behaves like the original script.  The
    first input line is always discarded because it holds the column
    headings.
    """
    infile = sys.stdin if infile is None else infile
    outfile = sys.stdout if outfile is None else outfile

    reader = csv.reader(infile, delimiter='\t')
    next(reader, None)  # skip the heading line; tolerate empty input

    for node_type, body_length, parent_id in _iter_post_summaries(reader):
        # Written with an explicit newline so the output matches what the
        # original ``print`` statement produced, on Python 2 and 3 alike.
        outfile.write(
            "{0}\t{1}\t{2}\n".format(node_type, body_length, parent_id)
        )


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement