Atheuz

Reddit User Comment Reporter

Apr 25th, 2012
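
What follows is a Python 2 script that walks a reddit user's comment listing through the site's JSON API, counts how often the user has commented in each subreddit, prints the breakdown, and saves it to output/<username>_report.txt.
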
from json import loads
import urllib, urllib2
from time import sleep
import collections
import sys, os
import re
import argparse
import datetime

# reddit's API rejects the default urllib2 User-Agent, so install a
# custom opener globally before making any requests.
opener = urllib2.build_opener()
opener.addheaders = [('User-Agent','Text Bot by /u/Atheuz')]
urllib2.install_opener(opener)

def get_subreddits(jsonurl, dates, subreddits):
    """Walk a comment listing page by page, collecting each comment's
    creation timestamp and subreddit."""
    tofetch = jsonurl
    after = None
    pages_traversed = 0

    while True:
        sys.stdout.write("%d \r" % pages_traversed)
        sys.stdout.flush()
        parsed = loads(urllib2.urlopen(tofetch).read())
        for link in parsed['data']['children']:
            dates.append(int(link["data"]["created"]))
            subreddits.append("%s" % link["data"]["subreddit"])
        after = parsed['data']['after']
        pages_traversed += 1
        if after:
            # 'after' is reddit's pagination cursor; follow it until the
            # listing runs out, sleeping between requests to stay under
            # the API rate limit.
            tofetch = '%s?after=%s' % (jsonurl, after)
            sleep(2)
        else:
            break

    return dates, subreddits

def run(comments_page):
    # The login credentials were blanked before this was pasted; fill in
    # your own if needed. Note the opener has no cookie handler, so the
    # login session is not actually carried over to later requests.
    url = "https://www.reddit.com/api/login/"
    values = {"passwd": "", "user": ""}

    subreddits = []
    dates = []

    data = urllib.urlencode(values)
    req = urllib2.Request(url, data)
    response = urllib2.urlopen(req)

    if not comments_page.startswith(('http://', 'https://')):
        comments_page = 'http://' + comments_page

    # Make sure the URL ends in .json so reddit returns the listing as
    # JSON rather than HTML.
    if comments_page.endswith('.json'):
        dates, subreddits = get_subreddits(comments_page, dates, subreddits)
    elif comments_page.endswith('/'):
        dates, subreddits = get_subreddits(comments_page + '.json', dates, subreddits)
    else:
        dates, subreddits = get_subreddits(comments_page + '/.json', dates, subreddits)

    # Pull the username out of the URL for the report header.
    username = re.search(r"(?<=user/)\w+", comments_page)
    if username:
        username = username.group()
    else:
        username = "unknown username"

    report = collections.Counter(subreddits)
    total_comments = sum(report.itervalues())
    report_str = ""
    report_str += "   \n"
    report_str += "Report for %s:\n" % username
    report_str += "\nTotal comments: %d\n\n" % total_comments
    report_str += "Summary of comments:\n"

    # Sort subreddits by comment count, most active first.
    for i, j in sorted(report.items(), key=lambda x: x[1], reverse=True):
        report_str += "/r/%s%03d times -- %06.2f%% of total comments.\n" % (i.ljust(35), j, (float(j) / float(total_comments)) * 100.0)

    print report_str

    try:
        os.mkdir("output")
    except OSError:
        # The directory already exists.
        pass

    with open(os.path.join("output", "%s_report.txt" % username), "wb") as f:
        f.write(report_str)

    return dates


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('comments_page', metavar='P', nargs='+', type=str,
            help="link(s) to a user's comments page")
    args = parser.parse_args()

    for i in args.comments_page:
        d = run(i)
        # The listing is newest-first, so d[0] is the most recent comment
        # and d[-1] the oldest.
        print "Last comment: %s" % datetime.datetime.fromtimestamp(d[0]).strftime("%Y-%m-%d %H:%M:%S")
        print "First comment: %s" % datetime.datetime.fromtimestamp(d[-1]).strftime("%Y-%m-%d %H:%M:%S")


if __name__ == '__main__':
    main()
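
A hypothetical invocation, assuming the paste is saved as reddit_report.py (the filename is not given in the paste) and run under Python 2, which the urllib2 imports and print statements require, using the author's own history as the example target:

    python reddit_report.py http://www.reddit.com/user/Atheuz/comments

The per-subreddit summary is printed to the terminal and also written to output/Atheuz_report.txt.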