Atheuz

Untitled

Mar 7th, 2012
177
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.05 KB | None | 0 0
  1. from json import loads
  2. import urllib, urllib2
  3. from time import sleep
  4. import collections
  5. import sys, os
  6. import re
  7. import argparse
  8. import datetime
  9. import csv
  10.  
  11. def get_subreddits(jsonurl, dates, subreddits):
  12.     tofetch = jsonurl
  13.     after = None
  14.     pages_traversed = 0
  15.  
  16.     while True:
  17.         sys.stdout.write("%d \r" % pages_traversed)
  18.         sys.stdout.flush()
  19.         parsed = loads(urllib2.urlopen(tofetch).read())
  20.         for link in parsed['data']['children']:
  21.             dates.append(int(link["data"]["created"]))
  22.             subreddits.append("%s" % link["data"]["subreddit"])
  23.         after = parsed['data']['after']
  24.         pages_traversed += 1
  25.         if after:
  26.             tofetch = '%s?after=%s' % (jsonurl, after)
  27.             sleep(2)
  28.         else:
  29.             break
  30.  
  31.     return dates, subreddits
  32.  
  33. def run(comments_page):
  34.     url = "https://www.reddit.com/api/login/"
  35.     values = {"passwd": "YOUR_PASSWORD", "user": "YOUR_USERNAME"}
  36.  
  37.     subreddits = []
  38.     dates = []
  39.  
  40.     data = urllib.urlencode(values)
  41.     req = urllib2.Request(url, data)
  42.     response = urllib2.urlopen(req)
  43.  
  44.     if 'http://' not in comments_page:
  45.         comments_page = 'http://' + comments_page
  46.  
  47.     if comments_page.rfind('/.json') != -1:
  48.         dates, subreddits = get_subreddits(comments_page, subreddits, dates)
  49.     elif comments_page.rfind('/') == len(comments_page) - 1:
  50.         dates, subreddits = get_subreddits(comments_page + '.json', subreddits, dates)
  51.     else:
  52.         dates, subreddits = get_subreddits(comments_page + '/.json', subreddits, dates)
  53.  
  54.     username     = re.search("(?<=user/)\w+", comments_page)
  55.     if username:
  56.         username = username.group()
  57.     else:
  58.         username = "unknown username"
  59.  
  60.     report      = collections.Counter(subreddits)
  61.     total_comments = sum(report.itervalues())
  62.     report_str = ""
  63.     report_str += "   \n"
  64.     report_str += "Report for %s:\n" % username
  65.     report_str += "\nTotal comments: %d\n\n" % total_comments
  66.     report_str += "Summary of comments:\n"
  67.  
  68.     for i,j in sorted(report.items(), key=lambda x: x[1], reverse=True):
  69.         report_str += "/r/%s%03d times -- %06.2f%% of total comments.\n" % (i.ljust(35),j, (float(j)/float(total_comments))*100.0)
  70.  
  71.     print report_str
  72.  
  73.     try:
  74.         os.mkdir("output")
  75.     except WindowsError:
  76.         pass
  77.  
  78.     os.chdir("output")
  79.     with open("%s_report.txt" % username, "wb") as f:
  80.         f.write(report_str)
  81.         f.close()
  82.     os.chdir("..")
  83.  
  84.     return dates
  85.  
  86.  
  87. def main():
  88.     parser = argparse.ArgumentParser()
  89.     parser.add_argument('comments_page', metavar='P', nargs='+', type=str, default=None,
  90.             help='link to submissions')
  91.     args = parser.parse_args()
  92.  
  93.     comments_page = args.comments_page
  94.  
  95.     for i in comments_page:
  96.         d = run(i)
  97.         print "Last comment: %s" % datetime.datetime.fromtimestamp(d[0]).strftime("%Y-%m-%d %H:%M:%S")
  98.         print "First comment: %s" % datetime.datetime.fromtimestamp(d[-1]).strftime("%Y-%m-%d %H:%M:%S")
  99.  
  100. if __name__ == '__main__':
  101.     main()
Add Comment
Please, Sign In to add comment