SHARE
TWEET

Simple Reddit Summarizer

a guest Jun 12th, 2019 44 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # A simple, horribly ugly summarizer I tossed together.  Just scrapes the array of subreddits you pass along with the top 10 best comments.
  2. # The table is ugly and uses style tags inline so I don't mess with CSS but I figure I can tweak it when it gets annoying.
  3. # There's not really any error checking and could absolutely be prettied up but it does what I need.
import html
import io
import pprint
import sqlite3
import sys
import time
from datetime import datetime

import praw
from dateutil import tz
from dateutil.relativedelta import relativedelta
from praw.models import MoreComments
  14. # Import everything needed.
  15. def get_time_difference(dt):
  16.     # Function to return the time difference between now and a submission/comment created_utc date.  I stole this from different stackoverflow posts.
  17.     to_zone = tz.gettz('US/Pacific')
  18.     # Uh.  Change this.
  19.     start = datetime.now(to_zone)
  20.     ends = datetime.fromtimestamp(dt).replace(tzinfo=to_zone)
  21.     diff = relativedelta(start, ends)
  22.     return "%d days %d hours %d minutes" % (diff.days, diff.hours, diff.minutes)
  23.    
  24. def local_time_as_text(dt):
  25.     # Created UTC to time.  I stole this from different stackoverflow posts.
  26.     to_zone = tz.gettz('US/Pacific')
  27.     return datetime.fromtimestamp(dt).replace(tzinfo=to_zone).strftime("%m/%d/%Y %I:%M:%S %p %Z")
  28.    
  29. subreddits = []
  30. subreddits.append("politics")
  31. # Add whatever subreddits you want.
  32. sq = "c:\\code\\python\\sq_red.sqlite"
  33. # SQLITE database only contains a 'posts' table - CREATE TABLE posts (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, subreddit text, post_id text, added_on integer)
  34. conn = sqlite3.connect(sq)
  35. c = conn.cursor()
  36. #
  37. reddit = praw.Reddit(client_id='', client_secret='', user_agent='simple summarizer by /u/blessedarethegeek', username='', password='')
  38. # Put in your own information here.
  39. file = open("c:\\temp\\"+datetime.today().strftime('%Y-%m-%d %H %M %S')+" red.html", "w", encoding="utf-8")
  40. # Add where you want to save the file.
  41. file.write("<html>\n")
  42. file.write("   <head>")
  43. file.write('<meta charset="utf-8"/>')
  44. # Create Javascript to handle keyboard paging through subreddits and posts.  Change the p, o, m, n to whatever binding you want.
  45. file.write('<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>')
  46. file.write('<script src="https://cdnjs.cloudflare.com/ajax/libs/mousetrap/1.4.6/mousetrap.min.js"></script>')
  47. file.write('<script language="javascript">')
  48. file.write("function nextElement(inputClass) {$(inputClass).each(function(index) {if ($(this).position().top > $(document).scrollTop() + 5) {$(this).get(0).scrollIntoView(); return false;}});}");
  49. file.write("function prevElement(inputClass) {$(inputClass).each(function(index) {if ($(this).position().top > $(document).scrollTop()) {holdElement.get(0).scrollIntoView(); return false;} else {holdElement = $(this);}});}");
  50. file.write("Mousetrap.bind('p', function() { nextElement('.subreddit'); });");
  51. file.write("Mousetrap.bind('o', function() { prevElement('.subreddit'); });");
  52. file.write("Mousetrap.bind('m', function() { nextElement('.post'); });");
  53. file.write("Mousetrap.bind('n', function() { prevElement('.post'); });");
  54. file.write('</script>')
  55. file.write("   </head>\n")
  56. file.write("   <body>\n")
  57. file.write("      <table style='border-collapse: collapse;table-layout:fixed;'>\n")
  58. for subs in subreddits:
  59.     file.write("         <tr style='border:1px solid black;background-color:#fdc4c4' class='subreddit'><td colspan=2 style='text-align:center;border:1px solid black;width:auto;'><h2>"+subs+"</h2></td></tr>\n")
  60.     for submission in reddit.subreddit(subs).hot(limit=50):
  61.         # Loop through the top 50 hot posts in the subreddit array.
  62.         cursor = c.execute("SELECT id, subreddit, post_id, added_on FROM posts WHERE post_id = '"+submission.id+"';")
  63.         # Check to see if the post was previously pulled.
  64.         rows = cursor.fetchall()
  65.         if len(rows) != 0:
  66.             print("In Database")
  67.             continue
  68.         #if submission.score < 50:
  69.             ## If you want to skip based on score, you could use this.
  70.             #print("      Skip")
  71.             #continue
  72.         file.write("         <tr class='post'><td style='text-align:center; border:1px solid black;width:auto;background-color:#d4d4d4' colspan=2><h3>"+submission.title+"</h3><wbr>"+local_time_as_text(submission.created_utc)+" ( "+get_time_difference(submission.created_utc)+" )</td></tr>\n")
  73.         if submission.thumbnail == None:
  74.             file.write("         <tr><td style='border:1px solid black;white-space:nowrap;width:150px;text-align:center;'>&nbsp;</td><td style='text-align:left; white-space:nowrap; border:1px solid black;width:auto;'><a href='https://www.reddit.com"+submission.permalink+"'>[Main Post URL]</a></td></tr>\n")
  75.         else:
  76.             file.write("         <tr><td style='border:1px solid black;white-space:nowrap;width:150px;text-align:center;'><img src='"+submission.thumbnail+"'/></td><td style='text-align:left; white-space:nowrap; border:1px solid black;width:auto;'><a href='https://www.reddit.com"+submission.permalink+"'>[Main Post URL]</a></td></tr>\n")
  77.         if submission.selftext_html == None:
  78.             file.write("         <tr><td style='text-align:center; border:1px solid black;word-break:break-all;width:auto;' colspan=2>"+submission.selftext+"</td></tr>\n")
  79.         else:
  80.             file.write("         <tr><td style='text-align:center; border:1px solid black;word-break:break-all;width:auto;' colspan=2>"+submission.selftext_html+"</td></tr>\n")
  81.         #
  82.         c.execute("INSERT INTO posts (subreddit, post_id, added_on) VALUES ('"+submission.subreddit.display_name+"', '"+submission.id+"', "+str(time.time())+");")
  83.         # Insert the post information.
  84.         submission.comment_sort = 'best'
  85.         submission.comment_limit = 10
  86.         for top_level_comment in submission.comments:
  87.             # Get the top 10 best comments just to kinda see thoughts on the post.
  88.             if isinstance(top_level_comment, MoreComments):
  89.                 # I stole this from somewhere. Sorry original poster!
  90.                 continue
  91.             file.write("         <tr><td style='border:1px solid black;white-space:nowrap;width:150px;text-align:center;'><a href='https://www.reddit.com"+top_level_comment.permalink+"'>[comment permalink]</a><br/>"+local_time_as_text(top_level_comment.created_utc)+"<br/>"+get_time_difference(top_level_comment.created_utc)+"</td><td style='border:1px solid black;white-space:normal;word-break:break-all;width:auto;padding-left:10px;'>"+top_level_comment.body_html+"</td></tr>\n")
  92.         #
  93. file.write("      </table>\n")
  94. file.write("   </body>\n")
  95. file.write("</html>\n")
  96. file.close()
  97. # Close the file.
  98. conn.commit()
  99. conn.close()
  100. # Close the SQLite connection
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top