Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # A simple, horribly ugly summarizer I tossed together. Just scrapes the array of subreddits you pass along with the top 10 best comments.
- # The table is ugly and uses style tags inline so I don't mess with CSS but I figure I can tweak it when it gets annoying.
- # There's not really any error checking and could absolutely be prettied up but it does what I need.
import html
import io
import pprint
import sqlite3
import sys
import time
from datetime import datetime

import praw
from dateutil import tz
from dateutil.relativedelta import relativedelta
from praw.models import MoreComments
- # Import everything needed.
def get_time_difference(dt):
    """Describe how long ago a Unix timestamp was, in days, hours and minutes.

    Args:
        dt: Seconds since the epoch, e.g. a submission's or comment's
            ``created_utc`` value.

    Returns:
        A string of the form ``"D days H hours M minutes"``. ``D`` is the
        total number of whole days elapsed (it is not reset after a month).
        For a timestamp in the future every component is negative or zero.
    """
    # The gap between two absolute instants does not depend on any time zone,
    # so compare epoch seconds directly instead of building zone-labelled
    # datetimes (labelling local wall time with a fixed zone skews the result
    # on machines that are not in that zone).
    elapsed_seconds = int(time.time() - dt)
    sign = -1 if elapsed_seconds < 0 else 1
    total_minutes = abs(elapsed_seconds) // 60
    days, leftover_minutes = divmod(total_minutes, 24 * 60)
    hours, minutes = divmod(leftover_minutes, 60)
    return "%d days %d hours %d minutes" % (sign * days, sign * hours, sign * minutes)
def local_time_as_text(dt):
    """Format a Unix timestamp as US/Pacific wall-clock text.

    Args:
        dt: Seconds since the epoch, e.g. a ``created_utc`` value.

    Returns:
        The time rendered with the format ``"%m/%d/%Y %I:%M:%S %p %Z"``.
    """
    to_zone = tz.gettz('US/Pacific')
    # Hand the zone to fromtimestamp() so the instant is really converted to
    # that zone. Building a naive local datetime and then replace()-ing its
    # tzinfo only relabels the machine's local time, which is wrong whenever
    # the machine is not itself in US/Pacific.
    return datetime.fromtimestamp(dt, tz=to_zone).strftime("%m/%d/%Y %I:%M:%S %p %Z")
# Subreddits to summarize. Add whatever subreddits you want.
subreddits = ["politics"]

# SQLite database used to remember which posts were already pulled.
# It only contains a 'posts' table:
#   CREATE TABLE posts (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
#                       subreddit text, post_id text, added_on integer)
sq = "c:\\code\\python\\sq_red.sqlite"
conn = sqlite3.connect(sq)
c = conn.cursor()

# Reddit API client. Put in your own information here.
reddit = praw.Reddit(
    client_id='',
    client_secret='',
    user_agent='simple summarizer by /u/blessedarethegeek',
    username='',
    password='',
)
# Report file, named after the current local date and time.
# Change the directory below to wherever you want to save the file.
file = open(
    "c:\\temp\\" + datetime.today().strftime('%Y-%m-%d %H %M %S') + " red.html",
    "w",
    encoding="utf-8",
)

file.write("<html>\n")
file.writelines([
    " <head>",
    '<meta charset="utf-8"/>',
    # JavaScript for keyboard paging through subreddits and posts: jQuery and
    # Mousetrap are loaded from cdnjs. Change the p, o, m, n bindings below to
    # whatever keys you want.
    '<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.3/jquery.min.js"></script>',
    '<script src="https://cdnjs.cloudflare.com/ajax/libs/mousetrap/1.4.6/mousetrap.min.js"></script>',
    '<script language="javascript">',
    # Scroll to the first matching element below the current scroll position.
    "function nextElement(inputClass) {$(inputClass).each(function(index) {if ($(this).position().top > $(document).scrollTop() + 5) {$(this).get(0).scrollIntoView(); return false;}});}",
    # Scroll to the matching element just before the first one below the
    # current scroll position. holdElement is declared locally and checked
    # before use: as an undeclared name it raised a ReferenceError when the
    # very first match was already below the scroll position, or silently
    # reused an element remembered by an earlier call.
    "function prevElement(inputClass) {var holdElement = null; $(inputClass).each(function(index) {if ($(this).position().top > $(document).scrollTop()) {if (holdElement) {holdElement.get(0).scrollIntoView();} return false;} else {holdElement = $(this);}});}",
    "Mousetrap.bind('p', function() { nextElement('.subreddit'); });",
    "Mousetrap.bind('o', function() { prevElement('.subreddit'); });",
    "Mousetrap.bind('m', function() { nextElement('.post'); });",
    "Mousetrap.bind('n', function() { prevElement('.post'); });",
    '</script>',
    " </head>\n",
])
file.write(" <body>\n")
file.write(" <table style='border-collapse: collapse;table-layout:fixed;'>\n")
# Write one banner row per subreddit, then one group of rows per new hot post:
# title/time row, thumbnail + link row, self-text row, and up to ten "best"
# comments. Posts already recorded in the 'posts' table are skipped.
for subs in subreddits:
    # The 'subreddit' class is what the keyboard paging script jumps between.
    file.write(
        " <tr style='border:1px solid black;background-color:#fdc4c4' class='subreddit'>"
        "<td colspan=2 style='text-align:center;border:1px solid black;width:auto;'>"
        "<h2>" + html.escape(subs) + "</h2></td></tr>\n"
    )

    # Loop through the top 50 hot posts of this subreddit.
    for submission in reddit.subreddit(subs).hot(limit=50):
        # Check whether the post was pulled by a previous run. The id is bound
        # as a parameter instead of being spliced into the SQL text.
        already_pulled = c.execute(
            "SELECT 1 FROM posts WHERE post_id = ? LIMIT 1;",
            (submission.id,),
        ).fetchone()
        if already_pulled is not None:
            print("In Database")
            continue

        # To skip posts based on score, add a check here, for example:
        #     if submission.score < 50: continue

        # Post header row; the 'post' class is the other keyboard paging target.
        # The title is plain text from Reddit, so escape it before embedding it
        # in markup.
        file.write(
            " <tr class='post'><td style='text-align:center; border:1px solid black;width:auto;background-color:#d4d4d4' colspan=2>"
            "<h3>" + html.escape(submission.title) + "</h3><wbr>"
            + local_time_as_text(submission.created_utc)
            + " ( " + get_time_difference(submission.created_utc) + " )</td></tr>\n"
        )

        # Thumbnail cell: only emit an <img> when the value looks like a URL.
        # NOTE(review): Reddit appears to report placeholder keywords (e.g.
        # "self", "default") instead of a URL for posts without a preview;
        # confirm the full list against the API if more cases matter.
        thumbnail = submission.thumbnail
        if isinstance(thumbnail, str) and thumbnail.startswith("http"):
            thumbnail_cell = "<img src='" + html.escape(thumbnail) + "'/>"
        else:
            thumbnail_cell = " "
        post_url = "https://www.reddit.com" + html.escape(submission.permalink)
        file.write(
            " <tr><td style='border:1px solid black;white-space:nowrap;width:150px;text-align:center;'>"
            + thumbnail_cell
            + "</td><td style='text-align:left; white-space:nowrap; border:1px solid black;width:auto;'>"
            "<a href='" + post_url + "'>[Main Post URL]</a></td></tr>\n"
        )

        # Self-text row: use the pre-rendered HTML when present; otherwise
        # fall back to the plain text, escaped so it cannot break the table.
        if submission.selftext_html is None:
            post_body = html.escape(submission.selftext or "")
        else:
            post_body = submission.selftext_html
        file.write(
            " <tr><td style='text-align:center; border:1px solid black;word-break:break-all;width:auto;' colspan=2>"
            + post_body
            + "</td></tr>\n"
        )

        # Record the post so later runs skip it. added_on is stored as whole
        # seconds because the column is declared as an integer.
        c.execute(
            "INSERT INTO posts (subreddit, post_id, added_on) VALUES (?, ?, ?);",
            (submission.subreddit.display_name, submission.id, int(time.time())),
        )

        # Get the top 10 best comments just to see thoughts on the post.
        # These settings are assigned before .comments is first accessed.
        submission.comment_sort = 'best'
        submission.comment_limit = 10
        for top_level_comment in submission.comments:
            # Skip "load more comments" placeholders; they have no body.
            if isinstance(top_level_comment, MoreComments):
                continue
            comment_url = "https://www.reddit.com" + html.escape(top_level_comment.permalink)
            file.write(
                " <tr><td style='border:1px solid black;white-space:nowrap;width:150px;text-align:center;'>"
                "<a href='" + comment_url + "'>[comment permalink]</a><br/>"
                + local_time_as_text(top_level_comment.created_utc)
                + "<br/>"
                + get_time_difference(top_level_comment.created_utc)
                + "</td><td style='border:1px solid black;white-space:normal;word-break:break-all;width:auto;padding-left:10px;'>"
                + top_level_comment.body_html
                + "</td></tr>\n"
            )
# Emit the closing tags of the HTML document, then close the file.
file.writelines((" </table>\n", " </body>\n", "</html>\n"))
file.close()

# Save the newly recorded posts and close the SQLite connection.
conn.commit()
conn.close()
Add Comment
Please, Sign In to add comment