Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- The goal of this program is to:
- 1) Find top submissions from multiple subreddits
- 2) Look through the comments that contain links
- 3) Sort by score
- 4) Document Username, Link, Link Context, Parent Comment
- 5) Store information to file
- """
- #Importing necessary modules (For the most part)
- import praw
- import logging
- import re
- from pprint import pprint
- import requests
- import json
# --- Logging configuration --------------------------------------------------
# Root logger: formatted output, throttled to INFO (used by the logging.info
# calls in CreateLinkList below).
logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s -%(levelname)s - %(message)s')
logging.getLogger().setLevel(logging.INFO)

# prawcore logger: full DEBUG so every HTTP request PRAW makes is visible.
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
# Give this handler the same format and stop propagation to the root logger.
# The original configuration emitted every prawcore message twice: once via
# this (unformatted) handler and once via the root handler from basicConfig.
handler.setFormatter(logging.Formatter(' %(asctime)s -%(levelname)s - %(message)s'))
logger = logging.getLogger("prawcore")
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)
logger.propagate = False
# --- Reddit client ----------------------------------------------------------
# Script-type OAuth credentials for PRAW. Filling in username/password makes
# this a full script login, which is why read_only prints False below.
# NOTE(review): if read-only access is actually wanted, omit username/password.
_credentials = {
    'client_id': 'CLIENT REDDIT GIVES YOU',
    'client_secret': 'SECRET REDDIT GIVES YOU',
    'password': 'YOUR PASSWORD HERE',
    'username': 'YOUR REDDIT USERNAME',
    'user_agent': "YOUR OS : ID REDDIT GIVES YOU : 1.0 (by /u/REDDIT USERNAME)",
}
reddit = praw.Reddit(**_credentials)
print(reddit.read_only)
def getSubComments(comment, allComments, verbose=True):
    """Depth-first collect *comment* and every descendant into *allComments*.

    Regular comments expose their children via ``.replies``; "load more"
    stubs lack that attribute and must be expanded with ``.comments()``,
    which triggers a network fetch.
    """
    allComments.append(comment)
    if hasattr(comment, "replies"):
        children = comment.replies
    else:
        # Expanding a MoreComments-style stub; report progress if asked.
        children = comment.comments()
        if verbose:
            print("fetching (" + str(len(allComments)) + " comments fetched total)")
    for child in children:
        getSubComments(child, allComments, verbose=verbose)
def getAll(r, submissionId, verbose=True):
    """Return a flat list of every comment (top-level and nested) for a submission.

    Prints the total count before returning, mirroring the fetch progress
    output from getSubComments.
    """
    # NOTE(review): the main loop passes Submission objects here, while
    # r.submission() is normally given an id string — confirm PRAW accepts both.
    collected = []
    submission = r.submission(submissionId)
    for topLevel in submission.comments:
        getSubComments(topLevel, collected, verbose=verbose)
    print(len(collected))
    return collected
#Creates a non-filtered and non-purified list of links, needs to be scrubbed
def CreateLinkList(CommentList, domains=(".com", ".org", ".net")):
    """Scan comment bodies for whitespace-delimited words that look like links.

    Parameters:
        CommentList: iterable of comment-like objects, each expected to have
            a ``.body`` string. Objects without ``.body`` (e.g. MoreComments
            stubs) are counted as failures and skipped.
        domains: substrings whose presence marks a word as a candidate link.

    Returns:
        list of candidate link words (unfiltered; see PurifyLinkList).
    """
    LinkList = []
    counter = 0
    failedcounter = 0
    for comment in CommentList:
        try:
            words = comment.body.split()
        except AttributeError:
            # Only objects lacking .body land here; the original bare except
            # hid every other error too.
            failedcounter += 1
            continue
        for word in words:
            # Append each word at most once, even if it contains several of
            # the domain substrings (the original appended once per match,
            # duplicating words like "a.com.net").
            if any(target in word for target in domains):
                LinkList.append(word)
                counter += 1
    logging.info("Failed: " + str(failedcounter))
    logging.info("Worked: " + str(counter))
    return LinkList
#Writes contents of whatever list is passed through into the Link.txt file
def CreateLog(list, filename="Link.txt"):
    """Write each item of *list* to *filename*, separated by blank lines.

    Parameters:
        list: iterable of strings to record. (Name shadows the builtin
            ``list``; kept unchanged for caller compatibility.)
        filename: output path, defaulting to the original "Link.txt".
    """
    # Context manager guarantees the file is closed even if a write fails;
    # the stray debug prints from the original are removed.
    with open(filename, "w") as log:
        for item in list:
            log.write(item)
            log.write("\n\n")
def PurifyLinkList(LinkList):
    """Extract clean http(s) URLs from candidate link words.

    Parameters:
        LinkList: iterable of strings (candidate words from CreateLinkList).

    Returns:
        list of the matched URL substrings; entries without an http(s) URL
        are dropped.
    """
    # Raw string: the original "\s" in a plain string is an invalid escape
    # (DeprecationWarning in modern Python). Pattern itself is unchanged.
    URLRegex = re.compile(r"(?P<url>https?://[^\s]+)")
    LinkResults = []
    for link in LinkList:
        match = URLRegex.search(link)
        # Explicit None check instead of the original bare except, which
        # used AttributeError for control flow and hid real errors.
        if match is not None:
            LinkResults.append(match.group())
    return LinkResults
# Gather every comment from the top 10 submissions of r/socialism, pull out
# link-like words, clean them down to bare URLs, and write them to Link.txt.
totalcomments = []
subreddit = reddit.subreddit('socialism')
for submission in subreddit.top(limit=10):
    totalcomments.extend(getAll(reddit, submission))
LinkList = CreateLinkList(totalcomments)
Results = PurifyLinkList(LinkList)
CreateLog(Results)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement