Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- The goal of this program is to:
- 1) Find top submissions from multiple subreddits
- 2) Look through the comments that contain links
- 3) Sort by score
- 4) Document Username, Link, Link Context, Parent Comment
- 5) Store information to file
- """
- # Importing necessary modules (For the most part)
- import praw
- import logging
- import re
- from pprint import pprint
- import requests
- import json
# Adds logging to the program, tells when information is being requested.
# A dedicated DEBUG handler is attached to praw's transport logger ("prawcore")
# so every HTTP request praw makes is echoed to stderr.
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
logger = logging.getLogger("prawcore")
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)
# Configures how we will view logs.
# NOTE(review): basicConfig attaches a second handler to the root logger, and
# "prawcore" records propagate to the root — so each prawcore message is likely
# emitted twice (once per handler). Confirm and drop one of the two setups.
logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s -%(levelname)s - %(message)s')
# NOTE(review): this immediately overrides the DEBUG level that basicConfig
# just set on the root logger; root-level records below INFO are discarded.
logging.getLogger().setLevel(logging.INFO)
# Initialize Praw, the Reddit API Wrapper.
# SECURITY(review): real client secret and account password are hard-coded here
# (and have been published with this paste). Rotate these credentials and load
# them from environment variables or a praw.ini file instead of source code.
reddit = praw.Reddit(client_id='pQr2bTUsJzvAYA',
                     client_secret='hNsIBr42jlcVWy8_-TFHZAsOb08',
                     password='ThereIsNoHopeRight',
                     username='LeftcomGANG',
                     user_agent="Linux : 7NgvlaWfS9R0nA : 1.0 (by /u/Nighthawk153)")
# It's supposed to be read only but I can't get it to be dat way.
# NOTE(review): supplying username/password yields an authorized (read-write)
# instance — presumably omitting them would make read_only True; confirm
# against the praw authentication docs.
print(reddit.read_only)
def getSubComments(comment, allComments, verbose=True):
    """
    Recursively walk a comment and every reply beneath it, accumulating each
    node into ``allComments``.

    :param comment: the comment object (or MoreComments-style stub) to expand
    :param allComments: list being filled in place with every comment visited
    :param verbose: when True, log a progress line each time a stub is fetched
    :return: None — results are accumulated in ``allComments``
    """
    allComments.append(comment)
    if hasattr(comment, "replies"):
        # Ordinary comment: its children are already attached.
        children = comment.replies
    else:
        # Stub object (e.g. MoreComments): must be expanded with a fetch.
        children = comment.comments()
        if verbose:
            print("fetching (" + str(len(allComments)) + " comments fetched total)")
    for child in children:
        getSubComments(child, allComments, verbose=verbose)
def getAll(r, submissionId, verbose=True):
    """
    Gather every comment (top-level and nested) from one thread.

    :param r: the praw Reddit instance used to resolve the submission
    :param submissionId: identifier of the thread being scraped
    :param verbose: forwarded to getSubComments; controls progress output
    :return: list of every comment object found in the thread
    """
    thread = r.submission(submissionId)
    collected = []
    # Expand each top-level comment into the accumulator, including all
    # of its nested replies.
    for topLevel in thread.comments:
        getSubComments(topLevel, collected, verbose=verbose)
    print(len(collected))
    return collected
# Creates a non-filtered and non-purified list of links, needs to be scrubbed
def CreateLinkList(CommentList):
    """
    Collect raw link-looking tokens from a list of comments.

    Splits each comment body on whitespace and keeps every token containing
    ".com", ".org" or ".net". The result is unscrubbed — pass it through
    PurifyLinkList to extract clean URLs.

    :param CommentList: iterable of comment objects (anything with a
        string ``.body`` attribute; objects without one are counted as failed)
    :return: list of raw tokens that look like links
    """
    LinkList = []
    counter = 0
    failedcounter = 0
    # Renamed from `list` to avoid shadowing the builtin.
    domains = (".com", ".org", ".net")
    for comment in CommentList:
        # Narrow try: only the attribute access can legitimately fail here
        # (e.g. MoreComments stubs have no .body). The original bare except
        # silently swallowed every error type.
        try:
            body = comment.body
        except AttributeError:
            failedcounter += 1
            continue
        for word in body.split():
            for target in domains:
                # NOTE: a token matching two domains is appended twice,
                # preserving the original behavior.
                if target in word:
                    LinkList.append(word)
                    counter += 1
    logging.info("Failed: " + str(failedcounter))
    logging.info("Worked: " + str(counter))
    return LinkList
def CreateLog(list, logfile):
    """
    Write each link to the given log file, one per entry separated by a
    blank line, then close the file.

    :param list: the links to write (name kept for interface compatibility,
        although it shadows the builtin ``list``)
    :param logfile: an open, writable file object; it is closed on return
    :return: None
    """
    logging.info("Writing links to file.... Link.txt")
    for item in list:
        print(item)
        # BUG FIX: write to the `logfile` parameter. The original ignored it
        # and wrote to the module-level global LinkLog instead, so passing a
        # different file object had no effect.
        logfile.write(item)
        logfile.write("\n\n")
    logfile.close()
    logging.info("Successful in writing to document!")
def PurifyLinkList(LinkList):
    """
    Use a regex to extract a clean, clickable http(s) URL from each raw
    link-bearing token.

    :param LinkList: list of strings that may contain a URL somewhere inside
    :return: list of the URL portion of every entry that matched; entries
        with no http(s) URL are skipped
    """
    # Raw string so the \s escape is passed to the regex engine verbatim.
    URLRegex = re.compile(r"(?P<url>https?://[^\s]+)")
    LinkResults = []
    for link in LinkList:
        match = URLRegex.search(link)
        # Explicit None check replaces the original bare except, which used
        # the AttributeError from a failed match for control flow (and would
        # also have hidden any unrelated error).
        if match is not None:
            LinkResults.append(match.group())
    return LinkResults
# Subreddits to scrape (one for now).
LeftistList = ['esist']
# Feature flags: scrape comment links and/or thread links.
commentchoice = False
threadchoice = True
threadamount = 10  # NOTE(review): unused — subreddit.hot() below hard-codes limit=10
totalcomments = []  # NOTE(review): unused accumulator
threadlinks = []
threadlist = []
threadcomments = []
for sub in LeftistList:
    subreddit = reddit.subreddit(str(sub))
    # The program will loop through each subreddit title, and look through it's comments
    try:
        for thread in subreddit.hot(limit=10):
            threadlist.append(thread)
    # NOTE(review): bare except hides the real failure (auth, network, banned
    # sub); catching a specific praw/requests exception would be safer.
    except:
        logging.info("Something went wrong with this subreddit")
        continue
    # Now threadlist should be full of thread objects. 600~ or so.
    logging.info("Threadlist is " + str(len(threadlist)) + " items long")
    logging.info("Threadlist items: " + str(len(threadlist)))
# threadlist is now a very long list full of reddit thread objects.
# The program will now go through each thread --> Read each comment --> Read each subcomment
# Look for links, and add them to the Linklist.
for thread in threadlist:
    if threadchoice == True:
        threadlinks.append(thread.url)
    if commentchoice == True:
        # NOTE(review): getAll calls r.submission(submissionId) but receives a
        # full thread object here, not an id — confirm praw accepts that before
        # enabling commentchoice.
        for comment in getAll(reddit, thread):
            threadcomments.append(comment)
LinkLog = open("Link.txt", "w")
if threadchoice == True:
    CreateLog(threadlinks,LinkLog)
if commentchoice == True:
    # NOTE(review): CreateLog closes LinkLog, so if both flags are True this
    # second call writes to an already-closed file and raises ValueError.
    LinkList = CreateLinkList(threadcomments)
    Results = PurifyLinkList(LinkList)
    CreateLog(Results,LinkLog)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement