Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- The goal of this program is to:
- 1) Find top submissions from multiple subreddits
- 2) Look through the comments that contain links
- 3) Sort by score
- 4) Document Username, Link, Link Context, Parent Comment
- 5) Store information to file
- """
- #Importing necessary modules (For the most part)
- import praw
- import logging
- import re
- from pprint import pprint
- import requests
- import json
# --- Logging configuration --------------------------------------------------
# Root logger: formatted output, throttled to INFO (used by the logging.info
# calls in CreateLinkList below).
logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s -%(levelname)s - %(message)s')
logging.getLogger().setLevel(logging.INFO)

# prawcore logger: full DEBUG so every HTTP request PRAW makes is visible.
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
# Give this handler the same format and stop propagation to the root logger.
# The original configuration emitted every prawcore message twice: once via
# this (unformatted) handler and once via the root handler from basicConfig.
handler.setFormatter(logging.Formatter(' %(asctime)s -%(levelname)s - %(message)s'))
logger = logging.getLogger("prawcore")
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)
logger.propagate = False
# --- Reddit client ----------------------------------------------------------
# Script-type OAuth credentials for PRAW. Filling in username/password makes
# this a full script login, which is why read_only prints False below.
# NOTE(review): if read-only access is actually wanted, omit username/password.
_credentials = {
    'client_id': 'CLIENT REDDIT GIVES YOU',
    'client_secret': 'SECRET REDDIT GIVES YOU',
    'password': 'YOUR PASSWORD HERE',
    'username': 'YOUR REDDIT USERNAME',
    'user_agent': "YOUR OS : ID REDDIT GIVES YOU : 1.0 (by /u/REDDIT USERNAME)",
}
reddit = praw.Reddit(**_credentials)
print(reddit.read_only)
def getSubComments(comment, allComments, verbose=True):
    """Depth-first collect *comment* and every descendant into *allComments*.

    Regular comments expose their children via ``.replies``; "load more"
    stubs lack that attribute and must be expanded with ``.comments()``,
    which triggers a network fetch.
    """
    allComments.append(comment)
    if hasattr(comment, "replies"):
        children = comment.replies
    else:
        # Expanding a MoreComments-style stub; report progress if asked.
        children = comment.comments()
        if verbose:
            print("fetching (" + str(len(allComments)) + " comments fetched total)")
    for child in children:
        getSubComments(child, allComments, verbose=verbose)
def getAll(r, submissionId, verbose=True):
    """Return a flat list of every comment (top-level and nested) for a submission.

    Prints the total count before returning, mirroring the fetch progress
    output from getSubComments.
    """
    # NOTE(review): the main loop passes Submission objects here, while
    # r.submission() is normally given an id string — confirm PRAW accepts both.
    collected = []
    submission = r.submission(submissionId)
    for topLevel in submission.comments:
        getSubComments(topLevel, collected, verbose=verbose)
    print(len(collected))
    return collected
#Creates a non-filtered and non-purified list of links, needs to be scrubbed
def CreateLinkList(CommentList, domains=(".com", ".org", ".net")):
    """Scan comment bodies for whitespace-delimited words that look like links.

    Parameters:
        CommentList: iterable of comment-like objects, each expected to have
            a ``.body`` string. Objects without ``.body`` (e.g. MoreComments
            stubs) are counted as failures and skipped.
        domains: substrings whose presence marks a word as a candidate link.

    Returns:
        list of candidate link words (unfiltered; see PurifyLinkList).
    """
    LinkList = []
    counter = 0
    failedcounter = 0
    for comment in CommentList:
        try:
            words = comment.body.split()
        except AttributeError:
            # Only objects lacking .body land here; the original bare except
            # hid every other error too.
            failedcounter += 1
            continue
        for word in words:
            # Append each word at most once, even if it contains several of
            # the domain substrings (the original appended once per match,
            # duplicating words like "a.com.net").
            if any(target in word for target in domains):
                LinkList.append(word)
                counter += 1
    logging.info("Failed: " + str(failedcounter))
    logging.info("Worked: " + str(counter))
    return LinkList
#Writes contents of whatever list is passed through into the Link.txt file
def CreateLog(list, filename="Link.txt"):
    """Write each item of *list* to *filename*, separated by blank lines.

    Parameters:
        list: iterable of strings to record. (Name shadows the builtin
            ``list``; kept unchanged for caller compatibility.)
        filename: output path, defaulting to the original "Link.txt".
    """
    # Context manager guarantees the file is closed even if a write fails;
    # the stray debug prints from the original are removed.
    with open(filename, "w") as log:
        for item in list:
            log.write(item)
            log.write("\n\n")
def PurifyLinkList(LinkList):
    """Extract clean http(s) URLs from candidate link words.

    Parameters:
        LinkList: iterable of strings (candidate words from CreateLinkList).

    Returns:
        list of the matched URL substrings; entries without an http(s) URL
        are dropped.
    """
    # Raw string: the original "\s" in a plain string is an invalid escape
    # (DeprecationWarning in modern Python). Pattern itself is unchanged.
    URLRegex = re.compile(r"(?P<url>https?://[^\s]+)")
    LinkResults = []
    for link in LinkList:
        match = URLRegex.search(link)
        # Explicit None check instead of the original bare except, which
        # used AttributeError for control flow and hid real errors.
        if match is not None:
            LinkResults.append(match.group())
    return LinkResults
# Gather every comment from the top 10 submissions of r/socialism, pull out
# link-like words, clean them down to bare URLs, and write them to Link.txt.
totalcomments = []
subreddit = reddit.subreddit('socialism')
for submission in subreddit.top(limit=10):
    totalcomments.extend(getAll(reddit, submission))
LinkList = CreateLinkList(totalcomments)
Results = PurifyLinkList(LinkList)
CreateLog(Results)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement