Advertisement
Guest User

Untitled

a guest
Dec 5th, 2017
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.66 KB | None | 0 0
  1. """
  2. The goal of this program is to:
  3. 1) Find top submissions from multiple subreddits
  4. 2) Look through the comments that contain links
  5. 3) Sort by score
  6. 4) Document Username, Link, Link Context, Parent Comment
  7. 5) Store information to file
  8. """
  9.  
  10. # Importing necessary modules (For the most part)
  11. import praw
  12. import logging
  13. import re
  14. from pprint import pprint
  15. import requests
  16. import json
  17.  
  18. # Adds logging to the program, tells when information is being requested
  19. handler = logging.StreamHandler()
  20. handler.setLevel(logging.DEBUG)
  21. logger = logging.getLogger("prawcore")
  22. logger.setLevel(logging.DEBUG)
  23. logger.addHandler(handler)
  24.  
  25. # Configures how we will view logs
  26. logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s -%(levelname)s - %(message)s')
  27. logging.getLogger().setLevel(logging.INFO)
  28.  
  29. # Initialize Praw, the Reddit API Wrapper
  30. reddit = praw.Reddit(client_id='pQr2bTUsJzvAYA',
  31. client_secret='hNsIBr42jlcVWy8_-TFHZAsOb08',
  32. password='ThereIsNoHopeRight',
  33. username='LeftcomGANG',
  34. user_agent="Linux : 7NgvlaWfS9R0nA : 1.0 (by /u/Nighthawk153)")
  35.  
  36. # It's supposed to be read only but I can't get it to be dat way
  37. print(reddit.read_only)
  38.  
  39.  
  40. def getSubComments(comment, allComments, verbose=True):
  41. """
  42. Given a comment, it will look through every reply and subcomment in the
  43. comment thread
  44. :param comment: The comment object/ID
  45. :param allComments: The current list of comments looked at
  46. :param verbose: True/False, decides if program will output logs of comments
  47. :return:
  48. """
  49. allComments.append(comment)
  50. if not hasattr(comment, "replies"):
  51. replies = comment.comments()
  52. if verbose: print("fetching (" + str(len(allComments)) + " comments fetched total)")
  53. else:
  54. replies = comment.replies
  55. for child in replies:
  56. getSubComments(child, allComments, verbose=verbose)
  57.  
  58.  
  59. def getAll(r, submissionId, verbose=True):
  60. """
  61. Gets all comments from a thread
  62. :param r: The reddit module / object we are using. Gathered from praw
  63. :param submissionId: The current thread being scraped
  64. :param verbose: Decides if the program outputs the comments found
  65. :return: Returns a list of every comment from the thread
  66. """
  67. submission = r.submission(submissionId)
  68. comments = submission.comments
  69. commentsList = []
  70. #For every comment in the section, we will look at every subcomment available
  71. for comment in comments:
  72. getSubComments(comment, commentsList, verbose=verbose)
  73. print(len(commentsList))
  74. #Returns the list of comments from the thread
  75. return commentsList
  76.  
  77.  
  78. # Creates a non-filtered and non-purified list of links, needs to be scrubbed
  79. def CreateLinkList(CommentList):
  80. LinkList = []
  81. counter = 0
  82. failedcounter = 0
  83. list = [".com", ".org", ".net"]
  84. for comment in CommentList:
  85. try:
  86. for word in comment.body.split():
  87. for Target in list:
  88. if Target in word:
  89. LinkList.append(word)
  90. counter += 1
  91. except:
  92. failedcounter += 1
  93. logging.info("Failed: " + str(failedcounter))
  94. logging.info("Worked: " + str(counter))
  95. return LinkList
  96.  
  97.  
  98.  
  99. def CreateLog(list,logfile):
  100. """
  101. Given the list of prurified and scrubbed links, it will now write this list of links
  102. to a file named Links.txt
  103. :param list: The list of links we are writing to the file
  104. :return: Returns nothing
  105. """
  106. logging.info("Writing links to file.... Link.txt")
  107. for item in list:
  108. print(item)
  109. LinkLog.write(item)
  110. LinkLog.write("\n\n")
  111. LinkLog.close()
  112. logging.info("Successful in writing to document!")
  113.  
  114.  
  115. def PurifyLinkList(LinkList):
  116. """
  117. Uses Regex to help filter and find links from a text.
  118. :param LinkList: The list of comments that contain links
  119. :return: Returns a list of clickable and working links
  120. """
  121. URLRegex = re.compile("(?P<url>https?://[^\s]+)")
  122. LinkResults = []
  123. for link in LinkList:
  124. try:
  125. URL = URLRegex.search(link)
  126. PureLink = URL.group()
  127. LinkResults.append(PureLink)
  128. except:
  129. continue
  130. return LinkResults
  131.  
  132.  
  133. LeftistList = ['esist']
  134. commentchoice = False
  135. threadchoice = True
  136. threadamount = 10
  137. totalcomments = []
  138. threadlinks = []
  139. threadlist = []
  140. threadcomments = []
  141.  
  142.  
  143. for sub in LeftistList:
  144. subreddit = reddit.subreddit(str(sub))
  145. #The program will loop through each subreddit title, and look through it's comments
  146. try:
  147. for thread in subreddit.hot(limit=10):
  148. threadlist.append(thread)
  149. except:
  150. logging.info("Something went wrong with this subreddit")
  151. continue
  152. #Now threadlist should be full of thread objects. 600~ or so.
  153. logging.info("Threadlist is " + str(len(threadlist)) + " items long")
  154. logging.info("Threadlist items: " + str(len(threadlist)))
  155.  
  156. #threadlist is now a very long list full of reddit thread objects
  157. #The program will now go through each thread --> Read each comment --> Read each subcomment
  158. #Look for links, and add them to the Linklist.
  159. for thread in threadlist:
  160. if threadchoice == True:
  161. threadlinks.append(thread.url)
  162. if commentchoice == True:
  163. for comment in getAll(reddit, thread):
  164. threadcomments.append(comment)
  165.  
  166. LinkLog = open("Link.txt", "w")
  167. if threadchoice == True:
  168. CreateLog(threadlinks,LinkLog)
  169. if commentchoice == True:
  170. LinkList = CreateLinkList(threadcomments)
  171. Results = PurifyLinkList(LinkList)
  172. CreateLog(Results,LinkLog)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement