Advertisement
Guest User

Untitled

a guest
Nov 25th, 2017
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.38 KB | None | 0 0
"""
The goal of this program is to:
1) Find the top submissions from multiple subreddits
2) Look through the comments that contain links
3) Sort them by score
4) Document the username, link, link context, and parent comment
5) Store the information in a file
"""
  9.  
  10. #Importing necessary modules (For the most part)
  11. import praw
  12. import logging
  13. import re
  14. from pprint import pprint
  15. import requests
  16. import json
  17.  
  18. #Adds logging to the program, tells when information is being requested
  19. handler = logging.StreamHandler()
  20. handler.setLevel(logging.DEBUG)
  21. logger = logging.getLogger("prawcore")
  22. logger.setLevel(logging.DEBUG)
  23. logger.addHandler(handler)
  24.  
  25. #Configures how we will view logs
  26. logging.basicConfig(level=logging.DEBUG,format=' %(asctime)s -%(levelname)s - %(message)s')
  27. logging.getLogger().setLevel(logging.INFO)
  28.  
  29. #Initialize Praw, the Reddit API Wrapper
  30. reddit = praw.Reddit(client_id='CLIENT REDDIT GIVES YOU',
  31. client_secret='SECRET REDDIT GIVES YOU',
  32. password='YOUR PASSWORD HERE',
  33. username='YOUR REDDIT USERNAME',
  34. user_agent="YOUR OS : ID REDDIT GIVES YOU : 1.0 (by /u/REDDIT USERNAME)")
  35.  
  36. #It's supposed to be read only but I can't get it to be dat way
  37. print(reddit.read_only)
  38.  
  39.  
  40. def getSubComments(comment, allComments, verbose=True):
  41. allComments.append(comment)
  42. if not hasattr(comment, "replies"):
  43. replies = comment.comments()
  44. if verbose: print("fetching (" + str(len(allComments)) + " comments fetched total)")
  45. else:
  46. replies = comment.replies
  47. for child in replies:
  48. getSubComments(child, allComments, verbose=verbose)
  49.  
  50.  
  51. def getAll(r, submissionId, verbose=True):
  52. submission = r.submission(submissionId)
  53. comments = submission.comments
  54. commentsList = []
  55. for comment in comments:
  56. getSubComments(comment, commentsList, verbose=verbose)
  57. print(len(commentsList))
  58. return commentsList
  59.  
  60. #Creates a non-filtered and non-purified list of links, needs to be scrubbed
  61. def CreateLinkList(CommentList):
  62. LinkList = []
  63. counter = 0
  64. failedcounter = 0
  65. list = [".com",".org",".net"]
  66. for comment in CommentList:
  67. try:
  68. for word in comment.body.split():
  69. for Target in list:
  70. if Target in word:
  71. LinkList.append(word)
  72. counter += 1
  73. except:
  74. failedcounter += 1
  75. logging.info("Failed: " + str(failedcounter))
  76. logging.info("Worked: " + str(counter))
  77. return LinkList
  78.  
  79. #Writes contents of whatever list is passed through into the Link.txt file
  80. def CreateLog(list):
  81. print("ASDASDASD")
  82. LinkLog = open("Link.txt", "w")
  83. for item in list:
  84. print(item)
  85. LinkLog.write(item)
  86. LinkLog.write("\n\n")
  87. LinkLog.close()
  88.  
  89. def PurifyLinkList(LinkList):
  90. URLRegex = re.compile("(?P<url>https?://[^\s]+)")
  91. LinkResults = []
  92. for link in LinkList:
  93. try:
  94. URL = URLRegex.search(link)
  95. PureLink = URL.group()
  96. LinkResults.append(PureLink)
  97. except:
  98. continue
  99. return LinkResults
  100.  
  101.  
  102. totalcomments = []
  103. subreddit = reddit.subreddit('socialism')
  104. for x in subreddit.top(limit=10):
  105. for comment in getAll(reddit,x):
  106. totalcomments.append(comment)
  107.  
  108. LinkList = CreateLinkList(totalcomments)
  109. Results = PurifyLinkList(LinkList)
  110. CreateLog(Results)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement