Advertisement
Guest User

Untitled

a guest
Sep 18th, 2017
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.46 KB | None | 0 0
  1. #!/usr/bin/python3
  2.  
  3. """
  4. Created on Mon Sep 4 15:06:35 2017
  5.  
  6. @author: Michy
  7. """
  8.  
  9. import os
  10. import praw
  11. import logging
  12. import argparse
  13. import config_data
  14. from datetime import datetime
  15. from prawcore import NotFound
  16.  
  17. VERSION = '1.0'
  18.  
  19. def find_relevant_posts(reddit_obj, subreddit_name, keyword, limit=50, flag='new'):
  20.  
  21. # This function looks for relevant posts in a given subreddit using the supplied
  22. # keywords.
  23. #
  24. # Params:
  25. # @reddit_obj: a Reddit instance.
  26. # @subreddit_name: name of the subreddit to be searched (string)
  27. # @keyword: keyword to be used for the search (string)
  28. # @limit: maximum number of posts searched (integer).
  29. # @flag: Reddit's posts flag (string).
  30. #
  31. # Returns a tuple of two lists, titles and urls containing the titles and
  32. # the urls of the relevant posts, respectively.
  33. #
  34.  
  35. subreddit = reddit.subreddit(subreddit_name)
  36.  
  37. if flag == 'new':
  38. new_submissions = subreddit.new(limit=limit)
  39. elif flag == 'rising':
  40. new_submissions = subreddit.rising(limit=limit)
  41. elif flag == 'controversial':
  42. new_submissions = subreddit.controversial(limit=limit)
  43. elif flag == 'top':
  44. new_submissions = subreddit.top(limit=limit)
  45. else:
  46. new_submissions = subreddit.new(limit=limit)
  47.  
  48. urls = []
  49. titles = []
  50.  
  51. for submission in new_submissions:
  52. if not submission.stickied:
  53. if keyword in submission.title.lower() or keyword in submission.selftext.lower():
  54. urls.append(submission.url)
  55. titles.append(submission.title)
  56.  
  57. return titles, urls
  58.  
  59. def find_relevant_posts_wider(reddit_obj, subreddit_names, keywords, limit=50, flag='new'):
  60.  
  61. # This function looks for relevant posts in each subreddit supplied using the
  62. # keywords supplied in the keywords argument.
  63. #
  64. # Params:
  65. # @reddit_obj: a Reddit instance.
  66. # @subreddit_names: names of the subreddit to be searched (list of strings)
  67. # @keywords: keywords to be used for the search (list of string)
  68. # @limit: maximum number of posts searched (integer).
  69. # @flag: Reddit's posts flag (string).
  70. #
  71. # Returns a tuple of two lists, titles_wider and urls_wider containing the
  72. # titles and the urls of the relevant posts, respectively.
  73. #
  74.  
  75. titles_wider = []
  76. urls_wider = []
  77.  
  78. for subreddit in subreddit_names:
  79. for keyword in keywords:
  80. titles, urls = find_relevant_posts(reddit_obj, subreddit, keyword, limit, flag)
  81. for t, u in zip(titles, urls):
  82. titles_wider.append(t)
  83. urls_wider.append(u)
  84.  
  85. return titles_wider, urls_wider
  86.  
  87. def save_findings(titles, urls, filename):
  88.  
  89. # This function saves the results of the search.
  90. #
  91. # Params:
  92. # @titles: titles of the posts (list of strings).
  93. # @urls: urls of the posts (list of strings).
  94. # @filename: name of the file to save (string).
  95. #
  96. # Returns void.
  97. #
  98.  
  99. filename = os.path.join(os.getcwd(), filename)
  100.  
  101. if os.path.exists(filename):
  102. mode = 'a'
  103. else:
  104. mode = 'w'
  105.  
  106. with open(filename, mode) as f:
  107. for t, u in zip(titles, urls):
  108. f.write('\n'.join([t, u]))
  109. f.write('\n\n')
  110.  
  111. print("Search results saved in {}".format(filename))
  112.  
  113.  
  114. def check_subreddit_exists(reddit, subreddit):
  115.  
  116. # This function checks if a subreddit exists.
  117. #
  118. # Params:
  119. # @reddit: a Reddit instance.
  120. # @subreddit: subreddit to be checked (string).
  121. #
  122. # Returns: True if the subreddit exists, false otherwise.
  123. #
  124.  
  125. exists = True
  126. try:
  127. reddit.subreddits.search_by_name(subreddit, exact=True)
  128. except NotFound:
  129. exists = False
  130. return exists
  131.  
  132. def check_limit_range(limit):
  133.  
  134. # This function checks that the limit parameter is in the 1-500 range.
  135. # If limit is not within the selected range, an ArgumentTypeError is raised.
  136. #
  137. # Params:
  138. # @limit: limit to be checked (integer)
  139. #
  140. # Returns: limit
  141. #
  142.  
  143. limit = int(limit)
  144. if limit <= 0 or limit > 500:
  145. raise argparse.ArgumentTypeError("{} is not a valid value".format(limit))
  146. return limit
  147.  
  148. def setup_argparser():
  149.  
  150. # This function sets up the argument parser.
  151. #
  152. # Returns the arguments
  153. #
  154.  
  155. parser = argparse.ArgumentParser(description='Reddit Browsing Bot version {}'.format(VERSION))
  156. parser.add_argument('-s','--subreddits', type=str, required=True, help='Subreddits to look into.')
  157. parser.add_argument('-k', '--keywords', type=str, required=True, help='Keywords to search for.')
  158. parser.add_argument('-l', '--limit', type=check_limit_range, default=50, help='Maximum number of searches. Must be included in the range 1 - 500')
  159. parser.add_argument('-f', '--flag', type=str, default='new', choices=['new', 'rising', 'controversial', 'top'], help='Reddit flags.')
  160. parser.add_argument('-o', '--output', type=str, help='Output file name.')
  161. parser.add_argument('-v', '--verbose', action='store_true', help='Be verbose? Prints output if flag is set.')
  162.  
  163. args = parser.parse_args()
  164.  
  165. return args
  166.  
  167. def setup_logger():
  168.  
  169. # This function sets up the logger.
  170. #
  171. # Returns logger.
  172. #
  173.  
  174. logging.basicConfig(filename='reddit_bot_log.log', level=logging.DEBUG)
  175. logger = logging.getLogger(name='Reddit Browsing Bot V. {}'.format(VERSION))
  176.  
  177. return logger
  178.  
  179. # Main
  180. if __name__ == '__main__':
  181.  
  182. # Setup argument parser
  183. args = setup_argparser()
  184. # Initialize logger
  185. logger = setup_logger()
  186.  
  187. # Retrieve arguments
  188. subreddits = args.subreddits
  189. keywords = args.keywords
  190. limit = args.limit
  191. flag = args.flag
  192. filename = args.output
  193. verbose = args.verbose
  194.  
  195. # Initialize reddit instance
  196. reddit = praw.Reddit(client_id = config_data.client_id,
  197. client_secret = config_data.client_secret,
  198. username = config_data.username,
  199. password = config_data.password,
  200. user_agent = 'Reading bot looking for hot topics')
  201. logger.log(logging.INFO, "Reddit instance initiated.")
  202.  
  203. # Check if every subreddits exist. Ignore those that do not exist
  204. subreddits = [sub.lower() for sub in subreddits if check_subreddit_exists(reddit, sub.lower())]
  205. # Check that length of keywords is > 1. Ignore keywords whose length is < 1
  206. keywords = [key.lower() for key in keywords if len(key) > 1]
  207.  
  208. print("Subreddits searched: {} \nKeywords used {}\n\n".format(subreddits, keywords))
  209.  
  210. # Start search
  211. logger.log(logging.INFO,
  212. "Started search for {} in {} at {}".format(keywords,
  213. subreddits,
  214. datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
  215. titles, urls = find_relevant_posts_wider(reddit, subreddits, keywords, limit, flag)
  216. logger.log(logging.INFO, "Search ended.")
  217.  
  218. # Save findings if a filename has been provided.
  219. if filename is not None:
  220. logger.log(logging.INFO, "Saving data.")
  221. save_findings(titles, urls, filename)
  222.  
  223. # If the program needs to be verbose or if filename has not been provided,
  224. # print output to the console
  225. if verbose or filename is None:
  226. for t, u in zip(titles, urls):
  227. print(t, u, sep='\n', end='\n\n')
  228.  
  229. # Main ended
  230. logger.log(logging.INFO, "Main executon ended successfully.")
  231. print("\n\nExiting....")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement