Advertisement
Guest User

Untitled

a guest
Jan 25th, 2017
157
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.41 KB | None | 0 0
  1. import praw
  2. from newspaper import Article
  3. from fnmatch import fnmatch, fnmatchcase
  4. from pprint import pprint
  5.  
  6. domain_filter = ['*gifmotion*', 'giphy.com', '*.youtube.*', '*ohmagif.*', '*.pinimg.*',\
  7.                 'i.*', 'self.*', 'imgur.*', '*.imgur.*','youtube.*','youtu.*',\
  8.                 'reddit.*', '*.reddit.*', 'gfycat.*', '*.tumblr*', 'media.*',\
  9.                 'player.*', 'twitter.*']
  10. url_filter = ['*.png', '*.jpg', '*.gif', '*/video/*']
  11.                
  12. def get_n_posts(n):
  13.     checked_urls = []
  14.     post_obj = []
  15.     i = 0
  16.     reddit = praw.Reddit(user_agent='starfoulah2097.tumblr.com',
  17.                          client_id='BqC_Qw_OQkFOQA', client_secret='800XxEW6kQGz4aXMHot5d7vzN6k',
  18.                          username='the_reddit_bot', password='aqwzsx*')
  19.     subreddit = reddit.subreddit('worldnews')
  20.     for submission in subreddit.top('day'):
  21.         if not any(fnmatch(submission.domain, filter) for filter in domain_filter):
  22.             if not any(fnmatch(submission.url, filter) for filter in url_filter):
  23.                 i = i + 1
  24.                 checked_urls.append(submission.url)
  25.             if i == n:
  26.                 for link in checked_urls:
  27.                     post_tmp = Article(url = link)
  28.                     post_tmp.download()
  29.                     post_tmp.parse()
  30.                     post_obj.append(post_tmp)
  31.     return (post_obj)
  32.    
  33. pprint (get_n_posts(3)[0].title)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement