Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import praw
- from newspaper import Article
- from fnmatch import fnmatch, fnmatchcase
- from pprint import pprint
# Glob patterns (fnmatch syntax) tested against a submission's domain by
# get_n_posts; a submission whose domain matches any of them is skipped.
# Presumably these are media hosts and Reddit-internal links rather than
# article pages -- confirm before extending the list.
domain_filter = [
    '*gifmotion*', 'giphy.com', '*.youtube.*', '*ohmagif.*', '*.pinimg.*',
    'i.*', 'self.*', 'imgur.*', '*.imgur.*', 'youtube.*', 'youtu.*',
    'reddit.*', '*.reddit.*', 'gfycat.*', '*.tumblr*', 'media.*',
    'player.*', 'twitter.*',
]

# Glob patterns tested against the full submission URL; any match causes the
# submission to be skipped (image file extensions and "/video/" paths).
url_filter = [
    '*.png',
    '*.jpg',
    '*.gif',
    '*/video/*',
]
def _is_article_submission(submission):
    """Return True when *submission* passes both the domain and URL filters."""
    if any(fnmatch(submission.domain, pattern) for pattern in domain_filter):
        return False
    return not any(fnmatch(submission.url, pattern) for pattern in url_filter)


def get_n_posts(n):
    """Return up to *n* downloaded and parsed articles from r/worldnews.

    Walks the subreddit's top submissions for the past day, keeps the URLs
    of the first *n* submissions that match neither ``domain_filter`` nor
    ``url_filter``, then downloads and parses each URL with
    ``newspaper.Article``.

    Args:
        n: Maximum number of articles to return.

    Returns:
        A list of parsed ``Article`` objects in listing order. The list is
        shorter than *n* (possibly empty) when fewer submissions pass the
        filters, and empty when *n* is not positive. A list is always
        returned, so callers never have to handle ``None``.
    """
    if n <= 0:
        return []

    # NOTE(review): the API credentials below are hard-coded in source. They
    # are kept unchanged here so existing behaviour is preserved, but they
    # should be rotated and loaded from configuration or the environment.
    reddit = praw.Reddit(user_agent='starfoulah2097.tumblr.com',
                         client_id='BqC_Qw_OQkFOQA',
                         client_secret='800XxEW6kQGz4aXMHot5d7vzN6k',
                         username='the_reddit_bot', password='aqwzsx*')
    subreddit = reddit.subreddit('worldnews')

    # First pass: collect candidate URLs only, stopping as soon as enough
    # have been found so the listing is not paged through needlessly.
    checked_urls = []
    for submission in subreddit.top(time_filter='day'):
        if _is_article_submission(submission):
            checked_urls.append(submission.url)
            if len(checked_urls) >= n:
                break

    # Second pass: fetch and parse every collected URL.
    posts = []
    for link in checked_urls:
        article = Article(url=link)
        article.download()
        article.parse()
        posts.append(article)
    return posts
# Script driver: fetch three articles and pretty-print the first one's title.
_first_article = get_n_posts(3)[0]
pprint(_first_article.title)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement