Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # you'll need to install these third-party packages; they're on PyPI (install with pip)
- import requests, feedparser, bs4
- import re, urlparse
- #----------------------------------------------------------------------
def GetUserRssFeed(username, timeout=10):
    """Download the RSS feed listing a reddit user's submissions.

    Args:
        username: reddit account name, substituted into the feed URL.
        timeout: seconds to wait for the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        The body of the HTTP response as text.

    Raises:
        requests.exceptions.RequestException: on a connection failure, a
            timeout, or a 4xx/5xx response status.
    """
    url = "http://www.reddit.com/user/{0}/submitted/.rss".format(username)
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser,
    # which would otherwise just look like a feed with no entries.
    response.raise_for_status()
    return response.text
- #----------------------------------------------------------------------
def ParseRssFeed(rss):
    """Hand the raw RSS text to feedparser and return its dict-like result."""
    return feedparser.parse(rss)
- #----------------------------------------------------------------------
def FindSubmissionsToSubreddit(feed, subreddit):
    """Return the feed entries whose link points into the given subreddit.

    Args:
        feed: dict-like object with an ``'entries'`` list; every entry must
            provide a ``'link'`` URL.
        subreddit: subreddit name to look for, compared case-insensitively.

    Returns:
        A list with the matching entries, in their original order. Entries
        whose link path is not of the form ``/r/<subreddit>/...`` are
        skipped rather than raising.
    """
    # Imported here so the function works on both Python 3 and Python 2.
    try:
        from urllib.parse import urlparse  # Python 3
    except ImportError:
        from urlparse import urlparse  # Python 2

    wanted = subreddit.lower()
    matched_entries = []
    for entry in feed['entries']:
        # The path starts with '/', so splitting yields
        # ['', 'r', '<subreddit>', ...] for a subreddit link.
        segments = urlparse(entry['link']).path.split('/')
        if len(segments) < 3 or segments[1] != 'r':
            # Too short (e.g. the site root) or not a subreddit link.
            continue
        if segments[2].lower() == wanted:
            matched_entries.append(entry)
    return matched_entries
- #----------------------------------------------------------------------
def FindAllImgurLinks(matched_entries):
    """Collect the submission link of every entry that has one.

    Each entry's ``'summary'`` HTML is searched for the anchor whose text is
    ``[link]``; its ``href`` is the submitted URL. Entries without such an
    anchor are reported as self posts and skipped. Despite the function's
    name, the collected URLs are not filtered by host.

    Args:
        matched_entries: iterable of dict-like entries, each with a
            ``'summary'`` HTML string.

    Returns:
        A list of the URLs found, in entry order. Progress and a final count
        are printed to stdout.
    """
    imgur_urls = []
    for entry in matched_entries:
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = bs4.BeautifulSoup(entry['summary'], 'html.parser')
        anchor = soup.find('a', text='[link]')
        # find() returns None when nothing matches; check for that directly
        # instead of relying on the TypeError from indexing None.
        link = anchor.get('href') if anchor is not None else None
        if link is None:
            print('self post: no [link] anchor with an href in the summary')
            continue
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print('found: {0}'.format(link))
        imgur_urls.append(link)
    print('Found a total of {0} links'.format(len(imgur_urls)))
    return imgur_urls
# Script configuration: the account to inspect and the subreddit to filter on.
username = "PoloSnuggles"
subreddit = "gonewild"

# Only hit the network when run as a script, not when the module is imported.
if __name__ == "__main__":
    # download the user's submissions feed
    rss = GetUserRssFeed(username)
    # parse the feed into a dict-like object
    feed = ParseRssFeed(rss)
    # keep only the entries that point into the chosen subreddit
    matched_entries = FindSubmissionsToSubreddit(feed, subreddit)
    # print and collect the submission links
    FindAllImgurLinks(matched_entries)
    # trailing blank line; print("") emits one on both Python 2 and 3,
    # whereas a bare `print` does nothing on Python 3
    print("")
Add Comment
Please, Sign In to add comment