TankorSmash

GoneWild v 0.1

Oct 14th, 2012
403
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.38 KB | None | 0 0
  1. # you'll need to install these; they're on PyPI (install with pip)
  2. import requests, feedparser, bs4
  3.  
  4. import re, urlparse
  5.  
  6.  
  7. #----------------------------------------------------------------------
  8. def GetUserRssFeed(username):
  9.     """Grabs a users RSS feed  """
  10.    
  11.     url = r"http://www.reddit.com/user/{0}/submitted/.rss".format(username)
  12.     r = requests.get(url)  
  13.     rss = r.text
  14.  
  15.     return rss
  16.  
  17. #----------------------------------------------------------------------
  18. def ParseRssFeed(rss):
  19.     """parses a RSS feed and turns into a dict-like object"""
  20.    
  21.     feed = feedparser.parse(rss)
  22.    
  23.     return feed
  24.  
  25. #----------------------------------------------------------------------
  26. def FindSubmissionsToSubreddit(feed, subreddit):
  27.     """finds a submission to a subreddit based on an unaltered
  28.    feed dict, return a list of entries"""
  29.    
  30.     #list to put all the entries if they match subreddit
  31.     matched_entries = []    
  32.  
  33.  
  34.     entries = feed['entries']
  35.     for entry in entries:
  36.         link = entry['link']
  37.         #parse the url and split it along the slashes
  38.         parsed_url = urlparse.urlparse(link)
  39.         split_url = parsed_url.path.split(r'/')
  40.         #the url points to subreddit
  41.         if subreddit == split_url[2]:
  42.             matched_entries.append(entry)
  43.    
  44.     return matched_entries
  45.    
  46. #----------------------------------------------------------------------
  47. def FindAllImgurLinks(matched_entries):
  48.     """finds imgur links from the submission link, not self post"""
  49.  
  50.     imgur_urls = []
  51.    
  52.     #loop over entries and pull out the imgur urls
  53.     for entry in matched_entries:
  54.         soup = bs4.BeautifulSoup(entry['summary'])
  55.         #find the submission link
  56.         try:
  57.             link = soup.find('a', text=r'[link]')['href']
  58.             print "found:", link
  59.             imgur_urls.append(link)
  60.         except TypeError as e:
  61.             print 'self post', e
  62.              
  63.     print 'Found a total of {0} links'.format(len(imgur_urls))
  64.     return imgur_urls
  65.    
  66.  
  67. username = "PoloSnuggles"
  68. subreddit = "gonewild"
  69.  
  70. #grab a users 25 most recent submissions
  71. rss = GetUserRssFeed(username)
  72. #parse the feed into a dict-like
  73. feed = ParseRssFeed(rss)
  74. #find all entries that point to gonewild or subreddit
  75. matched_entries = FindSubmissionsToSubreddit(feed, subreddit)
  76. #find all submission links
  77. FindAllImgurLinks(matched_entries)
  78.  
  79. print
Add Comment
Please, Sign In to add comment