Advertisement
TankorSmash

ImageBam Scraping

Sep 14th, 2012
1,351
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.78 KB | None | 0 0
  1. import bs4
  2. import requests
  3.  
  4. s = requests.session()
  5.  
  6. base_url = r'http://www.imagebam.com'
  7.  
  8. limb_url = r'/image/c6b0cf70777810/'
  9.  
  10. image_links = []
  11.  
  12. done = False
  13. next_found = False
  14. while not done:
  15.     url = base_url + limb_url
  16.     r = s.get(url)
  17.  
  18.     soup = bs4.BeautifulSoup(r.content)
  19.     link_elems = soup.findAll(attrs={'class': 'buttonblue'})
  20.     for link in link_elems:
  21.         if 'save' in link.text:
  22.             image_links.append(link['href'])
  23.             print 'saving this link:', link['href']
  24.         elif 'next' in link.text:
  25.             limb_url = link['href']
  26.             print 'found', limb_url
  27.             next_found = True
  28.            
  29.     if not next_found:
  30.             print 'else, so were done for this page'
  31.             done = True
  32.  
  33. print image_links
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement