Advertisement
TankorSmash

temp.py

Apr 28th, 2013
137
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.54 KB | None | 0 0
  1. import requests, bs4, re
  2.  
  3. url = r'http://www.bing.com/images/search?q=scarlett+johansson&qft=%2Bfilterui%3Aimagesize-large'
  4.  
  5. r = requests.get(url)
  6.  
  7. soup = bs4.BeautifulSoup(r.content)
  8.  
  9. res = soup.findAll('div', {'class': 'dg_u'})
  10.  
  11. for div in res:
  12.     a_elem = div.find('a')
  13.     m_attr = a_elem.get('m')
  14.     if m_attr:
  15.         dirty_url = m_attr.split('oi:')[-1]
  16.         pattern = "http:.*.jpg"
  17.         matches = re.findall(pattern, dirty_url)
  18.         if matches:
  19.             cleaned_url = matches[0]
  20.             print cleaned_url
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement