Advertisement
mbpaster

IMDB scraper

Mar 1st, 2014
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.83 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3.  
  4. done = False
  5.  
  6. while not done:
  7.   try:
  8.     url = raw_input("IMDB URL: ")
  9.  
  10.     # get the IMDB page
  11.     r = requests.get(url)
  12.     data = r.text
  13.  
  14.     # and parse it with BeautifulSoup
  15.     soup = BeautifulSoup(data)
  16.  
  17.     # the td containing what we're looking for
  18.     td = soup.find('td', {'id': 'overview-top'})
  19.     rating = td.find('div', {'class': 'star-box-giga-star'}).string
  20.     plot = td.find('p', {'itemprop': 'description'}).string
  21.     # the div containing the main actors in the cast
  22.     actors = td.find('div', {'itemprop': 'actors'})
  23.     stars = ', '.join([actor.string for actor in actors.find_all('span', {'class': 'itemprop', 'itemprop': 'name'})])
  24.  
  25.     print '*%s* - %s. %s' % (rating.strip(), stars, plot)
  26.   except KeyboardInterrupt:
  27.     done = True
  28. print
  29. print 'bye!'
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement