Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Browser-like User-Agent header value sent with every IMDb request.
user_agent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36"
# Prefix of an IMDb title page URL; the title id and a trailing "/" are appended.
base_url = "http://www.imdb.com/title/"

def get_IMDB_page(imdb_id):
    """Download the IMDb title page for ``imdb_id`` and return its body.

    The URL is built as ``base_url + imdb_id + "/"`` and requested with the
    module's ``user_agent`` header.

    Returns the data read from the response, or ``""`` when the request
    fails with ``urllib2.URLError`` or ``socket.error``; the failure is
    reported through ``defaultLog`` and is not re-raised.
    """
    url = base_url + imdb_id + "/"
    req = urllib2.Request(url)
    req.add_header('User-Agent', user_agent)
    # NOTE: this sets the process-wide default socket timeout, not just
    # the timeout of this one request.
    socket.setdefaulttimeout(30)
    html = ""
    # Stays None when urlopen() itself fails, so the cleanup below never
    # touches an unbound name.
    response = None
    try:
        response = urllib2.urlopen(req)
        html = response.read()
    except (urllib2.URLError, socket.error) as err:
        defaultLog( addonLanguage(32505) % ( err ) )
    finally:
        if response is not None:
            response.close()
    return html
def parse_IMDb_page(imdb_id):
    """Fetch the page for ``imdb_id`` and hand it to ``beautifulsoup``.

    Keeps calling ``get_IMDB_page`` until it returns a non-empty string,
    sleeping one second after each empty result. There is no retry limit.
    """
    while True:
        html = get_IMDB_page(imdb_id)
        if html != "":
            break
        # Empty result means the fetch failed; wait before trying again.
        time.sleep(1)
    soup = beautifulsoup(html)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement