Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import urllib
- from bs4 import BeautifulSoup as bs
def _first_text(markup):
    """Return the text between the first ``>`` and the following ``<``.

    For the string form of a tag (or of a list of tags) this is the text
    that directly follows the first opening tag.
    """
    return markup.split(">")[1].split("<")[0]


def get_media_imdb(imdbid):
    """Scrape basic metadata for one title from its IMDb page.

    Args:
        imdbid: IMDb title identifier, e.g. ``'tt4846340'``. It is inserted
            verbatim into ``http://www.imdb.com/title/<imdbid>/``.

    Returns:
        A dict with the keys:

        * ``'Type'``       -- ``"Movie"`` if an ``<a>`` is found inside an
          ``<h1>``, otherwise ``"Serial"``.
        * ``'Year'``       -- text of the first ``<a>`` inside an ``<h1>``,
          or ``None`` when there is none.
        * ``'Title'``      -- leading text of the first ``<h1>``, or ``None``.
        * ``'imdbRating'`` -- text of the first
          ``<span itemprop="ratingValue">``, or ``None``.
        * ``'Poster'``     -- value extracted from the ``<div class="poster">``
          markup, or ``None``.
        * ``'Genre'``      -- list of texts of ``<span class="itemprop">``
          elements found inside ``<div class="subtext">`` (possibly empty).

    Any error raised while opening the URL propagates to the caller.
    """
    # Function-scope import so the block works with either standard library
    # layout without touching the file-level imports.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    url = "http://www.imdb.com/title/" + imdbid + "/"
    response = urlopen(url)
    try:
        soup = bs(response, "html5lib")
    finally:
        # Release the connection even if parsing fails.
        response.close()

    # Parse rating.
    rating_spans = soup.findAll('span', attrs={'itemprop': 'ratingValue'})
    rating = _first_text(str(rating_spans)) if rating_spans else None

    # Parse poster.
    # NOTE(review): this takes the 10th double-quote-delimited chunk of the
    # poster markup, so it depends on the exact attribute order of the page;
    # presumably that chunk is the image URL -- confirm against a live page.
    poster_divs = soup.findAll('div', attrs={'class': 'poster'})
    try:
        poster = str(poster_divs).split('"')[9]
    except IndexError:
        # No poster block, or markup shorter than expected.
        poster = None

    # Parse genre: search inside the subtext blocks directly instead of
    # serialising them and parsing the document a second time.
    genre = [
        _first_text(str(span))
        for block in soup.findAll('div', attrs={'class': 'subtext'})
        for span in block.findAll('span', attrs={'class': 'itemprop'})
    ]

    # Parse title and year.
    headings = soup.findAll('h1')
    if headings:
        # Anything from the first backslash onwards is dropped.
        title = _first_text(str(headings)).split("\\")[0]
    else:
        title = None

    year_links = [link for heading in headings for link in heading.findAll('a')]
    if year_links:
        year = _first_text(str(year_links))
        media_type = "Movie"
    else:
        # No link inside the heading: report no year rather than leaking an
        # empty result list into the returned dict.
        year = None
        media_type = "Serial"

    return {
        'Type': media_type,
        'Year': year,
        'Title': title,
        'imdbRating': rating,
        'Poster': poster,
        'Genre': genre,
    }
if __name__ == "__main__":
    # Demo run: fetch one hard-coded title and print the resulting dict.
    # The parenthesised single-argument form prints the same thing under the
    # Python 2 print statement and the Python 3 print function.
    print(get_media_imdb('tt4846340'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement