Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import urllib.request
- from bs4 import BeautifulSoup
- import pandas as pd
def get_html(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; passed unchanged to ``urllib.request.urlopen``.

    Returns:
        The complete response body as ``bytes`` (no decoding is applied).

    Raises:
        Whatever ``urllib.request.urlopen`` raises for an unreachable or
        malformed URL; nothing is caught here.
    """
    # The context manager closes the underlying connection even when
    # read() raises, so the response handle is never leaked.
    with urllib.request.urlopen(url) as response:
        return response.read()
def parse(html):
    """Extract the ``results`` table from *html* and print it as a DataFrame.

    Every table row after the first contributes one record built from:
    the text of the first cell (ordinal number), the link text inside the
    third cell (title), the text of the row's second ``<span>`` (year) and
    the text of the first ``span.value`` element (rating, ``'0.0'`` when the
    row has none).  The records are indexed by the ordinal-number column
    and printed to stdout.

    Args:
        html: Markup of the page, as accepted by ``BeautifulSoup``.

    Returns:
        None. The table is printed, not returned.

    Raises:
        ValueError: If the markup contains no ``<table class="results">``.
    """
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so the same page could be
    # parsed differently on different machines.
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', class_='results')
    if table is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('no <table class="results"> found in the supplied HTML')

    horror = []
    # The first row is skipped (presumably the header row - confirm
    # against the actual page layout).
    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        spans = row.find_all('span')
        # Rows without the expected cells/link/spans cannot yield a record;
        # skip them rather than crash with IndexError/AttributeError.
        if len(cols) < 3 or cols[2].a is None or len(spans) < 2:
            continue
        # Query the rating once; rows without a rating fall back to '0.0'.
        rating_spans = row.select('span.value')
        rating = rating_spans[0].text if rating_spans else '0.0'
        horror.append([
            cols[0].text,    # ordinal number
            cols[2].a.text,  # title
            spans[1].text,   # year
            rating,          # rating
        ])

    df = pd.DataFrame(data=horror, columns=['Номер', 'Название', 'Год', 'Рейтинг'])
    df = df.set_index('Номер')
    # head(len(horror)) selects every collected row, i.e. the whole frame.
    print(df.head(len(horror)))
def main():
    """Fetch the IMDb search listing (horror features sorted by user rating) and hand it to ``parse``."""
    listing_url = 'http://www.imdb.com/search/title?at=0&genres=horror&sort=user_rating&title_type=feature'
    page_markup = get_html(listing_url)
    parse(page_markup)


# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement