Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python3
- # -*- coding: utf-8 -*-
- # @Time : 2020/6/30 18:42
- # @Author : FancyKing
- # @FileName: dou_ban.py
- # @Software: PyCharm
- import requests
- from fake_headers import Headers
- from fake_useragent import UserAgent
- from lxml import html
def gen_headers():
    """Build a header mapping for an outgoing HTTP request.

    The mapping is produced by ``Headers(headers=False).generate()`` and its
    ``User-Agent`` entry is then overwritten with the ``random`` attribute of
    a ``UserAgent`` instance.

    Returns:
        The generated header mapping, with ``User-Agent`` replaced.
    """
    agent_source = UserAgent(use_cache_server=True, cache=True, verify_ssl=False)
    request_headers = Headers(headers=False).generate()
    request_headers['User-Agent'] = agent_source.random
    return request_headers
def dou_ban_show(film_list_sorted):
    """Print a count line followed by the details of every film.

    Args:
        film_list_sorted: Sequence of dicts, each providing the keys
            ``film_name``, ``film_date``, ``film_prop``, ``film_bel`` and
            ``film_fav``. Entries are printed in the order given.
    """
    print(f'There are {len(film_list_sorted)} film(s) will on.')

    # (label, dict key) pairs for the tab-indented detail lines, in print order.
    detail_rows = (
        ('上映日期', 'film_date'),
        ('影片类型', 'film_prop'),
        ('出版地区', 'film_bel'),
        ('喜爱人数', 'film_fav'),
    )
    for film in film_list_sorted:
        print(f"影片名称: {film['film_name']}")
        for label, key in detail_rows:
            print(f'\t{label}: {film[key]}')
def dou_ban_curler(uri, timeout=10):
    """Download a film listing page and extract its entries.

    Every ``<div class="intro">`` element in the page is read as one film:
    the title comes from ``h3/a`` and the remaining fields from the first
    four ``ul/li`` items.

    Args:
        uri: URL of the page to download.
        timeout: Seconds passed to ``requests`` as the request timeout, so a
            stalled server cannot block the script indefinitely.

    Returns:
        A list of dicts with the keys ``film_name``, ``film_date``,
        ``film_prop``, ``film_bel`` and ``film_fav``, sorted by the numeric
        value of ``film_fav`` in descending order. The list is empty when the
        page contains no matching elements.

    Raises:
        IndexError: If an entry lacks one of the expected text nodes.
        ValueError: If a ``film_fav`` value cannot be converted to ``float``.
    """
    # The context manager releases the session's connections on exit.
    with requests.Session() as curler:
        response = curler.get(url=uri, headers=gen_headers(), timeout=timeout)
        # ``.text`` is already a decoded ``str``; no re-encoding is needed.
        site_data = response.text

    parser_data = html.fromstring(site_data)

    films = []
    for intro in parser_data.xpath('//div[@class="intro"]'):
        films.append({
            'film_name': intro.xpath('h3/a/text()')[0],
            'film_date': intro.xpath('ul/li[1]/text()')[0],
            'film_prop': intro.xpath('ul/li[2]/text()')[0],
            'film_bel': intro.xpath('ul/li[3]/text()')[0],
            # Strip the trailing "人想看" suffix so only the count remains.
            'film_fav': intro.xpath('ul/li[4]/span/text()')[0].replace('人想看', ''),
        })

    return sorted(films, key=lambda film: float(film['film_fav']), reverse=True)
- # https://files.catbox.moe/wnsgeb.png
if __name__ == '__main__':
    douban_latest_uri = 'https://movie.douban.com/cinema/later/beijing/'

    # Retry while the scrape comes back empty. The result must be re-assigned
    # on every attempt (otherwise the loop condition never changes), and the
    # attempts are bounded so an always-empty response cannot loop forever.
    max_attempts = 5
    film_list = []
    for _ in range(max_attempts):
        film_list = dou_ban_curler(douban_latest_uri)
        if film_list:
            break

    dou_ban_show(film_list)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement