Advertisement
FancyKing

小学期Python 豆瓣即将上映

Jun 30th, 2020
1,732
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.87 KB | None | 0 0
#!/usr/bin/python3
  2. # -*- coding: utf-8 -*-
  3. # @Time    : 2020/6/30 18:42
  4. # @Author  : FancyKing
  5. # @FileName: dou_ban.py
  6. # @Software: PyCharm
  7.  
  8. import requests
  9. from fake_headers import Headers
  10. from fake_useragent import UserAgent
  11. from lxml import html
  12.  
  13.  
  14. def gen_headers():
  15.     fake_useragent = UserAgent(use_cache_server=True, cache=True, verify_ssl=False)
  16.     # fake_useragent.update()
  17.     fake_headers = Headers(headers=False).generate()
  18.     fake_headers['User-Agent'] = fake_useragent.random
  19.     # print(fake_headers)
  20.     return fake_headers
  21.  
  22.  
  23. def dou_ban_show(film_list_sorted):
  24.     print('There are {} film(s) will on.'.format(len(film_list_sorted)))
  25.     for foo in film_list_sorted:
  26.         print('影片名称: {}'.format(foo['film_name']))
  27.         print('\t上映日期: {}'.format(foo['film_date']))
  28.         print('\t影片类型: {}'.format(foo['film_prop']))
  29.         print('\t出版地区: {}'.format(foo['film_bel']))
  30.         print('\t喜爱人数: {}'.format(foo['film_fav']))
  31.  
  32.  
  33. def dou_ban_curler(uri):
  34.     curler = requests.Session()
  35.     site_data = curler.get(url=uri, headers=gen_headers()).text
  36.     site_data.encode('utf-8')
  37.     parser_data = html.fromstring(site_data)
  38.     film_list_origin = parser_data.xpath('//div[@class="intro"]')
  39.     film_list_sort = []
  40.     for foo in film_list_origin:
  41.         film_name = foo.xpath('h3/a/text()')[0]
  42.         film_date = foo.xpath('ul/li[1]/text()')[0]
  43.         film_prop = foo.xpath('ul/li[2]/text()')[0]
  44.         film_bel = foo.xpath('ul/li[3]/text()')[0]
  45.         film_fav = foo.xpath('ul/li[4]/span/text()')[0].replace('人想看', '')
  46.         film_list_sort.append({
  47.             'film_name': film_name,
  48.             'film_date': film_date,
  49.             'film_prop': film_prop,
  50.             'film_bel': film_bel,
  51.             'film_fav': film_fav
  52.         })
  53.     film_list_sort.sort(key=lambda x: float(x['film_fav']), reverse=True)
  54.     return film_list_sort
  55.  
  56.  
  57. # https://files.catbox.moe/wnsgeb.png
  58. if __name__ == '__main__':
  59.     douban_latest_uri = 'https://movie.douban.com/cinema/later/beijing/'
  60.     film_list = dou_ban_curler(douban_latest_uri)
  61.     while len(film_list) <= 0:
  62.         dou_ban_curler(douban_latest_uri)
  63.     dou_ban_show(film_list)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement