Advertisement
shinemic

jitashe.py

May 25th, 2023
507
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.98 KB | None | 0 0
import logging
from pathlib import Path
from typing import Union
from urllib.parse import quote, urljoin

from requests_html import HTMLSession, requests
  5.  
  6.  
  7. class JitasheDownloader:
  8.     BASE_URL = 'https://www.jitashe.org'
  9.  
  10.     def __init__(self):
  11.         self.session = HTMLSession()
  12.  
  13.     def _search(self, keyword) -> Union[str, None]:
  14.         """ 查询页,如有查询结果返回第一个,否则返回 None """
  15.  
  16.         search_req = self.session.get(f'{self.BASE_URL}/search/tab/{keyword}')
  17.         search_first = search_req.html.find('#threadlist > div > div.text > a', first=True)
  18.         first_elem_url = search_first and self.BASE_URL + search_first.element.attrib['href']
  19.         return first_elem_url
  20.  
  21.     def _download(self, keyword, out_dir) -> None:
  22.         """ 根据查询页返回结果下载乐谱 """
  23.  
  24.         search_url = self._search(keyword)
  25.         if search_url:
  26.             score_req = self.session.get(search_url)
  27.             title = score_req.html.find('h1.gb-title', first=True).text
  28.             scores_url = [url.split('!')[0] for url in score_req.html.xpath(
  29.                 '//ignore_js_op/picture/img/@src')]
  30.  
  31.             if scores_url:
  32.                 download_path = Path(out_dir) / title
  33.                 download_path.mkdir(parents=True, exist_ok=True)
  34.                 filename_width = len(str(len(scores_url)))
  35.                 for i, url in enumerate(scores_url, 1):
  36.                     file = download_path / f'{title}-{i:0{filename_width}}.jpg'
  37.                     logging.debug(f'下载中 - {file.name}')
  38.                     file.write_bytes(self.session.get(url).content)
  39.  
  40.                 logging.info(f'完成下载:{title}')
  41.             else:
  42.                 logging.info(f'无图片谱: {keyword}')
  43.         else:
  44.             logging.info(f'搜索词「{keyword}」下无对应结果')
  45.  
  46.         logging.debug('')
  47.  
  48.     def download(self, keyword, out_dir='./output', max_retry=3) -> None:
  49.         """ 下载乐谱(支持重试) """
  50.  
  51.         for i in range(max_retry):
  52.             try:
  53.                 self._download(keyword, out_dir=out_dir)
  54.                 break
  55.             except requests.RequestException:
  56.                 logging.error(f'重试第{i + 1}次下载 - {keyword}')
  57.                 continue
  58.         else:
  59.             logging.error(f'{max_retry}次尝试下载失败 - {keyword}')
  60.  
  61.  
  62. if __name__ == '__main__':
  63.     logging.basicConfig(
  64.         format='[%(asctime)s] [%(levelname)-5s] %(message)s',
  65.         level=logging.INFO,
  66.         datefmt='%Y-%m-%d %T'
  67.     )
  68.  
  69.     logging.getLogger("urllib3.connectionpool").setLevel(logging.CRITICAL)
  70.  
  71.     downloader = JitasheDownloader()
  72.     song_list = [
  73.         '同桌的你', '十年', '蓝莲花', '千千阙歌', '大海',
  74.         '月亮代表我的心', '童话', '爱情转移', '外婆的澎湖湾',
  75.         '笨小孩', '上海滩', '红日', '我们的爱', '雨一直下',
  76.         '江南','孤勇者', '孤勇者1', '孤勇者2'
  77.     ]
  78.     for song in song_list:
  79.         downloader.download(song)
  80.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement