Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
'''
Requires Python 3.6+ and the `requests` package.
'''
import csv
import io
import time
from math import ceil
from urllib import parse

import requests
class favlist():
    '''
    Reader for a single Bilibili favorites folder.

    Resolves the folder id from a favorites-folder URL, pages through the
    folder via the web API and picks out the entries whose title marks them
    as invalid (removed) videos.
    '''
    # API endpoint template; `fid` is the folder id and `pn` the 1-based page.
    apiUrl = 'https://api.bilibili.com/medialist/gateway/base/spaceDetail?media_id={fid}&pn={pn}&ps=20'
    # Entries per page; must stay in sync with the `ps=` value in apiUrl.
    pageSize = 20
    # Title that get_failure_media() treats as marking an invalid video.
    failureTitle = '已失效视频'

    def __init__(self, url: str):
        '''
        Parse the favorites-folder URL and fetch the folder's video count.

        :param url: favorites-folder URL; must carry a ``fid`` query parameter
        :raises KeyError: if ``fid`` is missing from the URL, or if the first
            page could not be fetched (``get_content`` returned ``{}``)

        Usage::

            favlist('https://space.bilibili.com/3992364/favlist?fid=45027464')
        '''
        params = self.parse_url(url)
        self.fid = params['fid'][0]
        favList = self.get_content(1)
        self.mediaCount = favList['data']['info']['media_count']

    def parse_url(self, inUrl: str) -> dict:
        '''
        Parse the query string of a URL.

        :param inUrl: URL to parse
        :return: mapping of each query parameter name to its list of values
        :rtype: dict
        '''
        return parse.parse_qs(parse.urlsplit(inUrl).query)

    def get_content(self, pn: int, timeout: float = 18) -> dict:
        '''
        Fetch one page of the favorites folder from the API.

        :param pn: 1-based page number
        :param timeout: seconds to wait for the server before giving up
        :return: the decoded JSON response, or ``{}`` if the request failed,
            the server answered with an HTTP error status, or the body was
            not valid JSON (the error is printed in each case)
        :rtype: dict
        '''
        currUrl = self.apiUrl.format(fid=self.fid, pn=pn)
        try:
            apiResp = requests.get(currUrl, timeout=timeout)
            # requests only raises HTTPError when explicitly asked to.
            apiResp.raise_for_status()
            return apiResp.json()
        except requests.RequestException as e:
            # Covers HTTPError, ConnectionError and Timeout alike.
            print(f'get_content() error, {type(e).__name__} {e}')
        except ValueError as e:
            # Older requests versions raise a plain ValueError on bad JSON.
            print(f'get_content() error, invalid JSON {e}')
        return {}

    def get_failure_media(self) -> list:
        '''
        Collect the invalid videos of the folder, identified by their title.

        Pages that could not be fetched, or that carry no media list, are
        skipped instead of aborting the whole scan.

        :return: the API entries of all invalid videos
        :rtype: list
        '''
        failMedia = []
        totalPage = ceil(self.mediaCount / self.pageSize)
        print('Failure media:')
        for currPage in range(1, totalPage + 1):
            favList = self.get_content(currPage)
            print(f'Page {currPage}:')
            data = favList.get('data') if isinstance(favList, dict) else None
            # (sic) the API's key really is spelled `medias`; it may also be
            # missing or null, in which case the page contributes nothing.
            medias = (data or {}).get('medias') or []
            for media in medias:
                if media['title'] == self.failureTitle:
                    print(f'av{media["id"]}')
                    failMedia.append(media)
        return failMedia
def get_biliplus_data(aid: int):
    '''
    Look up a video's metadata on BiliPlus by its av number.

    Every failure is printed and reported as an empty dict, so one bad
    lookup does not stop a batch run.

    :param aid: av number of the video
    :return: the ``data[str(aid)]`` entry of the API response (guaranteed to
        contain ``title`` and ``author``), or ``{}`` if the request failed,
        the body was not JSON, the API reported a non-zero ``code``, or the
        response did not have the expected shape
    :rtype: dict
    '''
    plusUrl = f'https://hd.biliplus.com/api/aidinfo?aid={aid}'
    try:
        plusResp = requests.get(plusUrl, timeout=18)
        plusJson = plusResp.json()
    except requests.RequestException as e:
        # Includes Timeout, which is not a ConnectionError subclass.
        print(f'get_biliplus_data(), {type(e).__name__} {e}')
        return {}
    except ValueError as e:
        # Older requests versions raise a plain ValueError on bad JSON.
        print(f'get_biliplus_data(), invalid JSON {e}')
        return {}

    # Per the original author's note, the meaning of `code` is undocumented
    # and was found by experiment; anything but 0 is treated as a failure.
    code = plusJson.get('code') if isinstance(plusJson, dict) else None
    if code != 0:
        print(f'get_biliplus_data(), HTTPError status code {code}')
        return {}

    try:
        mediaData = plusJson['data'][str(aid)]
        title, author = mediaData['title'], mediaData['author']
    except (KeyError, TypeError) as e:
        print(f'get_biliplus_data(), unexpected response {e!r}')
        return {}

    print(f'title: {title}, author: {author}, av{aid}')
    return mediaData
def save_to_file(inStr: str, fileName: str):
    '''
    Append text to a file, creating the file if it does not exist yet.

    The file is written as UTF-8 so that non-ASCII text (e.g. Chinese video
    titles) is stored the same way on every platform.

    :param inStr: text to append
    :param fileName: path of the file to append to
    :raises OSError: if the file cannot be opened or written
    '''
    # Mode 'a' already creates a missing file, so no 'w' fallback is needed;
    # `with` closes the handle even if the write fails.
    with open(fileName, 'a', encoding='utf-8') as fp:
        fp.write(inStr)
def main():
    '''
    Prompt for a favorites-folder URL and record its invalid videos.

    Every invalid av number is appended to ``avNum.txt``. For each video
    that BiliPlus can still resolve, a ``title,author,avN`` row is appended
    to ``result.csv``.
    '''
    hint = '输入收藏夹URL\n例子: https://space.bilibili.com/3992364/favlist?fid=45027464\n> '
    url = input(hint)
    f = favlist(url)
    fail = f.get_failure_media()
    print('Failure videos detail:')
    for index, media in enumerate(fail):
        if index:
            # BiliPlus's underpowered server only allows about 5 requests per
            # minute (and frequently falls over), so pause between lookups.
            time.sleep(20)
        avId = media['id']
        save_to_file(f'av{avId}\n', 'avNum.txt')
        plus = get_biliplus_data(avId)
        if plus:
            # Let the csv module quote fields, so a comma or quote inside a
            # title or author name cannot break the row into extra columns.
            row = io.StringIO()
            csv.writer(row, lineterminator='\n').writerow(
                [plus['title'], plus['author'], f'av{avId}'])
            save_to_file(row.getvalue(), 'result.csv')


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment