Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import time
- import requests
# API reference for the query used below:
# https://www.mediawiki.org/wiki/API:Categorymembers

# MediaWiki API endpoint on ru.wikipedia.org.
API_URL = "https://ru.wikipedia.org/w/api.php"

# HTTP headers passed along with the API request to identify this client.
USER_AGENT = {
    "User-Agent": "UsernameBot; email@example.com; python3.9; requests",
}
def get_category_members(title: str) -> list:
    """Return the titles of all pages listed in a category.

    Sends ``list=categorymembers`` queries to ``API_URL`` (restricted to
    namespace 0 and member type ``page``) and follows the ``cmcontinue``
    token from each reply until the server stops returning one.

    :param title: category name without the namespace prefix
    :return: list of page titles, in the order the API returned them
    :raises KeyError: if a reply has no ``query`` section (for example when
        the API reports an error other than ``maxlag``)
    """
    result = []
    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': f'Category:{title}',
        'cmprop': 'title',
        'cmnamespace': '0',
        'cmtype': 'page',
        'cmlimit': 'max',
        'format': 'json',
        'utf8': 1,
        'maxlag': 5,
    }
    while True:
        # Repeat the same request for as long as the API answers "maxlag".
        while True:
            response = requests.post(url=API_URL, data=params, headers=USER_AGENT)
            r = response.json()
            if "error" in r and r['error']['code'] == "maxlag":
                # Retry-After is an HTTP header of the response object; the
                # decoded JSON dict has no ``headers`` attribute. Fall back
                # to 5 seconds if the header is missing.
                time.sleep(int(response.headers.get('Retry-After', 5)))
            else:
                break
        result.extend(page['title'] for page in r['query']['categorymembers'])
        if "continue" not in r:
            return result
        # Only send cmcontinue once the server has actually issued a token;
        # the first request goes out without it.
        params['cmcontinue'] = r['continue']['cmcontinue']
        # Fixed pause between consecutive batches.
        time.sleep(2)
# Script entry: fetch the hard-coded category and print one page title per line.
print("\n".join(get_category_members("Актёры по алфавиту")))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement