# Untitled

import requests
from bs4 import BeautifulSoup

# Scheme and host of Russian Wikipedia; site-relative hrefs are appended to it.
HOST = 'https://ru.wikipedia.org'

# Address of the article that gets scraped (title is percent-encoded).
URL = HOST + '/wiki/%D0%A3%D0%BF%D0%BE%D0%BB%D0%BD%D0%BE%D0%BC%D0%BE%D1%87%D0%B5%D0%BD%D0%BD%D1%8B%D0%B9_%D0%BF%D0%BE_%D0%B7%D0%B0%D1%89%D0%B8%D1%82%D0%B5_%D0%BF%D1%80%D0%B0%D0%B2_%D0%BF%D1%80%D0%B5%D0%B4%D0%BF%D1%80%D0%B8%D0%BD%D0%B8%D0%BC%D0%B0%D1%82%D0%B5%D0%BB%D0%B5%D0%B9_%D0%B2_%D0%A0%D0%BE%D1%81%D1%81%D0%B8%D0%B8'

# Headers attached to every outgoing request: a desktop Chrome user-agent
# string and a wildcard accept header.
HEADERS = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.149 Safari/537.36'
    ),
    'accept': '*/*',
}

# Accepted spellings for each month, January first: Russian genitive form and
# three-letter abbreviation, then the English name and its abbreviation.
_MONTH_SPELLINGS = (
    ('января', 'янв', 'january', 'jan'),
    ('февраля', 'фев', 'february', 'feb'),
    ('марта', 'мар', 'march', 'mar'),
    ('апреля', 'апр', 'april', 'apr'),
    ('мая', 'may'),
    ('июня', 'июн', 'june', 'jun'),
    ('июля', 'июл', 'july', 'jul'),
    ('августа', 'авг', 'august', 'aug'),
    ('сентября', 'сен', 'september', 'sep'),
    ('октября', 'окт', 'october', 'oct'),
    ('ноября', 'ноя', 'november', 'nov'),
    ('декабря', 'дек', 'december', 'dec'),
)


def _build_month_table():
    """Map every accepted month token (spellings and 1-2 digit numbers) to 1..12."""
    table = {}
    for number, spellings in enumerate(_MONTH_SPELLINGS, start=1):
        for spelling in spellings:
            table[spelling] = number
        # Numeric months are accepted both bare ('3') and zero-padded ('03').
        table['%d' % number] = number
        table['%02d' % number] = number
    return table


# Built once at import time instead of on every call.
_MONTH_NUMBERS = _build_month_table()


def get_dig_date(str, mode=1):
    """Convert a textual date into numeric day, month and year.

    The text is split into components on whitespace and on the separators
    ``-``, ``.``, ``/`` and ``,``. The month component may be a number
    (``'3'``, ``'03'``) or a Russian/English month name or three-letter
    abbreviation; it is matched case-insensitively.

    Args:
        str: The date text, e.g. ``'5 мая 1990'`` or ``'1990-05-05'``.
            The name shadows the builtin; it is kept so that existing
            keyword callers keep working.
        mode: Component order. ``2`` means year, month, day; any other
            value (default ``1``) means day, month, year.

    Returns:
        A dict with integer values under the keys ``'day'``, ``'month'``
        and ``'year'``.

    Raises:
        IndexError: The text has fewer than three components.
        KeyError: The month component is not a recognised name or number.
        ValueError: The day or year component is not an integer.
    """
    text = str
    for separator in '-./,':
        text = text.replace(separator, ' ')
    parts = text.split()

    if mode == 2:
        ind_d, ind_m, ind_y = 2, 1, 0
    else:
        ind_d, ind_m, ind_y = 0, 1, 2

    return {
        'day': int(parts[ind_d]),
        # Lower-casing lets 'Января', 'MARCH' etc. hit the lower-case table.
        'month': _MONTH_NUMBERS[parts[ind_m].lower()],
        'year': int(parts[ind_y]),
    }


def get_html(url, params=None, timeout=10):
    """Fetch *url* with the module-level HEADERS and return the response.

    Args:
        url: Address to request.
        params: Optional query-string parameters, passed through to
            ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so a stalled
            connection would hang the whole script.

    Returns:
        The response object returned by ``requests.get``. The HTTP status
        is not checked here.

    Raises:
        requests.exceptions.Timeout: The server did not answer within
            *timeout* seconds.
    """
    return requests.get(url, headers=HEADERS, params=params, timeout=timeout)


def get_content(html, mode=None):
    """Extract one person's record from the article's infobox.

    The first ``table.infobox`` in *html* is located, the first link after
    its ``span.no-wikidata`` element is followed (one extra HTTP request via
    ``get_html``), and text is collected from both pages.

    Args:
        html: Response object whose ``.text`` holds the article markup.
        mode: Not used by this function. It is optional so that callers
            may omit it.

    Returns:
        A dict with the keys:
            'name': text of the infobox's ``span.no-wikidata`` element.
            'p_link': absolute URL of the linked page (HOST + href).
            'b_date': texts of the first two links after the first
                ``span.wikidata-claim`` following the ``td.plainlist``
                cell on the linked page (presumably the birth date;
                verify against the live markup).
            'ef_date': texts of the second and third links after the name
                span in the infobox (presumably the date of taking
                office; verify against the live markup).
            'image_link': ``'https:'`` + the ``src`` of the image inside
                the ``td.infobox-image`` cell.

    Raises:
        AttributeError: An expected element is missing, so a lookup
            returned ``None``.
    """
    soup = BeautifulSoup(html.text, 'html.parser')
    table = soup.find('table', class_='infobox')
    image_box = table.find('td', class_='infobox-image')

    # Every infobox lookup below is anchored on this span; find it once.
    name_span = table.find('span', class_='no-wikidata')
    person_anchor = name_span.find_next('a')
    p_link = HOST + person_anchor.get('href')

    p_soup = BeautifulSoup(get_html(p_link).text, 'html.parser')
    p_box = p_soup.find('td', class_='plainlist')

    # Two consecutive links on the linked page form the 'b_date' text.
    b_first = p_box.find_next('span', class_='wikidata-claim').find_next('a')
    b_second = b_first.find_next('a')

    # The two links right after the person link form the 'ef_date' text.
    ef_first = person_anchor.find_next('a')
    ef_second = ef_first.find_next('a')

    return {
        'name': name_span.get_text(),
        'p_link': p_link,
        'b_date': b_first.get_text() + " " + b_second.get_text(),
        'ef_date': ef_first.get_text() + " " + ef_second.get_text(),
        'image_link': 'https:' + image_box.find('img').get('src'),
    }

def parse():
    """Scrape the article at URL and print the assembled record.

    The printed dict contains:
        'name': dict with 'first_name', 'middle_name' and 'last_name',
            taken from the first three whitespace-separated words of the
            scraped name, in that order.
        'p_link': link to the person's own page.
        'b_date', 'ef_date': dicts produced by ``get_dig_date``; the word
            'года' is removed from 'ef_date' before conversion.
        'image_link': address of the infobox image.

    Returns:
        None. The record is only printed.

    Raises:
        IndexError: The scraped name has fewer than three words.
    """
    html = get_html(URL)
    # get_content takes a mode argument; pass it explicitly (1 is also the
    # default mode of get_dig_date used below) so the call does not fail
    # with a missing-argument TypeError.
    person = get_content(html, 1)

    # Split once instead of once per name component.
    name_parts = person['name'].split()
    name = {
        'first_name': name_parts[0],
        'middle_name': name_parts[1],
        'last_name': name_parts[2],
    }

    content = {
        'name': name,
        'p_link': person['p_link'],
        'b_date': get_dig_date(person['b_date']),
        'ef_date': get_dig_date(person['ef_date'].replace('года', '')),
        'image_link': person['image_link'],
    }
    print(content)


# Module-level call: the scrape runs whenever this file is executed or
# imported (there is no __main__ guard).
parse()