Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Scrape the <h3> sections of a page and append one CSV row per section.

The first two <h3> headings on the page are skipped.  For every remaining
heading the script cuts out the HTML that lies between that heading's
marker element (the node two siblings after the <h3>, presumably the
section's <h4>) and the marker of the following heading.  The last section
ends at the first <div class="clearfix">.  Each row appended to
``result.csv`` holds: heading text; text of the first <h4> in the section;
number of <tr> elements in the section.
"""
# pip install beautifulsoup4
# Installation is described here - https://www.crummy.com/software/BeautifulSoup/bs4/doc.ru/bs4ru.html#id8
import csv

import requests
from bs4 import BeautifulSoup

# NOTE(review): `url` is not assigned anywhere in the visible script; it must
# be defined (the address of the page to scrape) before this line runs.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Serialize the document once; every section is located inside this string.
page_html = str(soup)

# The first two <h3> elements are not sections of interest.
section_headings = soup.find_all('h3')[2:]

# Serialized marker element for each section: the node two siblings after
# the heading (the immediate sibling is typically a whitespace text node).
start_markers = [
    str(heading.next_sibling.next_sibling) for heading in section_headings
]

# A section ends where the next one starts.  The last section ends at the
# first <div class="clearfix">.  Deriving this from the actual number of
# headings replaces the former hard-coded section count.
last_end_marker = str(soup.find('div', {'class': 'clearfix'}))
end_markers = start_markers[1:] + [last_end_marker]

# Open the output once.  Append mode is kept so earlier results survive;
# UTF-8 is explicit so non-ASCII text does not depend on the OS locale.
with open('result.csv', 'a', encoding='utf-8') as file:
    # csv.writer quotes fields containing ';', quotes or newlines;
    # lineterminator='\n' keeps the same line endings as a plain write().
    writer = csv.writer(file, delimiter=';', lineterminator='\n')
    sections = zip(section_headings, start_markers, end_markers)
    for heading, start_marker, end_marker in sections:
        # str.find returns the first occurrence, so this relies on each
        # marker's serialized HTML being unique within the page.
        start = page_html.find(start_marker)
        end = page_html.find(end_marker)

        # Parse the fragment once, with the same parser as the full page.
        section = BeautifulSoup(page_html[start:end], "html.parser")

        flow = heading.get_text()
        program = section.find('h4').get_text()
        rows = len(section.find_all('tr'))
        writer.writerow([flow, program, rows])
Add Comment
Please, Sign In to add comment