Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup as bs
class Spider(object):
    """Scraper that collects author names from the murrengan.ru "murrs" page.

    Typical use: create an instance, fetch the page with ``get_html()`` and
    pass the result to ``parse_html()``.
    """

    def __init__(self):
        """Store the target URL and the HTTP headers sent with each request.

        The constructor performs no network I/O; the page is downloaded only
        when ``get_html()`` is called explicitly.
        """
        self.url = 'https://www.murrengan.ru/murrs/'
        self.headers = {
            'accept': '*/*',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
        }

    def get_html(self, timeout=10):
        """Download ``self.url`` and return the response body.

        Args:
            timeout: Seconds to wait for the server before ``requests``
                gives up, so a stalled connection cannot block forever.

        Returns:
            The raw body (``bytes``) when the status code is 200, otherwise
            the string ``"Error: <status code>"``. The error string is kept
            (rather than an exception) so existing callers keep working.
        """
        with requests.Session() as session:
            # Use the configured URL instead of repeating the literal here.
            response = session.get(self.url, headers=self.headers, timeout=timeout)
        if response.status_code == 200:
            return response.content
        return f"Error: {response.status_code}"

    def parse_html(self, html):
        """Extract author names from the page markup.

        Args:
            html: Markup to parse, as returned by ``get_html()``.

        Returns:
            A list with the text of the ``a.profile__name`` link of every
            ``div.murr-card`` element, in document order. Cards that have no
            such link are skipped.
        """
        soup = bs(html, 'lxml')
        authors = []
        for card in soup.find_all('div', {'class': 'murr-card'}):
            name_link = card.find('a', {'class': 'profile__name'})
            # find() returns None when the card has no author link; skipping
            # it avoids an AttributeError on `.text`.
            if name_link is not None:
                authors.append(name_link.text)
        return authors
if __name__ == '__main__':
    # Script entry point: download the page once, then print the list of
    # author names extracted from it.
    spider = Spider()
    print(spider.parse_html(spider.get_html()))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement