Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from urllib.parse import urljoin

import gspread
import requests
from bs4 import BeautifulSoup
# Request headers sent with every HTTP call in this script: an `accept` value
# and a desktop Chrome-on-macOS `user-agent` string. Adjacent string literals
# are joined by implicit concatenation purely to keep the lines short.
HEADERS = {
    'accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8'
        ',application/signed-exchange;v=b3;q=0.9'
    ),
    'user-agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/95.0.4638.69 Safari/537.36'
    ),
}
# Fetch the Habr search page (query "python", posts only, ordered by
# relevance) and pull a few fields out of every article snippet on it.
url = 'https://habr.com/ru/search/?q=python&target_type=posts&order=relevance'
r = requests.get(url, headers=HEADERS, timeout=5)
# Fail loudly on an HTTP error instead of parsing an error page as results.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
ads = soup.find_all('div', attrs={'class': 'tm-article-snippet'})
for i in ads:
    author_tag = i.find('span', attrs={'class': 'tm-user-info tm-article-snippet__author'})
    anchor_tag = i.find('a')
    date_tag = i.find('span', attrs={'class': 'tm-article-snippet__datetime-published'})
    href = anchor_tag.get('href') if anchor_tag is not None else None
    # find() returns None for a missing element; skip incomplete snippets
    # rather than letting one of them abort the whole run.
    if author_tag is None or date_tag is None or not href:
        continue
    # NOTE(review): the selector targets the snippet's *author* span, so
    # `title` presumably holds the author name, not the headline - confirm.
    title = author_tag.get_text(strip=True)
    # urljoin avoids the doubled slash that plain concatenation produces for
    # root-relative hrefs and leaves already-absolute hrefs untouched.
    # NOTE(review): this is the first <a> inside the snippet; verify that it
    # is the article link and not, e.g., the author profile link.
    link = urljoin('https://habr.com/', href)
    data = date_tag.get_text(strip=True)
    # NOTE(review): title/link/data are not stored or used in the visible
    # code; only the values from the last snippet survive the loop.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement