Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import aiohttp
- from bs4 import BeautifulSoup as bs
- import asyncio
# Browser-like request headers so hh.ru serves its normal HTML pages
# instead of blocking the scraper as a bot.
headers = {
    "user-agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"
    ),
    "accept": "*/*",
}

# Vacancy-search endpoint for the Ryazan regional subdomain of hh.ru.
url = "https://ryazan.hh.ru/search/vacancy"
async def parse_hhru(text, pages=10):
    """Scrape hh.ru vacancy search results for *text* concurrently.

    Args:
        text: Search query string (sent as the ``text`` query parameter).
        pages: Number of result pages to request, 1..pages (default 10).

    Returns:
        A list of dicts with keys ``title``, ``href``, ``company`` and
        ``content`` — one entry per vacancy card found on each page that
        responded with HTTP 200. Pages that fail to load are skipped
        silently (best-effort, matching the original behaviour).
    """
    async with aiohttp.ClientSession(headers=headers) as session:
        # BUG FIX: the original built one shared ``params`` dict and mutated
        # ``params['page']`` in the loop.  aiohttp reads the params only when
        # the request coroutine is awaited — by then the loop had finished,
        # so every request was sent with the *last* page number and the same
        # page was fetched ``pages`` times.  Each request now gets its own
        # params dict.
        requests = [
            session.get(url, params={"text": text, "page": page})
            for page in range(1, pages + 1)
        ]
        jobs = []
        for pending in asyncio.as_completed(requests):
            response = await pending
            if response.status != 200:
                continue  # skip pages that did not load
            soup = bs(await response.text(), "lxml")
            # Plain and "premium" vacancy cards carry different data-qa values.
            divs = (
                soup.find_all("div", attrs={"data-qa": "vacancy-serp__vacancy"})
                + soup.find_all("div", attrs={"data-qa": "vacancy-serp__vacancy vacancy-serp__vacancy_premium"})
            )
            for div in divs:
                title_tag = div.find("a", attrs={"data-qa": "vacancy-serp__vacancy-title"})
                if title_tag is None:
                    continue  # malformed card — skip rather than crash
                company_tag = div.find("a", attrs={"data-qa": "vacancy-serp__vacancy-employer"})
                snippet_tags = (
                    div.find("div", attrs={"data-qa": "vacancy-serp__vacancy_snippet_responsibility"}),
                    div.find("div", attrs={"data-qa": "vacancy-serp__vacancy_snippet_requirement"}),
                )
                jobs.append({
                    "title": title_tag.text,
                    "href": title_tag["href"],
                    # Guard optional tags: the original raised AttributeError
                    # when a card lacked an employer link or a snippet div.
                    "company": company_tag.text if company_tag is not None else "",
                    "content": " ".join(t.text for t in snippet_tags if t is not None),
                })
        return jobs
if __name__ == "__main__":
    # Script entry guard: importing this module no longer fires off network
    # requests as a side effect; running it directly behaves as before.
    result = asyncio.run(parse_hhru("python"))
    print(result)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement