Advertisement
Guest User

Untitled

a guest
Jan 21st, 2020
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.90 KB | None | 0 0
  1. import aiohttp
  2. from bs4 import BeautifulSoup as bs
  3. import asyncio
  4.  
  5. headers = {
  6.     "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36",
  7.     "accept": "*/*"
  8. }
  9. url = "https://ryazan.hh.ru/search/vacancy"
  10.  
  11.  
  12. async def parse_hhru(text, pages=10):
  13.     async with aiohttp.ClientSession(headers=headers) as session:
  14.         params = {"text": text}
  15.         responses = []
  16.         jobs = []
  17.         for page in range(1, pages + 1):
  18.             params['page'] = page
  19.             responses.append(session.get(url, params=params))
  20.         for response in asyncio.as_completed(responses):
  21.             response = await response
  22.             if response.status == 200:
  23.                 soup = bs(await response.text(), "lxml")
  24.                 divs = soup.find_all("div", attrs={"data-qa": "vacancy-serp__vacancy"}) + soup.find_all("div", attrs={"data-qa": "vacancy-serp__vacancy vacancy-serp__vacancy_premium"})
  25.                 for div in divs:
  26.                     title_tag = div.find("a", attrs={"data-qa": "vacancy-serp__vacancy-title"})
  27.                     title = title_tag.text
  28.                     href = title_tag['href']
  29.                     company = div.find("a", attrs={"data-qa": "vacancy-serp__vacancy-employer"}).text
  30.                     text1 = div.find("div", attrs={"data-qa": "vacancy-serp__vacancy_snippet_responsibility"}).text
  31.                     text2 = div.find("div", attrs={"data-qa": "vacancy-serp__vacancy_snippet_requirement"}).text
  32.                     content = text1 + " " + text2
  33.                     jobs.append({
  34.                         "title": title,
  35.                         "href": href,
  36.                         "company": company,
  37.                         "content": content
  38.                     })
  39.             else:
  40.                 ...
  41.         return jobs
  42.  
  43. result = asyncio.run(parse_hhru("python"))
  44. print(result)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement