Orleon

Untitled

Feb 5th, 2021
import requests
from pprint import pprint
from bs4 import BeautifulSoup as bs

# Example of the final search URL:
# https://izhevsk.hh.ru/search/vacancy?L_save_area=true&clusters=true&enable_snippets=true&text=python&showClusters=false

# job = input('Enter the desired job title ')
job = 'python'
main_url = 'https://izhevsk.hh.ru'

# Query parameters for the hh.ru vacancy search
r_params = {'L_save_area': 'true',
            'clusters': 'true',
            'enable_snippets': 'true',
            'text': job,
            'showClusters': 'false',
            'page': '0'}

# A desktop browser User-Agent so the request is served the regular search page
r_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36'}

response = requests.get(main_url + '/search/vacancy', params=r_params, headers=r_headers)
soup = bs(response.text, 'html.parser')
cnt = 1
all_vacansies = []
while True:
    print(f'Page {cnt}')
    # each vacancy card on the results page is a div with this class
    vacansy_list = soup.find_all('div', {'class': 'vacancy-serp-item'})

    for vacansy in vacansy_list:
        vacansy_data = {}
        link = vacansy.find('a')
        vacansy_href = link.get('href')
        vacansy_name = link.getText()
        salary_data = vacansy.find('span', {'data-qa': 'vacancy-serp__vacancy-compensation'})
        if salary_data is not None:
            # drop the non-breaking spaces used as thousands separators
            vacansy_salary = salary_data.getText().replace('\xa0', '')
            # everything except the last token is the amount; the last token is the currency
            salary_text = ''.join(vacansy_salary.split()[:-1])
            vacansy_data['salary_cur'] = vacansy_salary.split()[-1].replace('.', '')
            if salary_text.find('-') > -1:
                # a range with both a lower and an upper bound
                vacansy_data['salary_min'] = int(salary_text.split('-')[0])
                vacansy_data['salary_max'] = int(salary_text.split('-')[-1])
            elif salary_text.find('от') > -1:
                # 'от' means 'from': only a lower bound is given
                vacansy_data['salary_min'] = int(salary_text[2:])
                vacansy_data['salary_max'] = None
            elif salary_text.find('до') > -1:
                # 'до' means 'up to': only an upper bound is given
                vacansy_data['salary_min'] = None
                vacansy_data['salary_max'] = int(salary_text[2:])
        else:
            vacansy_data['salary_min'] = None
            vacansy_data['salary_max'] = None
            vacansy_data['salary_cur'] = None
        vacansy_data['name'] = vacansy_name
        vacansy_data['href'] = vacansy_href
        # vacansy_data['salary'] = vacansy_salary
        all_vacansies.append(vacansy_data)
    # progress check: show the last vacancy collected so far
    if all_vacansies:
        print(all_vacansies[-1])
    # the results page exposes a "next page" link with this class; it is absent on the last page
    next_button = soup.find('a', {'class': 'HH-Pager-Controls-Next'})

    if next_button is None:
        break
    else:
        next_link = main_url + next_button.get('href')
        response = requests.get(next_link, headers=r_headers)
        soup = bs(response.text, 'html.parser')
        cnt += 1

pprint(all_vacansies)
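
# Optional follow-up: a minimal sketch of persisting the collected results,
# assuming a file named vacancies.json in the working directory is an acceptable output.
import json

with open('vacancies.json', 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps the Cyrillic vacancy names and currencies readable in the file
    json.dump(all_vacansies, f, ensure_ascii=False, indent=2)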