Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from pprint import pprint
- from bs4 import BeautifulSoup as bs
# Scrape hh.ru (Izhevsk region) vacancy listings for a search query,
# following pagination and collecting name, link and parsed salary.
# Example of the URL this builds:
# https://izhevsk.hh.ru/search/vacancy?L_save_area=true&clusters=true&enable_snippets=true&text=python&showClusters=false

MAIN_URL = 'https://izhevsk.hh.ru'
R_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/88.0.4324.146 Safari/537.36'
}


def parse_salary(raw_salary):
    """Parse an hh.ru compensation string into its components.

    Parameters
    ----------
    raw_salary : str
        Compensation text as shown on the search page, e.g.
        '50\xa0000-70\xa0000 руб.', 'от 100\xa0000 руб.', 'до 80\xa0000 руб.'
        (\xa0 is the non-breaking space hh.ru uses as a thousands separator).

    Returns
    -------
    dict
        Keys 'salary_min' and 'salary_max' (int or None) and 'salary_cur'
        (currency token with its trailing dot stripped, e.g. 'руб').
    """
    parts = raw_salary.replace('\xa0', '').split()
    amount = ''.join(parts[:-1])       # digits plus optional 'от'/'до' prefix
    result = {'salary_cur': parts[-1].replace('.', '')}
    if '-' in amount:                  # explicit range: '50000-70000'
        low, _, high = amount.partition('-')
        result['salary_min'] = int(low)
        result['salary_max'] = int(high)
    elif amount.startswith('от'):      # lower bound only
        result['salary_min'] = int(amount[2:])
        result['salary_max'] = None
    elif amount.startswith('до'):      # upper bound only
        result['salary_min'] = None
        result['salary_max'] = int(amount[2:])
    else:
        # Unrecognized format: still emit the keys so every vacancy dict has
        # a uniform shape (the original left them missing in this case).
        result['salary_min'] = None
        result['salary_max'] = None
    return result


def scrape_vacancies(job='python'):
    """Fetch every result page for *job* and return a list of vacancy dicts.

    Each dict carries 'name', 'href', 'salary_min', 'salary_max',
    'salary_cur'. Prints one progress line per page plus the last vacancy
    collected so far (progress feedback kept from the original script).
    """
    r_params = {
        'L_save_area': 'true',
        'clusters': 'true',
        'enable_snippets': 'true',
        'text': job,
        'showClusters': 'false',
        'page': '0',
    }
    # timeout added so a stalled server cannot hang the scraper forever.
    response = requests.get(MAIN_URL + '/search/vacancy',
                            params=r_params, headers=R_HEADERS, timeout=30)
    soup = bs(response.text, 'html.parser')
    all_vacancies = []
    page = 1
    while True:
        print(f'Страница {page} ')
        for item in soup.find_all('div', {'class': 'vacancy-serp-item'}):
            link = item.find('a')
            vacancy = {}
            salary_tag = item.find(
                'span', {'data-qa': 'vacancy-serp__vacancy-compensation'})
            if salary_tag is not None:
                vacancy.update(parse_salary(salary_tag.getText()))
            else:
                vacancy['salary_min'] = None
                vacancy['salary_max'] = None
                vacancy['salary_cur'] = None
            vacancy['name'] = link.getText()
            vacancy['href'] = link.get('href')
            all_vacancies.append(vacancy)
        # BUG FIX: the original printed all_vacansies[cnt * 49], which raises
        # IndexError whenever a page holds fewer than 50 items (and points at
        # the wrong element even when it does not crash). Print the most
        # recently collected vacancy instead.
        if all_vacancies:
            print(all_vacancies[-1])
        next_button = soup.find('a', {'class': 'HH-Pager-Controls-Next'})
        if next_button is None:
            break
        response = requests.get(MAIN_URL + next_button.get('href'),
                                headers=R_HEADERS, timeout=30)
        soup = bs(response.text, 'html.parser')
        page += 1
    return all_vacancies


if __name__ == '__main__':
    # Main guard so the module can be imported without firing network
    # requests; running the file directly behaves as before.
    # job = input('Введите желаемую должность ')
    pprint(scrape_vacancies())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement