Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import math
- import re
- from collections import Counter
- import numpy
# Path of the vacancies CSV file, read as a single line from standard input.
csv_file = input()
def get_ruble_suffix(count) -> str:
    """Return the Russian word for "ruble" in the form that agrees with count.

    The form is picked from the last digit and the last two digits of
    ``count``: 'рублей' for a last digit of 0 or 5-9 and for 10-19,
    'рубля' for a last digit of 2-4, and 'рубль' otherwise.
    """
    last_digit = count % 10
    last_two_digits = count % 100

    # The genitive plural covers 10..19 regardless of the last digit.
    if 10 <= last_two_digits <= 19 or last_digit == 0 or 5 <= last_digit <= 9:
        return 'рублей'
    if 2 <= last_digit <= 4:
        return 'рубля'
    return 'рубль'
def get_vacancies_suffix(count: int) -> str:
    """Return the Russian word for "vacancy" in the form that agrees with count.

    "вакансий" is used for 11-19 and for a last digit of 0 or 5-9,
    "вакансии" for a last digit of 2-4, and "вакансия" otherwise.
    """
    last_digit = count % 10
    is_teen = 11 <= count % 100 <= 19

    if is_teen or last_digit == 0 or 5 <= last_digit <= 9:
        return "вакансий"
    if 2 <= last_digit <= 4:
        return "вакансии"
    return "вакансия"
def get_times_suffix(count: int) -> str:
    """Return the Russian word for "times" in the form that agrees with count.

    "раза" is used only when the last digit is 2-4 and the last two digits
    are not in 11-19; every other count gets "раз".
    """
    is_teen = 11 <= count % 100 <= 19
    if not is_teen and 2 <= count % 10 <= 4:
        return "раза"
    return "раз"
def print_vacancies_by_salaries(list_vacancies: list, is_high_salary: bool):
    """Print up to ten RUR vacancies ordered by their 'avg_salary'.

    Args:
        list_vacancies: vacancy dictionaries; the keys read here are
            'avg_salary', 'salary_currency', 'name', 'employer_name'
            and 'area_name'.
        is_high_salary: True lists the highest salaries first, False the
            lowest first; it also selects the header line.

    The listing is followed by an empty line.
    """
    ordered = list(list_vacancies)
    ordered.sort(key=lambda vac: vac['avg_salary'], reverse=is_high_salary)

    if is_high_salary:
        print('Самые высокие зарплаты:')
    else:
        print('Самые низкие зарплаты:')

    rur_only = (vac for vac in ordered if vac['salary_currency'] == 'RUR')
    # range() is the second zip argument on purpose: the listing stops once
    # ten lines are out, after the generator has looked for one more RUR row.
    for vac, place in zip(rur_only, range(1, 11)):
        salary = vac["avg_salary"]
        print(
            f' {place}) {vac["name"]} в компании \"{vac["employer_name"]}\" - {salary} '
            f'{get_ruble_suffix(salary)} (г. {vac["area_name"]})')
    print()
def print_top_skills(list_vacancies: list):
    """Print how many distinct skills occur and the ten most frequent ones.

    Args:
        list_vacancies: vacancy dictionaries whose 'key_skills' value is an
            iterable of skill strings; each skill is stripped of surrounding
            whitespace before counting.

    The listing is followed by an empty line.
    """
    mentions = Counter(
        skill.strip()
        for vacancy in list_vacancies
        for skill in vacancy["key_skills"]
    )
    ranking = mentions.most_common()

    print(f'Из {len(ranking)} скиллов, самыми популярными являются:')
    for place, (skill, times) in enumerate(ranking[:10], start=1):
        print(f' {place}) {skill} - упоминается {times} {get_times_suffix(times)}')
    print()
def print_cities_by_salaries(input_list: list):
    """Print the ten cities with the highest mean salary among RUR vacancies.

    Vacancies whose 'salary_currency' is not 'RUR' are ignored. For every
    'area_name' the floored mean of 'avg_salary' and the number of vacancies
    are computed. Only cities holding at least 0.9 percent of the RUR
    vacancies are ranked; the header line reports the number of all cities.

    Cities are ordered by mean salary, highest first; cities with the same
    mean salary are ordered by name, ascending.

    Args:
        input_list: vacancy dictionaries with the keys 'salary_currency',
            'area_name' and 'avg_salary'.

    The listing is followed by an empty line.
    """
    min_share_percent = 0.9
    top_size = 10

    # Group salaries per city in one pass. Grouping by key keeps every row of
    # a city together no matter where the city sits in the input, which a
    # neighbour-by-neighbour scan with special cases near the end does not.
    salaries_by_city = {}
    total_vacancies = 0
    for vacancy in input_list:
        if vacancy['salary_currency'] != 'RUR':
            continue
        city_salaries = salaries_by_city.setdefault(vacancy['area_name'], [])
        city_salaries.append(float(vacancy['avg_salary']))
        total_vacancies += 1

    # city -> (floored mean salary, number of vacancies)
    city_stats = {
        city: (math.floor(sum(salaries) / len(salaries)), len(salaries))
        for city, salaries in salaries_by_city.items()
    }

    # total_vacancies is non-zero whenever city_stats has an entry.
    ranked = [
        (city, stats)
        for city, stats in city_stats.items()
        if stats[1] * 100 / total_vacancies >= min_share_percent
    ]
    # A single composite key gives "salary descending, name ascending" for
    # any number of tied cities, with no post-sort swapping needed.
    ranked.sort(key=lambda item: (-item[1][0], item[0]))

    print(f'Из {len(city_stats)} городов, самые высокие средние ЗП:')
    for place, (city, (salary, amount)) in enumerate(ranked[:top_size], start=1):
        print(f' {place}) {city} - средняя зарплата {salary} {get_ruble_suffix(salary)} '
              f'({amount} {get_vacancies_suffix(amount)})')
    print()
def make_dictionary(titles, list_vac):
    """Build a vacancy dictionary from column titles and one row of values.

    Args:
        titles: column names; must include 'salary_from' and 'salary_to'.
        list_vac: row values, positionally matching ``titles``.

    Returns:
        A dict mapping each title to its value, plus 'avg_salary': the
        floored midpoint of 'salary_from' and 'salary_to' as an int.

    Raises:
        IndexError: if ``list_vac`` is shorter than ``titles``.
    """
    record = {column: list_vac[position] for position, column in enumerate(titles)}
    upper_bound = float(record['salary_to'])
    lower_bound = float(record['salary_from'])
    record['avg_salary'] = math.floor((upper_bound + lower_bound) / 2)
    return record
# Load the vacancies CSV, keep only complete RUR rows, then print the reports.
# newline='' is what the csv module asks for, so line breaks inside quoted
# fields reach the reader unchanged.
with open(csv_file, encoding='utf-8-sig', newline='') as f:
    reader = csv.reader(f, delimiter=',')
    vacancies_list, title = [], next(reader)
    index_key_skills = title.index('key_skills')
    index_currency = title.index('salary_currency')
    html_tags = re.compile('<.*?>')
    line_breaks = re.compile(r'\r\n|\r|\n')
    for vacancy in reader:
        # Check the row shape first: a blank or short line must be skipped
        # rather than fail on the currency lookup, and a row with any empty
        # field is dropped regardless of how many columns it has.
        if len(vacancy) != len(title) or '' in vacancy:
            continue
        if vacancy[index_currency] != 'RUR':
            continue
        cleaned = []
        for i, field in enumerate(vacancy):
            if i == index_key_skills:
                # Skills are stored one per line inside a single field.
                cleaned.append(line_breaks.split(field.strip()))
            else:
                # Drop HTML tags and collapse every whitespace run to a space.
                cleaned.append(' '.join(html_tags.sub('', field).split()))
        vacancies_list.append(make_dictionary(title, cleaned))
print_vacancies_by_salaries(vacancies_list, True)
print_vacancies_by_salaries(vacancies_list, False)
print_top_skills(vacancies_list)
print_cities_by_salaries(vacancies_list)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement