Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import math
- import re
- from collections import Counter
- import numpy
def get_form_ruble(result_digit):
    """Return the Russian form of the word "ruble" that agrees with a number.

    Args:
        result_digit: The amount the word has to agree with.

    Returns:
        'рубль' when the amount ends in 1, 'рубля' when it ends in 2-4,
        and 'рублей' otherwise. Amounts whose last two digits fall in
        11..19 always get 'рублей'.
    """
    units = result_digit % 10
    tens_and_units = result_digit % 100

    # The "teens" override the last-digit rule, so rule them out first.
    if 11 <= tens_and_units <= 19:
        return 'рублей'
    if units == 1:
        return 'рубль'
    if 2 <= units <= 4:
        return 'рубля'
    return 'рублей'
def get_form_years(year):
    """Return the Russian form of the word "year" that agrees with a number.

    Args:
        year: The number of years the word has to agree with.

    Returns:
        'год' when the number ends in 1, 'года' when it ends in 2-4,
        and 'лет' otherwise. Numbers whose last two digits fall in
        11..19 always get 'лет'.
    """
    last_digit = year % 10
    last_two_digits = year % 100

    # 11..14 end in 1-4 but still take the genitive plural ("11 лет"),
    # so the teens must be handled before the last-digit rules.
    if 11 <= last_two_digits <= 19:
        return 'лет'
    if 1 < last_digit <= 4:
        return 'года'
    if last_digit == 1:
        return 'год'
    return 'лет'
def get_form_vacancies(count):
    """Return the Russian form of the word "vacancy" that agrees with a number.

    Args:
        count: The number of vacancies; anything accepted by ``int()``.

    Returns:
        'вакансия' when the number ends in 1, 'вакансии' when it ends in
        2-4, and 'вакансий' otherwise. Numbers whose last two digits fall
        in 11..19 always get 'вакансий'.
    """
    count = int(count)
    last_digit = count % 10
    last_two_digits = count % 100

    # 11..14 end in 1-4 but still take the genitive plural
    # ("12 вакансий"), so the teens are checked first.
    if 11 <= last_two_digits <= 19:
        return 'вакансий'
    if 2 <= last_digit <= 4:
        return 'вакансии'
    if last_digit == 1:
        return 'вакансия'
    return 'вакансий'
def get_form_times(count):
    """Return the Russian form of the word "times" that agrees with a number.

    Args:
        count: How many times something occurred.

    Returns:
        'раза' when the number ends in 2-4 and its last two digits are not
        in 11..19; 'раз' in every other case.
    """
    last_digit = count % 10
    last_two_digits = count % 100

    # 12..14 end in 2-4 but are said "12 раз", not "12 раза".
    if 2 <= last_digit <= 4 and not 11 <= last_two_digits <= 19:
        return 'раза'
    return 'раз'
def get_rating_salaries(list_vacancies, is_descending):
    """Print up to ten ruble vacancies ranked by average salary.

    Args:
        list_vacancies: Vacancy dicts; the keys read here are "avg_salary",
            "salary_currency", "name", "employer_name" and "area_name".
        is_descending: True to list the highest salaries first, False to
            list the lowest first.

    Returns:
        None. A heading, the ranking and a trailing blank line are printed.
    """
    ranked = sorted(list_vacancies, key=lambda vac: vac["avg_salary"],
                    reverse=is_descending)
    if is_descending:
        print('Самые высокие зарплаты:')
    else:
        print('Самые низкие зарплаты:')

    # Only vacancies paid in rubles take part in the rating.
    ruble_only = (vac for vac in ranked if vac['salary_currency'] == 'RUR')
    for place, vac in enumerate(ruble_only, start=1):
        if place > 10:
            break
        salary = vac["avg_salary"]
        print(
            f' {place}) {vac["name"]} в компании \"{vac["employer_name"]}\" - {salary} '
            f'{get_form_ruble(salary)} (г. {vac["area_name"]})')
    print()
def top_skills(list_vacancies):
    """Print the ten most frequently mentioned key skills.

    Args:
        list_vacancies: Vacancy dicts whose "key_skills" value is an
            iterable of skill strings.

    Returns:
        None. A heading with the number of distinct skills, the ranking
        and a trailing blank line are printed.
    """
    # Skills are compared after trimming surrounding whitespace.
    skill_counts = Counter(
        skill.strip()
        for vacancy in list_vacancies
        for skill in vacancy["key_skills"]
    )
    ranking = skill_counts.most_common()

    print(f'Из {len(ranking)} скиллов, самыми популярными являются:')
    for place, (skill, mentions) in enumerate(ranking[:10], start=1):
        print(f' {place}) {skill} - упоминается {mentions} {get_form_times(mentions)}')
    print()
def get_rating_cities(input_list):
    """Print the ten cities with the highest average ruble salary.

    Only vacancies whose "salary_currency" is 'RUR' are considered. A city
    enters the rating when it holds at least 1% of those vacancies. Cities
    are ranked by (average salary, vacancy count), highest first.

    Args:
        input_list: Vacancy dicts; the keys read here are
            "salary_currency", "area_name" and "avg_salary".

    Returns:
        None. A heading with the number of distinct cities, the ranking
        and a trailing blank line are printed.
    """
    ruble_vacancies = [vac for vac in input_list
                       if vac["salary_currency"] == 'RUR']

    # Group salaries by city through a dict so that every vacancy is
    # counted exactly once, wherever it sits in the input.
    salaries_by_city = {}
    for vac in ruble_vacancies:
        salaries_by_city.setdefault(vac["area_name"], []).append(
            float(vac["avg_salary"]))

    # city -> (floored mean salary, vacancy count). Cities are inserted in
    # alphabetical order so that ties in the ranking stay deterministic.
    city_stats = {}
    for city in sorted(salaries_by_city):
        salaries = salaries_by_city[city]
        city_stats[city] = (math.floor(sum(salaries) / len(salaries)),
                            len(salaries))

    # Keep cities with a share of at least 1%; written without a division
    # so an empty vacancy list needs no special case.
    total = len(ruble_vacancies)
    big_cities = [(city, stats) for city, stats in city_stats.items()
                  if stats[1] * 100 >= total]
    ranking = sorted(big_cities, key=lambda item: item[1], reverse=True)

    print(f'Из {len(city_stats)} городов, самые высокие средние ЗП:')
    for place, (city, (avg_salary, count)) in enumerate(ranking[:10], start=1):
        print(f' {place}) {city} - средняя зарплата {avg_salary} {get_form_ruble(avg_salary)} '
              f'({count} {get_form_vacancies(count)})')
    print()
def to_dict(titles, list_vac):
    """Build a vacancy dict from a header row and a data row.

    Args:
        titles: Column names; must include "salary_from" and "salary_to".
        list_vac: Row values, positionally aligned with ``titles``.

    Returns:
        A dict mapping each title to its value, plus an "avg_salary" key
        holding the floored mean of "salary_from" and "salary_to".

    Raises:
        IndexError: If ``list_vac`` is shorter than ``titles``.
        ValueError: If a salary bound cannot be converted to float.
    """
    vacancy = {title: list_vac[position] for position, title in enumerate(titles)}
    upper_bound = float(vacancy["salary_to"])
    lower_bound = float(vacancy["salary_from"])
    vacancy["avg_salary"] = math.floor((upper_bound + lower_bound) / 2)
    return vacancy
# Script entry: load the vacancies CSV, normalise each row, then print reports.
file_name = 'vacancies_ds.csv'
with open(file_name, encoding='utf-8-sig') as read_file:
    file_reader = csv.reader(read_file, delimiter=",")
    result_list = []
    list_head = next(file_reader)
    index_key_skills = list_head.index('key_skills')
    html_tags = re.compile('<.*?>')
    for row in file_reader:
        # Dropping the first empty field shortens the row, so the length
        # check below rejects it unless it had a surplus field.
        if '' in row:
            row.remove('')
        for i, cell in enumerate(row):
            if i == index_key_skills:
                # The skills cell is split into a list, one skill per line.
                row[i] = re.split("\n|\n\r", cell.strip())
            else:
                # Strip HTML tags and collapse whitespace runs to one space.
                without_tags = html_tags.sub('', cell).strip()
                row[i] = ' '.join(without_tags.split())
        if len(row) == len(list_head):
            result_list.append(to_dict(list_head, row))

get_rating_salaries(result_list, True)
get_rating_salaries(result_list, False)
top_skills(result_list)
get_rating_cities(result_list)
Add Comment
Please, Sign In to add comment