Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- import re
- import math
- from collections import Counter
# Path of the CSV file with vacancies that the loading code at the bottom of
# this script opens.
csv_file = 'vacancies_big.csv'
def get_suffix_by_rubles(count):
    """Return the Russian plural form of "rouble" that agrees with *count*.

    Numbers whose last two digits are 10-19, or whose last digit is 0 or 5-9,
    take 'рублей'; a last digit of 2-4 takes 'рубля'; everything else
    (a last digit of 1 outside the teens) takes 'рубль'.
    """
    units = count % 10
    is_teen = 10 <= count % 100 <= 19
    if is_teen or units == 0 or 5 <= units <= 9:
        return 'рублей'
    return 'рубля' if 2 <= units <= 4 else 'рубль'
def get_suffix_by_count(count):
    """Return the Russian plural form of "time(s)" that agrees with *count*.

    Only numbers ending in 2-4 whose last two digits are not 10-19 take
    'раза'; every other number takes 'раз'.
    """
    ends_in_two_to_four = 2 <= count % 10 <= 4
    is_teen = 10 <= count % 100 <= 19
    return 'раза' if ends_in_two_to_four and not is_teen else 'раз'
def get_list_by_salaries(current_list: list, is_high_salary: bool, limit: int = 10):
    """Print a ranking of rouble vacancies ordered by average salary.

    The average of a vacancy is ``floor((row[6] + row[7]) / 2)``. Only rows
    whose currency field (``row[9]``) equals ``'RUR'`` take part.

    Args:
        current_list: Vacancy rows as lists of strings. The fields read are
            0 (vacancy name), 5 (company), 6 and 7 (the two salary figures
            that are averaged), 9 (currency code) and 10 (city).
        is_high_salary: ``True`` ranks the highest averages first, ``False``
            the lowest.
        limit: Maximum number of lines to print. Defaults to 10, the value
            that used to be hard-coded.

    Raises:
        ValueError: If a salary field of a rouble vacancy is not numeric.
    """
    # Only rouble vacancies are ever printed, so drop the others before the
    # sort instead of parsing and ordering rows that are thrown away later.
    # The average is computed once here and reused for sorting and printing.
    ranked = [
        (math.floor((float(vacancy[6]) + float(vacancy[7])) / 2), vacancy)
        for vacancy in current_list
        if vacancy[9] == 'RUR'
    ]
    # list.sort is stable (also with reverse=True), so vacancies with equal
    # averages keep their input order.
    ranked.sort(key=lambda pair: pair[0], reverse=is_high_salary)

    shown = ranked[:max(limit, 0)]
    for position, (average_salary, vacancy) in enumerate(shown, start=1):
        print(f'\t{position}) {vacancy[0]} в компании "{vacancy[5]}" - '
              f'{average_salary} {get_suffix_by_rubles(average_salary)} (г. {vacancy[10]})')
def get_top_mentioned_skills(current_list: list, limit: int = 10):
    """Print the most frequently mentioned skills with their mention counts.

    Args:
        current_list: Vacancy rows as lists of strings; field 2 holds the
            skills of a vacancy joined by ``', '``.
        limit: Maximum number of skills to print. Defaults to 10, the value
            that used to be hard-coded.
    """
    skill_counts = Counter()
    for vacancy in current_list:
        skill_counts.update(vacancy[2].split(', '))

    # most_common(n) yields the same leading entries, in the same order, as
    # slicing the full most_common() list, without the dict round-trip and
    # the manual counter/break the loop needed before.
    top_skills = skill_counts.most_common(max(limit, 0))
    for position, (skill, count) in enumerate(top_skills, start=1):
        print(f'\t{position}) {skill} - '
              f'упоминается {count} {get_suffix_by_count(count)}')
def get_cities_by_salaries(current_list: list, limit: int = 10):
    """Print cities ranked by their best-paid rouble vacancy.

    For every city (``row[10]``) the rouble vacancy with the highest average
    salary, ``floor((row[6] + row[7]) / 2)``, represents the city. Cities are
    printed in descending order of that figure.

    Args:
        current_list: Vacancy rows as lists of strings. The fields read are
            6 and 7 (the two salary figures that are averaged), 9 (currency
            code) and 10 (city).
        limit: Maximum number of cities to print. Defaults to 10, the value
            that used to be hard-coded.

    Raises:
        ValueError: If a salary field of a rouble vacancy is not numeric.
    """
    # Filter by currency BEFORE de-duplicating cities. Previously a city was
    # marked as seen as soon as its top-paid vacancy was met, even when that
    # vacancy was not in roubles, so all rouble vacancies of that city were
    # skipped and the city vanished from the ranking.
    ranked = [
        (math.floor((float(vacancy[6]) + float(vacancy[7])) / 2), vacancy[10])
        for vacancy in current_list
        if vacancy[9] == 'RUR'
    ]
    # Stable sort: equal averages keep their input order.
    ranked.sort(key=lambda pair: pair[0], reverse=True)

    seen_cities = set()
    for average_salary, city in ranked:
        # One line is printed per seen city, so the set size is the line count.
        if len(seen_cities) >= limit:
            break
        if city in seen_cities:
            continue
        seen_cities.add(city)
        # NOTE(review): the "(1 вакансия)" label is hard-coded and the figure
        # is the single best vacancy of the city, not a mean over all of the
        # city's vacancies - confirm whether a real per-city average is wanted.
        print(f'\t{len(seen_cities)}) {city} - средняя зарплата '
              f'{average_salary} {get_suffix_by_rubles(average_salary)} '
              f'(1 вакансия)')
def get_cities_count(current_list: list) -> int:
    """Return how many distinct values field 10 (the city) takes across rows."""
    return len({vacancy[10] for vacancy in current_list})
def get_skills_count(current_list: list) -> int:
    """Return how many distinct skills occur in field 2 across all rows.

    Field 2 is split on ``', '``; each resulting piece counts as one skill.
    """
    unique_skills = set()
    for vacancy in current_list:
        unique_skills.update(vacancy[2].split(', '))
    return len(unique_skills)
# --- Load and clean the vacancies, then print the report -------------------

# Matches any HTML tag; compiled once, outside the row loop.
html_tags = re.compile(r'<[^>]+>')
vacancies_list = list()

# newline='' is what the csv module asks for: it lets the reader see line
# breaks inside quoted fields exactly as they are stored in the file.
with open(csv_file, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f)
    # An empty file yields no header; fall back to [] instead of crashing
    # with StopIteration, so the report below is simply printed empty.
    title = next(reader, [])
    if title:
        # The first header cell is overwritten unconditionally - presumably
        # to get rid of a BOM glued to it; confirm against the real file.
        title[0] = 'name'
    for line in reader:
        # Rows with fewer cells than the header are incomplete: skip them.
        if len(line) < len(title):
            continue
        cleaned_line = []
        for value in line:
            # Unify line endings first. Previously '\n' was replaced before
            # '\r\n', so the '\r\n' replacement could never match.
            text = value.replace('\r\n', '\n').replace('\r', '\n')
            text = html_tags.sub('', text).replace('\n', ', ')
            # Collapse every run of whitespace into a single space.
            text = ' '.join(text.split())
            if not text:
                # A single empty cell disqualifies the whole row.
                break
            cleaned_line.append(text)
        else:
            # Reached only when no cell was empty.
            vacancies_list.append(cleaned_line)

print('Самые высокие зарплаты:')
get_list_by_salaries(vacancies_list, True)
print()
print('Самые низкие зарплаты:')
get_list_by_salaries(vacancies_list, False)
print()
print(f'Из {get_skills_count(vacancies_list)} скиллов, самыми популярными являются:')
get_top_mentioned_skills(vacancies_list)
print()
print(f'Из {get_cities_count(vacancies_list)} городов, самые высокие средние ЗП:')
get_cities_by_salaries(vacancies_list)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement