Advertisement
mikhailemv

Untitled

Oct 2nd, 2022
1,097
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.45 KB | None | 0 0
  1. import csv
  2. import re
  3. import math
  4. from collections import Counter
  5.  
# Path of the vacancies CSV file that the script opens and parses at module level.
csv_file = 'vacancies_big.csv'
  7.  
  8.  
  9. def get_suffix_by_rubles(count):
  10.     if count % 10 == 0 or 5 <= count % 10 <= 9 or 10 <= count % 100 <= 19:
  11.         return 'рублей'
  12.     elif 2 <= count % 10 <= 4:
  13.         return 'рубля'
  14.     else:
  15.         return 'рубль'
  16.  
  17.  
  18. def get_suffix_by_count(count):
  19.     if count % 10 == 0 or 5 <= count % 10 <= 9 or 10 <= count % 100 <= 19:
  20.         return 'раз'
  21.     elif 2 <= count % 10 <= 4:
  22.         return 'раза'
  23.     else:
  24.         return 'раз'
  25.  
  26.  
  27. def get_list_by_salaries(current_list: list, is_high_salary: bool):
  28.     items_counter = 0
  29.     list_sorted_by_average_salary = sorted(
  30.         current_list,
  31.         key=lambda salary: math.floor((float(salary[6]) + float(salary[7])) / 2),
  32.         reverse=is_high_salary)
  33.  
  34.     for vacancy in list_sorted_by_average_salary:
  35.         if vacancy[9] != 'RUR':
  36.             continue
  37.         if items_counter == 10:
  38.             break
  39.  
  40.         items_counter += 1
  41.         average_salary = math.floor((float(vacancy[6]) + float(vacancy[7])) / 2)
  42.  
  43.         print(f'\t{items_counter}) {vacancy[0]} в компании \"{vacancy[5]}\" - '
  44.               f'{average_salary} {get_suffix_by_rubles(average_salary)} (г. {vacancy[10]})')
  45.  
  46.  
  47. def get_top_mentioned_skills(current_list: list):
  48.     items_counter = 0
  49.     all_skills = list()
  50.  
  51.     for vacancy in current_list:
  52.         skills = vacancy[2].split(', ')
  53.         for skill in skills:
  54.             all_skills.append(skill)
  55.  
  56.     statistic = dict(Counter(all_skills).most_common())
  57.  
  58.     for key, count in statistic.items():
  59.         if items_counter != 10:
  60.             items_counter += 1
  61.             print(f'\t{items_counter}) {key} - '
  62.                   f'упоминается {count} {get_suffix_by_count(count)}')
  63.         else:
  64.             break
  65.  
  66.  
  67. def get_cities_by_salaries(current_list: list):
  68.     items_counter = 0
  69.     all_cities = set()
  70.     list_sorted_by_average_salary = sorted(
  71.         current_list,
  72.         key=lambda salary: math.floor((float(salary[6]) + float(salary[7])) / 2),
  73.         reverse=True)
  74.  
  75.     for vacancy in list_sorted_by_average_salary:
  76.         if all_cities.__contains__(vacancy[10]):
  77.             continue
  78.         else:
  79.             all_cities.add(vacancy[10])
  80.             if vacancy[9] != 'RUR':
  81.                 continue
  82.             if items_counter == 10:
  83.                 break
  84.  
  85.         items_counter += 1
  86.         average_salary = math.floor((float(vacancy[6]) + float(vacancy[7])) / 2)
  87.  
  88.         print(f'\t{items_counter}) {vacancy[10]} - средняя зарплата '
  89.               f'{average_salary} {get_suffix_by_rubles(average_salary)} '
  90.               f'(1 вакансия)')
  91.  
  92.  
  93. def get_cities_count(current_list: list) -> int:
  94.     all_cities = set()
  95.     for vacancy in current_list:
  96.         all_cities.add(vacancy[10])
  97.     return len(all_cities)
  98.  
  99.  
  100. def get_skills_count(current_list: list) -> int:
  101.     all_skills = set()
  102.     for vacancy in current_list:
  103.         skills = vacancy[2].split(', ')
  104.         for skill in skills:
  105.             all_skills.add(skill)
  106.     return len(all_skills)
  107.  
  108.  
  109. vacancies_list = list()
  110.  
  111.  
  112. with open(csv_file, 'r', encoding='utf-8') as f:
  113.     reader = csv.reader(f)
  114.     title = next(reader)
  115.     data, title[0] = [], 'name'
  116.     html_tags = re.compile(r'<[^>]+>')
  117.     for line in reader:
  118.         if len(line) < len(title):
  119.             continue
  120.         is_correct_string = True
  121.         for index, value in enumerate(line):
  122.             normal_string = re.sub(html_tags, '', value)\
  123.                 .replace('\n', ', ')\
  124.                 .replace('\r\n', ', ')
  125.             normal_string = ' '.join(normal_string.split())
  126.             line[index] = normal_string
  127.             if len(normal_string) == 0:
  128.                 is_correct_string = False
  129.                 break
  130.         if is_correct_string:
  131.             vacancies_list.append(line)
  132.  
  133.  
  134. print('Самые высокие зарплаты:')
  135. get_list_by_salaries(vacancies_list, True)
  136. print()
  137. print('Самые низкие зарплаты:')
  138. get_list_by_salaries(vacancies_list, False)
  139. print()
  140. print(f'Из {get_skills_count(vacancies_list)} скиллов, самыми популярными являются:')
  141. get_top_mentioned_skills(vacancies_list)
  142. print()
  143. print(f'Из {get_cities_count(vacancies_list)} городов, самые высокие средние ЗП:')
  144. get_cities_by_salaries(vacancies_list)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement