# Untitled

Oct 2nd, 2022
915
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. import csv
2. import re
3. import math
4. from collections import Counter
5.
# Path of the vacancies CSV export that the script reads at start-up.
csv_file = 'vacancies_big.csv'
7.
8.
def get_suffix_by_rubles(count) -> str:
    """Return the form of the Russian word "рубль" that agrees with *count*.

    The form depends on the last digit and on whether the last two digits
    fall in 10..19:

    * last two digits 10..19, or last digit 0 or 5..9 -> 'рублей'
    * last digit 2..4 -> 'рубля'
    * otherwise (last digit 1) -> 'рубль'

    The sign of *count* is ignored, so -21 is treated like 21.
    """
    # Python's % is never negative for a positive modulus (-21 % 10 == 9),
    # so work on the magnitude to pick the same form for -21 as for 21.
    magnitude = abs(count)
    last_digit = magnitude % 10
    last_two_digits = magnitude % 100

    if 10 <= last_two_digits <= 19 or last_digit == 0 or 5 <= last_digit <= 9:
        return 'рублей'
    if 2 <= last_digit <= 4:
        return 'рубля'
    return 'рубль'
16.
17.
def get_suffix_by_count(count) -> str:
    """Return the form of the Russian word "раз" that agrees with *count*.

    'раза' is used when the last digit is 2..4 and the last two digits are
    not in 10..19; every other count takes 'раз'. The sign of *count* is
    ignored.
    """
    magnitude = abs(count)
    is_teen = 10 <= magnitude % 100 <= 19
    if 2 <= magnitude % 10 <= 4 and not is_teen:
        return 'раза'
    return 'раз'
25.
26.
def get_list_by_salaries(current_list: list, is_high_salary: bool, limit: int = 10):
    """Print the vacancies with the highest or lowest average salary.

    Only rows whose column 9 equals 'RUR' are considered. The average salary
    of a row is the floor of the mean of columns 6 and 7 parsed as floats.

    Args:
        current_list: Vacancy rows. Each row is indexed positionally: 0 is
            printed as the vacancy name, 5 as the company, 6 and 7 are the
            salary bounds, 9 is the currency code and 10 is printed as the
            city.
        is_high_salary: True to list the highest salaries first, False to
            list the lowest first.
        limit: Maximum number of vacancies to print (10 by default).
    """
    def average_salary(vacancy):
        return math.floor((float(vacancy[6]) + float(vacancy[7])) / 2)

    # Filter before sorting: non-rouble rows are never printed, so there is
    # no reason to rank them. sorted() is stable, so the relative order of
    # the rouble rows is the same as when the full list was sorted first.
    rouble_vacancies = [vacancy for vacancy in current_list if vacancy[9] == 'RUR']
    ranked = sorted(rouble_vacancies, key=average_salary, reverse=is_high_salary)

    for position, vacancy in enumerate(ranked[:limit], start=1):
        salary = average_salary(vacancy)
        print(f'\t{position}) {vacancy[0]} в компании "{vacancy[5]}" - '
              f'{salary} {get_suffix_by_rubles(salary)} (г. {vacancy[10]})')
45.
46.
def get_top_mentioned_skills(current_list: list, limit: int = 10):
    """Print the most frequently mentioned skills with their counts.

    Column 2 of every row is split on ', ' and each piece is counted as one
    skill mention. Skills are printed from most to least mentioned; skills
    with equal counts keep the order in which they were first seen.

    Args:
        current_list: Vacancy rows; column 2 holds the ', '-separated skills.
        limit: Maximum number of skills to print (10 by default).
    """
    mentions = Counter(
        skill
        for vacancy in current_list
        for skill in vacancy[2].split(', ')
    )

    for position, (skill, count) in enumerate(mentions.most_common(limit), start=1):
        print(f'\t{position}) {skill} - '
              f'упоминается {count} {get_suffix_by_count(count)}')
65.
66.
def get_cities_by_salaries(current_list: list, limit: int = 10):
    """Print the cities whose best-paid rouble vacancy ranks highest.

    Rows with column 9 equal to 'RUR' are ranked by average salary (floor of
    the mean of columns 6 and 7), highest first. Each city (column 10) is
    printed once, using the first, i.e. best-paid, row seen for it.

    NOTE(review): the printed text says "средняя зарплата" and always
    "(1 вакансия)", but the figure is taken from a single vacancy rather
    than averaged over the city - confirm that this is the intended report.

    Args:
        current_list: Vacancy rows; columns 6, 7, 9 and 10 are read.
        limit: Maximum number of cities to print (10 by default).
    """
    def average_salary(vacancy):
        return math.floor((float(vacancy[6]) + float(vacancy[7])) / 2)

    ranked = sorted(
        (vacancy for vacancy in current_list if vacancy[9] == 'RUR'),
        key=average_salary,
        reverse=True)

    seen_cities = set()
    for vacancy in ranked:
        if len(seen_cities) >= limit:
            break

        city = vacancy[10]
        if city in seen_cities:
            continue
        # Record the city so later, lower-paid rows for it are skipped.
        # Without this the membership check above can never succeed.
        seen_cities.add(city)

        salary = average_salary(vacancy)
        print(f'\t{len(seen_cities)}) {city} - средняя зарплата '
              f'{salary} {get_suffix_by_rubles(salary)} '
              f'(1 вакансия)')
91.
92.
def get_cities_count(current_list: list) -> int:
    """Return the number of distinct cities among the vacancy rows.

    The city is read from column 10 of each row, the same column the report
    functions in this file print as the city.
    """
    return len({vacancy[10] for vacancy in current_list})
98.
99.
def get_skills_count(current_list: list) -> int:
    """Return the number of distinct skills among the vacancy rows.

    Column 2 of each row is split on ', ' and every resulting piece counts
    as one skill.
    """
    return len({
        skill
        for vacancy in current_list
        for skill in vacancy[2].split(', ')
    })
107.
108.
# Cleaned vacancy rows; each entry is a list of non-empty, tag-free strings.
vacancies_list = []

# Matches any HTML tag so markup can be stripped from the CSV fields.
html_tags = re.compile(r'<[^>]+>')

# newline='' is what the csv module asks for, so that line breaks embedded
# in quoted fields reach the reader untouched.
with open(csv_file, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f)
    # First row is the header; an empty file yields an empty header.
    title = next(reader, [])
    if title:
        # NOTE(review): presumably normalises a BOM-prefixed first column
        # name; below, the header is only used for its length - confirm.
        title[0] = 'name'

    for line in reader:
        # Skip rows that have fewer fields than the header.
        if len(line) < len(title):
            continue

        cleaned_row = []
        for value in line:
            # Replace '\r\n' before '\n': in the opposite order the '\n'
            # pass consumes the line feed first, the '\r\n' pass never
            # matches, and the result contains ' , ' instead of ', '.
            text = html_tags.sub('', value)\
                .replace('\r\n', ', ')\
                .replace('\n', ', ')
            # Collapse runs of whitespace and trim both ends.
            text = ' '.join(text.split())
            if not text:
                # A row with any empty field is dropped entirely.
                break
            cleaned_row.append(text)
        else:
            vacancies_list.append(cleaned_row)


print('Самые высокие зарплаты:')
get_list_by_salaries(vacancies_list, True)
print()
print('Самые низкие зарплаты:')
get_list_by_salaries(vacancies_list, False)
print()
print(f'Из {get_skills_count(vacancies_list)} скиллов, самыми популярными являются:')
get_top_mentioned_skills(vacancies_list)
print()
print(f'Из {get_cities_count(vacancies_list)} городов, самые высокие средние ЗП:')
get_cities_by_salaries(vacancies_list)