# Untitled

a guest Sep 11th, 2019 111 Never
1. import pandas as pd
2. import re
3.
4. # read data from data.csv saved by the parser
class Analyzer:
    """Report on the skills mentioned in the job summaries of ``data.csv``.

    The table is read once in ``__init__``. The reporting methods use its
    ``Summary`` column (free text) and ``Average`` column (a number compared
    against zero and averaged per skill).
    """

    # Delimiters separating candidate skill tokens inside a summary.
    # Raw string so the backslashes reach the regex engine without relying
    # on Python's deprecated handling of invalid string escapes.
    _TOKEN_SPLIT = re.compile(r'\(|\)|\.|; |, |/|!| ')

    def __init__(self):
        """Load ``data.csv`` (first column used as the index)."""
        self.data = pd.read_csv('data.csv', index_col=0)
        # NOTE(review): nothing visible here assigns ``self.skills``, yet
        # findSkill reads ``self.skills[0]``. The pasted listing skips a
        # line number at this point, so the assignment was presumably lost;
        # restore it before calling salary()/frequency().

    def salary(self):
        """Return skills ranked by mean ``Average``, highest first.

        Rows whose ``Average`` is not greater than zero (including NaN) are
        ignored. A row contributes once to every skill found in its summary.

        Returns:
            One line per skill, each terminated by a newline, formatted as
            ``<skill>: $<mean>, sample size: <rows>`` with the mean
            truncated to an integer. Empty string when nothing qualifies.
        """
        totals = {}
        for _, row in self.data.iterrows():
            average = row['Average']
            # Written as "not >" so NaN is skipped, as any failed comparison is.
            if not average > 0:
                continue
            for skill in self.findSkill(row['Summary']):
                running, count = totals.get(skill, (0, 0))
                totals[skill] = (running + average, count + 1)

        means = {
            skill: (running / count, count)
            for skill, (running, count) in totals.items()
        }
        # Negated key keeps the sort stable for equal means.
        ranked = sorted(means.items(), key=lambda item: -item[1][0])
        return ''.join(
            '{}: ${}, sample size: {}\n'.format(skill, int(mean), count)
            for skill, (mean, count) in ranked
        )

    def frequency(self):
        """Return skills ranked by how many rows mention them, most first.

        Every row is considered, regardless of its ``Average``.

        Returns:
            One ``<skill>: <rows>`` line per skill, each terminated by a
            newline. Empty string when no skill is found.
        """
        counts = {}
        for _, row in self.data.iterrows():
            for skill in self.findSkill(row['Summary']):
                counts[skill] = counts.get(skill, 0) + 1

        ranked = sorted(counts.items(), key=lambda item: -item[1])
        return ''.join(
            '{}: {}\n'.format(skill, count) for skill, count in ranked
        )

    def findSkill(self, summary):
        """Return the set of known skills that occur as tokens in *summary*.

        The summary is split on ``(``, ``)``, ``.``, ``; ``, ``, ``, ``/``,
        ``!`` and single spaces; a token counts only when it equals an entry
        of ``self.skills[0]`` exactly (case-sensitive).

        Args:
            summary: Text to scan. Non-string values (for example the NaN
                pandas produces for an empty cell) yield an empty set.

        Returns:
            A ``set`` of matching skill names.
        """
        if not isinstance(summary, str):
            return set()
        # Build the lookup once per call instead of once per token.
        known = set(self.skills[0])
        return {
            token
            for token in self._TOKEN_SPLIT.split(summary)
            if token in known
        }
