Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import re
- # read data from data.csv saved by the parser
class Analyzer:
    """Rank the skills mentioned in job summaries by salary or by frequency.

    Two tables are loaded on construction:

    * ``self.skills`` -- a tab-delimited, header-less file; the values in its
      first column (column ``0``) are the skill names to look for.
    * ``self.data`` -- a CSV whose first column is the index and which
      provides the ``'Summary'`` and ``'Average'`` columns read below.
    """

    # Delimiters used to cut a summary into candidate tokens: parentheses,
    # full stop, "; ", ", ", slash, exclamation mark and a single space.
    # Compiled once, and written as a raw string so the backslashes reach the
    # regex engine without invalid-escape warnings.
    _TOKEN_SPLIT = re.compile(r'\(|\)|\.|; |, |\/|!| ')

    def __init__(self, skills_path="skills.txt", data_path="data.csv"):
        """Load the skill list and the parsed job data.

        Args:
            skills_path: tab-delimited file without a header row.
            data_path: CSV file whose first column is used as the index.
        """
        self.skills = pd.read_csv(skills_path, header=None, delimiter="\t")
        self.data = pd.read_csv(data_path, index_col=0)

    def salary(self):
        """Return the skills sorted by their average salary, highest first.

        Only rows whose ``'Average'`` is greater than zero contribute. Each
        output line reads ``"<skill>: $<average>, sample size: <rows>\\n"``
        with the average truncated to an integer. Skills with equal averages
        are listed alphabetically so the output is deterministic.
        """
        known = self._known_skills()
        totals = {}
        for average, summary in zip(self.data['Average'], self.data['Summary']):
            # "not (x > 0)" rather than "x <= 0" so NaN averages are skipped.
            if not average > 0:
                continue
            for skill in self._skills_in(summary, known):
                running, count = totals.get(skill, (0, 0))
                totals[skill] = (running + average, count + 1)

        ranked = sorted(
            ((skill, running / count, count)
             for skill, (running, count) in totals.items()),
            key=lambda entry: (-entry[1], entry[0]),
        )
        return ''.join(
            '{}: ${}, sample size: {}\n'.format(skill, int(mean), count)
            for skill, mean, count in ranked
        )

    def frequency(self):
        """Return the skills sorted by how many summaries mention them.

        Every row is considered. Each output line reads
        ``"<skill>: <count>\\n"``, most frequent first; skills with equal
        counts are listed alphabetically so the output is deterministic.
        """
        known = self._known_skills()
        counts = {}
        for summary in self.data['Summary']:
            for skill in self._skills_in(summary, known):
                counts[skill] = counts.get(skill, 0) + 1

        ranked = sorted(counts.items(), key=lambda entry: (-entry[1], entry[0]))
        return ''.join(
            '{}: {}\n'.format(skill, count) for skill, count in ranked
        )

    def findSkill(self, summary):
        """Return the set of known skills that occur as tokens in *summary*.

        The summary is split on the delimiters in ``_TOKEN_SPLIT`` and each
        resulting token is matched exactly (case-sensitively) against the
        first column of ``self.skills``. A non-string summary, such as the
        NaN pandas produces for an empty cell, yields an empty set.
        """
        return self._skills_in(summary, self._known_skills())

    def _known_skills(self):
        """Return the first column of ``self.skills`` as a set for O(1) lookup."""
        return set(self.skills[0])

    @classmethod
    def _skills_in(cls, summary, known):
        """Return the members of *known* that appear as tokens in *summary*."""
        if not isinstance(summary, str):
            # Missing summaries arrive as float NaN; there is nothing to split.
            return set()
        return known.intersection(cls._TOKEN_SPLIT.split(summary))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement