SHARE
TWEET

Untitled

a guest Sep 11th, 2019 111 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import pandas as pd
  2. import re
  3.  
  4. # read data from data.csv saved by the parser
  5. class Analyzer:
  6.    
  7.     def __init__(self):
  8.         self.skills = pd.read_csv("skills.txt",header=None,delimiter="\t")
  9.         self.data = pd.read_csv('data.csv', index_col=0)
  10.    
  11.     """
  12.     skills sorted by its salary
  13.     """
  14.     def salary(self):
  15.         dictionary = {}
  16.         for idx,line in self.data.iterrows():
  17.             if line['Average'] > 0:
  18.                 skill_set = self.findSkill(line['Summary'])
  19.                 for s in skill_set:
  20.                     pair = dictionary.get(s,(0,0))
  21.                     dictionary[s] = (pair[0]+line['Average'],pair[1]+1)
  22.         for key, val in dictionary.items():
  23.             dictionary[key] = (val[0]/val[1], val[1])
  24.         sorted_dic = sorted(dictionary.items(), key = lambda pair:-pair[1][0])
  25.         res = ''
  26.         for skill, salary in sorted_dic:
  27.             res += skill + ': $' + str(int(salary[0])) + ', sample size: '+str(salary[1]) + '\n'
  28.         return res
  29.  
  30.     """
  31.     skills sorted by its frequency
  32.     """    
  33.     def frequency(self):
  34.         dictionary = {}
  35.         for idx,line in self.data.iterrows():
  36.             skill_set = self.findSkill(line['Summary'])
  37.             for s in skill_set:
  38.                 dictionary[s] = dictionary.get(s,0) + 1
  39.         sorted_dic = sorted(dictionary.items(), key = lambda pair:-pair[1])
  40.         res = ''
  41.         for skill, freq in sorted_dic:
  42.             res += skill + ': ' +str(freq) + '\n'
  43.         return res
  44.  
  45.     def findSkill(self, summary):
  46.         summary = re.split('\(|\)|\.|; |, |\/|!| ',summary)
  47.         return set([x for x in summary if x in list(self.skills[0])])
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top