Advertisement
Guest User

Untitled

a guest
Sep 11th, 2019
140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.68 KB | None | 0 0
  1. import pandas as pd
  2. import re
  3.  
  4. # read data from data.csv saved by the parser
  5. class Analyzer:
  6.  
  7. def __init__(self):
  8. self.skills = pd.read_csv("skills.txt",header=None,delimiter="\t")
  9. self.data = pd.read_csv('data.csv', index_col=0)
  10.  
  11. """
  12. skills sorted by its salary
  13. """
  14. def salary(self):
  15. dictionary = {}
  16. for idx,line in self.data.iterrows():
  17. if line['Average'] > 0:
  18. skill_set = self.findSkill(line['Summary'])
  19. for s in skill_set:
  20. pair = dictionary.get(s,(0,0))
  21. dictionary[s] = (pair[0]+line['Average'],pair[1]+1)
  22. for key, val in dictionary.items():
  23. dictionary[key] = (val[0]/val[1], val[1])
  24. sorted_dic = sorted(dictionary.items(), key = lambda pair:-pair[1][0])
  25. res = ''
  26. for skill, salary in sorted_dic:
  27. res += skill + ': $' + str(int(salary[0])) + ', sample size: '+str(salary[1]) + '\n'
  28. return res
  29.  
  30. """
  31. skills sorted by its frequency
  32. """
  33. def frequency(self):
  34. dictionary = {}
  35. for idx,line in self.data.iterrows():
  36. skill_set = self.findSkill(line['Summary'])
  37. for s in skill_set:
  38. dictionary[s] = dictionary.get(s,0) + 1
  39. sorted_dic = sorted(dictionary.items(), key = lambda pair:-pair[1])
  40. res = ''
  41. for skill, freq in sorted_dic:
  42. res += skill + ': ' +str(freq) + '\n'
  43. return res
  44.  
  45. def findSkill(self, summary):
  46. summary = re.split('\(|\)|\.|; |, |\/|!| ',summary)
  47. return set([x for x in summary if x in list(self.skills[0])])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement