Guest User

Reddit Salary Parser

a guest
Mar 9th, 2023
232
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.20 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. import matplotlib.pyplot as plt
  4. import praw
  5. import re
  6.  
  7. LINK = 'https://www.reddit.com/r/FortCollins/comments/11mkrfu/salary_transparency_thread/'
  8. ID = '11mkrfu'
  9. SALARY_BUCKETS = [10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000,
  10. 100000, 125000, 150000, 200000, 300000, 400000, 500000, 10000000]
  11.  
  12. reddit = praw.Reddit('me')
  13.  
  14. submission = reddit.submission(ID)
  15.  
  16. salaries = list()
  17.  
  18. for comment in submission.comments:
  19. for word in comment.body.split():
  20. # filter out age/sex:
  21. if re.search('[0-9]+[+]*[m|M|f|F]+', word):
  22. continue
  23.  
  24. # filter out other odd answers:
  25. if word[-1] == ':' or word.find('hr') >= 0 or \
  26. word.find('hour') >= 0 or word.find('yrs') >= 0 or \
  27. word.find('YOE') >= 0:
  28. continue
  29.  
  30. if re.search('[0-9]+[k|K]*', word):
  31. if word.find('k') >= 0 or word.find('K') >= 0:
  32. times1000 = True
  33. else:
  34. times1000 = False
  35.  
  36. word = re.sub('[^0-9]', '', word)
  37. word = int(word)
  38.  
  39. if times1000:
  40. word *= 1000
  41.  
  42. # arbitrary bottom limit. Some entries may still sneak through
  43. # the above filters. If the salary is < $5k, throw it out
  44. if word < 5000:
  45. continue
  46. # ditto for a high limit:
  47. if word > 1000000:
  48. continue
  49.  
  50. salaries.append(word)
  51.  
  52. buckets = [0] * len(SALARY_BUCKETS)
  53. for salary in salaries:
  54. for idx, bucket in enumerate(SALARY_BUCKETS):
  55. if salary <= bucket:
  56. buckets[idx] += 1
  57. break
  58.  
  59. #plt.scatter(salaries, [1]*len(salaries))
  60. #plt.show()
  61.  
  62. labels = list()
  63. for idx, bucket in enumerate(SALARY_BUCKETS):
  64. if idx == 0:
  65. labels.append('\$0 to \${:.0f}k'.format(bucket / 1000))
  66. else:
  67. labels.append('\${:.0f}k to \${:.0f}k'.format(SALARY_BUCKETS[idx -1] / 1000,
  68. bucket / 1000))
  69.  
  70. fig = plt.figure()
  71. plt.bar(labels, buckets, align='center', alpha=0.5)
  72. plt.xticks(rotation='vertical')
  73. fig.subplots_adjust(bottom=0.3)
  74. plt.ylabel('Number of respondents')
  75. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment