Reddit Salary Parser

#!/usr/bin/python

import matplotlib.pyplot as plt
import praw
import re

# Thread being analysed.  LINK is kept for reference only; the submission is
# fetched by ID.
LINK = 'https://www.reddit.com/r/FortCollins/comments/11mkrfu/salary_transparency_thread/'
ID = '11mkrfu'

# Inclusive upper bound, in dollars, of each histogram bucket.  The final,
# oversized bound acts as a catch-all for everything above $500k.
SALARY_BUCKETS = [10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000,
                  100000, 125000, 150000, 200000, 300000, 400000, 500000, 10000000]

# Arbitrary plausibility window.  Some non-salary numbers still sneak through
# the token filters below; anything outside this range is thrown out.
MIN_SALARY = 5000
MAX_SALARY = 1000000

# Substrings that mark a token as an hourly rate or years of experience.
_NOISE_MARKERS = ('hr', 'hour', 'yrs', 'YOE')

# Age/sex shorthand such as "32M" or "40+f".
_AGE_SEX_RE = re.compile(r'[0-9]+[+]*[mMfF]+')
_DIGIT_RE = re.compile(r'[0-9]')
# A "k" only means "thousand" when it directly follows a digit ("65k"), not
# when it merely appears somewhere in the token ("10-week").
_THOUSANDS_RE = re.compile(r'[0-9][kK]')


def parse_salary(word):
    """Return the salary in whole dollars encoded by *word*, or None.

    *word* is a single whitespace-delimited token from a comment.  None is
    returned when the token looks like age/sex shorthand, a label ending in
    ':', an hourly rate or years of experience, contains no digit, is the
    retirement-plan mention "401k", or falls outside
    MIN_SALARY..MAX_SALARY (both inclusive).
    """
    if _AGE_SEX_RE.search(word):
        return None
    if word.endswith(':') or any(marker in word for marker in _NOISE_MARKERS):
        return None
    if not _DIGIT_RE.search(word):
        return None

    if _THOUSANDS_RE.search(word):
        # Keep the decimal point so "82.5k" is 82,500 rather than 825,000.
        # Dots at either end are sentence punctuation, not part of the number.
        number_text = re.sub(r'[^0-9.]', '', word).strip('.')
        if number_text.count('.') > 1:
            return None  # e.g. "1.2.3k": not a single number
        if number_text == '401':
            return None  # "401k" is a retirement plan, not a salary
        amount = float(number_text) * 1000
    else:
        # No multiplier: drop separators and currency symbols, keep digits.
        amount = int(re.sub(r'[^0-9]', '', word))

    # Range check comes before rounding so absurdly long digit strings
    # (float infinity) are rejected instead of raising in round().
    if amount < MIN_SALARY or amount > MAX_SALARY:
        return None
    return int(round(amount))


def extract_salaries(text):
    """Return every plausible salary found in *text*, in order of appearance."""
    found = []
    for word in text.split():
        amount = parse_salary(word)
        if amount is not None:
            found.append(amount)
    return found


def count_by_bucket(salaries, bucket_limits):
    """Return how many *salaries* fall into each bucket.

    *bucket_limits* holds ascending, inclusive upper bounds.  Each salary is
    counted in the first bucket whose bound it does not exceed; a salary
    larger than every bound is not counted at all.
    """
    counts = [0] * len(bucket_limits)
    for salary in salaries:
        for idx, limit in enumerate(bucket_limits):
            if salary <= limit:
                counts[idx] += 1
                break
    return counts


def bucket_labels(bucket_limits):
    """Return one axis label per bucket, e.g. '\\$10k to \\$20k'.

    The dollar signs are backslash-escaped so matplotlib does not treat them
    as math-text delimiters; raw strings keep the backslash without relying
    on an invalid escape sequence.
    """
    labels = []
    for idx, limit in enumerate(bucket_limits):
        if idx == 0:
            labels.append(r'\$0 to \${:.0f}k'.format(limit / 1000))
        else:
            labels.append(r'\${:.0f}k to \${:.0f}k'.format(
                bucket_limits[idx - 1] / 1000, limit / 1000))
    return labels


def plot_histogram(labels, counts):
    """Show a bar chart of *counts* with one vertically labelled bar per bucket."""
    fig = plt.figure()
    plt.bar(labels, counts, align='center', alpha=0.5)
    plt.xticks(rotation='vertical')
    # Leave room below the axes for the vertical tick labels.
    fig.subplots_adjust(bottom=0.3)
    plt.ylabel('Number of respondents')
    plt.show()


def main():
    """Fetch the thread, parse salaries from top-level comments and plot them."""
    # 'me' is the site name passed to praw.Reddit (see PRAW's praw.ini docs).
    reddit = praw.Reddit('me')
    submission = reddit.submission(ID)

    # Expand "load more comments" placeholders first: MoreComments objects
    # have no .body attribute and would otherwise raise AttributeError below.
    submission.comments.replace_more(limit=None)

    salaries = []
    for comment in submission.comments:
        salaries.extend(extract_salaries(comment.body))

    plot_histogram(bucket_labels(SALARY_BUCKETS),
                   count_by_bucket(salaries, SALARY_BUCKETS))


if __name__ == '__main__':
    main()