Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import html
import re
import urllib.parse
import urllib.request
def name_normalization(name_input):
    """Convert a person's name into a lowercase, hyphen-separated slug.

    Three input shapes are recognised:

    * ``"Last, First M."`` -- everything after the first comma is moved in
      front of everything before it, and commas and periods are removed
      from every word.
    * ``"First M. Last"`` or ``"First Last"`` -- each word is lowercased
      and leading/trailing periods are stripped.
    * Anything containing no comma, period, or space -- returned unchanged.

    Words that end up empty after cleaning are dropped, so the result never
    contains doubled or dangling hyphens.

    Args:
        name_input: The name as a string.

    Returns:
        The cleaned words joined with ``'-'``; an empty string when the
        input holds no usable words.

    >>> name_normalization("Sullivan, Teresa A.")
    'teresa-a-sullivan'
    >>> name_normalization("Teresa A. Sullivan")
    'teresa-a-sullivan'
    """
    if ',' in name_input:
        # Split on the comma itself rather than rotating a whitespace-split
        # list: this keeps multi-word family names together and cannot
        # index past the end when one side of the comma is missing.
        family, _, given = name_input.partition(',')
        cleaned = (
            word.strip(', ').replace('.', '').lower()
            for word in given.split() + family.split()
        )
        return '-'.join(word for word in cleaned if word)

    if '.' in name_input or ' ' in name_input:
        cleaned = (word.strip('. ').lower() for word in name_input.split())
        return '-'.join(word for word in cleaned if word)

    # No separators at all: hand the text back untouched.
    return name_input
def job_searcher(text):
    """Extract the job title from the page text.

    Args:
        text: Page source to search, as a string.

    Returns:
        The contents of the first
        ``<span class="small text-muted" id="personjob">`` element with HTML
        character references decoded, or ``None`` when no such element is
        found.
    """
    match = re.search(
        r'<span class="small text-muted" id="personjob">([^<]*)</span>',
        text,
    )
    if match is None:
        return None
    # html.unescape decodes every character reference (&amp;, &#39;, ...)
    # in one pass, instead of hand-replacing a couple of them.
    return html.unescape(match.group(1))
def salary_searcher(text):
    """Pull the total pay figure out of the page text.

    Args:
        text: Page source to search, as a string.

    Returns:
        The contents of the first ``<h2 class="pay" id="paytotal">`` element
        converted to ``float`` after removing commas and dollar signs, or
        ``None`` when no such element is found.

    Raises:
        ValueError: If the element's text is not a valid number once the
            commas and dollar signs are removed.
    """
    pay_pattern = re.compile(r'<h2 class="pay" id="paytotal">([^<]*)</h2>')
    first_hit = next(pay_pattern.finditer(text), None)
    if first_hit is None:
        return None
    amount = first_hit.group(1)
    amount = amount.replace(',', '')
    amount = amount.replace('$', '')
    return float(amount)
def rank_searcher(text):
    """Extract the "University of Virginia rank" value from the page text.

    The page row has the form ``<rank> of <total>``.  Any total made of
    digits and commas is accepted, so the function is not tied to one
    specific head count, and only a numeric rank is captured so the
    conversion to ``int`` cannot fail.

    Args:
        text: Page source to search, as a string.

    Returns:
        The rank as an ``int`` with thousands separators removed.  When the
        row occurs more than once the last occurrence is used.  Returns
        ``0`` when no row with a numeric rank is found.
    """
    rank_pattern = (
        r'<tr><td>University of Virginia rank</td>'
        r'<td>(\d[\d,]*) of \d[\d,]*<!--not null --></td></tr>'
    )
    ranks = re.findall(rank_pattern, text)
    if not ranks:
        return 0
    return int(ranks[-1].replace(',', ''))
def report(name_input):
    """Look up a person's job title, total pay, and rank.

    The name is normalized with ``name_normalization``, appended to the
    base URL, fetched, and the downloaded page is handed to
    ``job_searcher``, ``salary_searcher`` and ``rank_searcher``.

    Args:
        name_input: The person's name, in any form accepted by
            ``name_normalization``.

    Returns:
        A ``(job, salary, rank)`` tuple built from the three searchers.
        If fetching, decoding, or parsing the page raises an ordinary
        exception, ``(None, 0, 0)`` is returned instead.
    """
    name = name_normalization(name_input)
    url_base = 'http://cs1110.cs.virginia.edu/files/uva2018/'
    try:
        # Percent-encode the slug so characters that are not URL-safe
        # cannot break or redirect the request.
        url = url_base + urllib.parse.quote(name)
        with urllib.request.urlopen(url) as stream:
            text = stream.read().decode()
        return job_searcher(text), salary_searcher(text), rank_searcher(text)
    except Exception:
        # Best-effort lookup: any ordinary failure yields the "not found"
        # tuple.  KeyboardInterrupt and SystemExit are deliberately NOT
        # caught, so the program can still be interrupted or exited.
        return None, 0, 0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement