Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import urllib.request
def name_to_url(name):
    """Convert a person's name into a lowercase, hyphen-separated URL slug.

    Accepted forms:
      * ``"Last, First"`` -- the parts around the first comma are swapped.
      * ``"First Last"``  -- runs of whitespace become single hyphens.
      * ``"first-last"``  -- already a slug; it is only lowercased.

    Existing hyphens are kept, so ``"Mary-Jane Smith"`` becomes
    ``"mary-jane-smith"``.

    Args:
        name: The name as a string in any of the forms above.

    Returns:
        The slug as a ``str``. An empty or whitespace-only name yields ``""``.
    """
    if ',' in name:
        # Split on the first comma only; the space after the comma is
        # optional because the whitespace split below discards padding.
        last, _, first = name.partition(',')
        name = first + ' ' + last
    # split() with no argument trims the ends and collapses repeated
    # whitespace, so no empty segment ever turns into a doubled hyphen.
    return '-'.join(name.split()).lower()
def report(name):
    """Look up a person's job title, gross pay and pay rank.

    Builds the page URL from ``name`` via ``name_to_url``, downloads the
    page and scans it line by line for the "Job title", "gross pay" and
    "rank" fields. When a field occurs more than once, the last occurrence
    wins.

    Args:
        name: The person's name, in any form ``name_to_url`` accepts.

    Returns:
        A ``(job, money, rank)`` tuple: the job title as ``str``, the gross
        pay as ``float`` and the rank as ``int``. If the page cannot be
        fetched the result is ``(None, 0, 0)``; a field that is absent from
        a fetched page keeps that same default.
    """
    url = 'http://cs1110.cs.virginia.edu/files/uva2016/' + name_to_url(name)
    try:
        page = urllib.request.urlopen(url)
    except Exception:
        # Deliberately best-effort: any fetch failure (unknown person,
        # network error, malformed URL) is reported as "no data". Catching
        # Exception rather than using a bare except lets KeyboardInterrupt
        # and SystemExit propagate.
        return None, 0, 0

    # Raw strings keep the regex escapes from being read as (invalid)
    # string escapes. The hyphen sits last in the class so it is
    # unambiguously a literal.
    job_re = re.compile(r'Job title: ([A-Za-z 0-9&-]*)')
    money_re = re.compile(r'gross pay: \$([0-9,]*)')
    rank_re = re.compile(r'rank</td><td>([0-9,]+) of [0-9,]*')

    # Defaults mirror the fetch-failure result so a page missing a field
    # never leaves a name unbound at the return.
    job, money, rank = None, 0, 0

    # The context manager closes the HTTP response even if decoding fails.
    with page:
        for raw_line in page:
            text = raw_line.decode('utf-8').strip()
            # Decode the basic HTML entities so the patterns see literal
            # characters. '&amp;' goes last so that '&amp;lt;' is not
            # decoded twice.
            text = text.replace('&lt;', '<').replace('&gt;', '>')
            text = text.replace('&amp;', '&')

            for match in job_re.finditer(text):
                job = match.group(1)

            for match in money_re.finditer(text):
                digits = match.group(1).replace(',', '')
                if digits:  # skip an empty capture instead of float('')
                    money = float(digits)

            if ' rank' in text:
                for match in rank_re.finditer(text):
                    digits = match.group(1).replace(',', '')
                    if digits:
                        rank = int(digits)

    return job, money, rank
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement