Advertisement
Guest User

Untitled

a guest
Apr 26th, 2018
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.54 KB | None | 0 0
  1. import re
  2. import urllib.request
  3.  
  4.  
  5. def name_to_url(name):
  6. if ',' in name:
  7. x = name.index(',')
  8. last = name[0:x]
  9. first = name[x + 2:]
  10. full = first + '-' + last
  11. full = full.lower()
  12. return full
  13.  
  14. if ',' not in name and '-' not in name and ' ' in name:
  15. full = name.replace(' ', '-')
  16. full = full.lower()
  17. return full
  18.  
  19. if ',' not in name and ' ' not in name:
  20. full = name
  21. full = full.lower()
  22. return full
  23.  
  24.  
  25. def report(name):
  26. full = name_to_url(name)
  27. key = 'http://cs1110.cs.virginia.edu/files/uva2016/' + full
  28. try:
  29. page = urllib.request.urlopen(key)
  30. except:
  31. return None, 0, 0
  32.  
  33. jobregex = re.compile('Job title: ([A-Za-z 0-9-&]*)')
  34. moneyregex = re.compile('gross pay: \$([0-9\,]*)')
  35. rankregex = re.compile('rank</td><td>([0-9,]+) of [0-9,]*')
  36. rank = 0
  37.  
  38. for line in page:
  39. text = line.decode('utf-8').strip()
  40. text = text.replace("&amp;", '&')
  41. text = text.replace("&lt","<")
  42. text = text.replace("&gt",">")
  43. for a in jobregex.finditer(text):
  44. job = (a.group(1))
  45. for b in moneyregex.finditer(text):
  46. money = b.group(1)
  47. money = money.replace(',', '')
  48. money = float(money)
  49. if ' rank' in text:
  50. for c in rankregex.finditer(text):
  51. rank = c.group(1)
  52. rank = rank.replace(',', '')
  53. rank = int(rank)
  54. return job, money, rank
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement