Advertisement
Guest User

Untitled

a guest
Apr 25th, 2019
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.54 KB | None | 0 0
  1. import re
  2. import urllib.request
  3.  
  4.  
  5. def name_normalization(name_input):
  6. new_name_list = []
  7. if name_input.find(',') != -1:
  8. name_list = name_input.strip().split()
  9. for word in name_list:
  10. new_word = word.strip(", ").lower()
  11. new_name_list.append(new_word)
  12. new_name_list.append(new_name_list[0])
  13.  
  14. new_name_list.remove(new_name_list[(len(new_name_list)-1)])
  15.  
  16. if new_name_list[1].find('.') != -1:
  17. new_name_list[1] = new_name_list[1].replace('.', '')
  18. elif name_input.find('.') != -1:
  19. name_list = name_input.strip().split()
  20. for word in name_list:
  21. new_word = word.strip(". ").lower()
  22. new_name_list.append(new_word)
  23. elif name_input.find(' ') != -1:
  24. name_list = name_input.strip().split()
  25. for word in name_list:
  26. new_word = word.strip().lower()
  27. new_name_list.append(new_word)
  28. else:
  29. new_name_list.append(name_input)
  30. name = ''
  31. for i in range (len(new_name_list)):
  32. name += new_name_list[i]
  33. if i < (len(new_name_list) - 1):
  34. name += '-'
  35. return name
  36.  
  37.  
  38. def job_searcher(text):
  39. job_regex = r'<span class="small text-muted" id="personjob">([^<]*)</span.'
  40. job_finder = re.compile(job_regex)
  41. for match in job_finder.finditer(text):
  42. return match.group(1).replace('&amp;', '&').replace('&#39;', "'")
  43.  
  44.  
  45. def salary_searcher(text):
  46. salary_regex = r'<h2 class="pay" id="paytotal">([^<]*)</h2>'
  47. salary_finder = re.compile(salary_regex)
  48. for match in salary_finder.finditer(text):
  49. return float(match.group(1).replace(',', '').replace('$', ''))
  50.  
  51.  
  52. def rank_searcher(text):
  53. rank_regex = r'<tr><td>University of Virginia rank</td><td>([^\s]+) of 8,582<!--not null --></td></tr>'
  54. rank_finder = re.compile(rank_regex)
  55. rank_string = None
  56.  
  57. for match in rank_finder.finditer(text):
  58. rank_string = match.group(1)
  59. if rank_string is None:
  60. return 0
  61. else:
  62. return int(rank_string.replace(',', ''))
  63.  
  64.  
  65. def report(name_input):
  66. name = name_normalization(name_input)
  67. url_base = 'http://cs1110.cs.virginia.edu/files/uva2018/'
  68. try:
  69. url = url_base + name
  70. with urllib.request.urlopen(url) as stream:
  71. text = stream.read().decode()
  72. job = job_searcher(text)
  73. salary = salary_searcher(text)
  74. rank = rank_searcher(text)
  75. return job, salary, rank
  76. except:
  77. return None, 0, 0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement