Advertisement
Guest User

Untitled

a guest
Dec 7th, 2016
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.20 KB | None | 0 0
  1. import pandas as pd
  2. from bs4 import BeautifulSoup
  3. from urllib2 import urlopen
  4. from string import punctuation
  5. from requests import get
  6.  
  7. roster = pd.read_csv(#file w/ player names)
  8.  
  9. name_corrections = {
  10.  
  11. 'matt-dellavedova' : 'matthew-dellavedova',
  12. 'marcelinho-huertas' : 'marcelo-huertas',
  13. 'derrick-jones' : 'derrick-jones-jr',
  14. 'john-lucas' : 'john-lucas-iii',
  15. 'james-mcadoo' : 'james-michael-mcadoo',
  16. 'raulzinho-neto' : 'raul-neto',
  17. 'otto-porter' : 'otto-porter-jr',
  18. 'glenn-robinson' : 'glenn-robinson-iii',
  19. 'domas-sabonis' : 'domantas-sabonis',
  20. 'lou-williams' : 'louis-williams',
  21. 'joe-young' : 'joseph-young',
  22.  
  23. }
  24.  
  25. url = 'http://projects.fivethirtyeight.com/carmelo/'
  26.  
  27. def remove_punctuation(s):
  28. s = ''.join([i for i in s if i not in ".,'"])
  29. return s
  30.  
  31. def process_id(roster):
  32.  
  33. roster['538id'] = roster['Player'].apply(remove_punctuation)
  34.  
  35. roster['538id'] = roster['538id'].apply(lambda x: x.replace(" ", "-"))
  36.  
  37. roster['538id'] = roster['538id'].apply(lambda x: x.lower())
  38.  
  39. roster['538id'].replace(name_corrections, inplace=True)
  40.  
  41.  
  42. process_id(roster)
  43.  
  44. page = get(xurl)
  45.  
  46. soup = BeautifulSoup(page, 'lxml')
  47.  
  48. soup.findAll('div', class_='market-value')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement