Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from string import punctuation

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

import pandas as pd
from bs4 import BeautifulSoup
from requests import get
# Path to the CSV file w/ player names. The file must provide a 'Player'
# column, because process_id() reads roster['Player'].
# TODO: point this at the real roster file; 'roster.csv' is a placeholder.
ROSTER_CSV = 'roster.csv'

# The original line embedded the placeholder as a comment inside the call,
# which swallowed the closing parenthesis and made the file unparseable.
roster = pd.read_csv(ROSTER_CSV)
# Overrides applied to the '538id' column by process_id(): whenever a
# generated id equals a key, it is replaced by the corresponding value.
# NOTE(review): presumably the values are the slugs the CARMELO site uses
# for these players -- confirm against the site.
name_corrections = {
    'matt-dellavedova': 'matthew-dellavedova',
    'marcelinho-huertas': 'marcelo-huertas',
    'derrick-jones': 'derrick-jones-jr',
    'john-lucas': 'john-lucas-iii',
    'james-mcadoo': 'james-michael-mcadoo',
    'raulzinho-neto': 'raul-neto',
    'otto-porter': 'otto-porter-jr',
    'glenn-robinson': 'glenn-robinson-iii',
    'domas-sabonis': 'domantas-sabonis',
    'lou-williams': 'louis-williams',
    'joe-young': 'joseph-young',
}

# Address of the FiveThirtyEight CARMELO project page.
url = 'http://projects.fivethirtyeight.com/carmelo/'
def remove_punctuation(s):
    """Return *s* with every period, comma and apostrophe removed.

    Only the three characters ``.``, ``,`` and ``'`` are stripped; all other
    characters, including hyphens and spaces, are kept unchanged.

    Args:
        s: The string to clean.

    Returns:
        A new string without the stripped characters.
    """
    return ''.join(ch for ch in s if ch not in ".,'")
def process_id(roster):
    """Add a '538id' column to *roster*, derived from its 'Player' column.

    Each player name has periods, commas and apostrophes removed (via
    ``remove_punctuation``), spaces replaced by hyphens, and is lower-cased.
    Ids that appear as keys in ``name_corrections`` are then replaced by the
    mapped value.

    Args:
        roster: A pandas DataFrame with a 'Player' column of strings. It is
            modified in place.

    Returns:
        None.
    """
    ids = roster['Player'].apply(remove_punctuation)
    ids = ids.apply(lambda name: name.replace(" ", "-").lower())
    # Assign the corrected Series back instead of calling
    # replace(..., inplace=True) on roster['538id']: that in-place call acts
    # on a column selection which, under pandas Copy-on-Write, is a temporary
    # object, so the corrections would not reach the DataFrame.
    roster['538id'] = ids.replace(name_corrections)
# Derive the '538id' column for every player in the roster.
process_id(roster)

# Download the page. The original passed the undefined name `xurl`; `url` is
# the only address defined in this file.
page = get(url)
# Fail loudly on an HTTP error instead of parsing an error page.
page.raise_for_status()

# BeautifulSoup parses markup, not a requests Response object, so hand it
# the response body.
soup = BeautifulSoup(page.content, 'lxml')

# Keep the matching <div class="market-value"> elements; the original
# discarded the result of the search.
market_values = soup.find_all('div', class_='market-value')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement