Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- from selenium import webdriver
- from bs4 import BeautifulSoup as bs
# Single Chrome WebDriver session, opened as soon as the module is loaded.
# parse_data() drives it through this module-level name.
browser = webdriver.Chrome()
class GameData:
    """Column-oriented accumulator for scraped match rows.

    Every attribute is an independent list; the i-th entries of all lists
    together describe one match. The attribute names (and their creation
    order) double as the column names when ``__dict__`` is handed to
    ``pandas.DataFrame``.
    """

    def __init__(self):
        # Create one fresh, empty list per column, in column order.
        for column in (
            "date",
            "time",
            "game",
            "score",
            "home_odds",
            "draw_odds",
            "away_odds",
            "country",
            "league",
        ):
            setattr(self, column, [])
def parse_data(url):
    """Load *url* in the shared browser and collect its result rows.

    The page is expected to contain ``div#wrap > div#col-content`` with a
    ``table.table-main#tournamentTable`` whose ``th.first2.tl`` header cell
    holds breadcrumb links; the second and third links are used as the
    country and league names.

    Args:
        url: Address of a results page to open with the module-level
            ``browser``.

    Returns:
        A ``GameData`` whose lists hold one entry per match row, or ``None``
        when the expected containers, header cell, or breadcrumb links are
        not present in the page.
    """
    # Local import keeps this block self-contained; StringIO avoids pandas'
    # deprecation of passing literal HTML strings to read_html.
    from io import StringIO

    browser.get(url)
    # Take ONE snapshot so the DataFrame and the soup describe the same DOM.
    html = browser.page_source

    soup = bs(html, "lxml")
    wrap = soup.find('div', {'id': 'wrap'})
    if wrap is None:
        return None
    col_content = wrap.find('div', {'id': 'col-content'})
    if col_content is None:
        return None
    # Both filters must live in the same attrs dict: a third positional
    # argument to find() is `recursive`, so a separate {'id': ...} dict
    # would be silently ignored as a filter.
    table = col_content.find(
        'table', {'class': 'table-main', 'id': 'tournamentTable'}
    )
    if table is None:
        return None
    header = table.find('th', {'class': 'first2 tl'})
    if header is None:
        return None

    links = header.find_all('a')
    if len(links) < 3:
        # Not enough breadcrumb links to read country and league.
        return None
    country = links[1].text
    league = links[2].text

    # First table on the page, with its first row used as the header.
    df = pd.read_html(StringIO(html), header=0)[0]

    game_data = GameData()
    game_date = None
    for row in df.itertuples():
        # row[0] is the DataFrame index; row[1] is the first real column.
        first_cell = row[1]
        if not isinstance(first_cell, str):
            # Non-text cells (e.g. NaN filler rows) carry no match data.
            continue
        if ':' not in first_cell:
            # No clock time -> treated as a date separator row; keep the
            # text before the first '-' as the date for the rows below it.
            game_date = first_cell.split('-')[0]
            continue
        game_data.date.append(game_date)
        game_data.time.append(first_cell)
        game_data.game.append(row[2])
        game_data.score.append(row[3])
        game_data.home_odds.append(row[4])
        game_data.draw_odds.append(row[5])
        game_data.away_odds.append(row[6])
        game_data.country.append(country)
        game_data.league.append(league)
    return game_data
# Result pages to scrape: pages 1-9 of the current Premier League season
# followed by the 2019-2020 and 2018-2019 seasons.
# This is an ordered list rather than a set so that pages are always visited
# (and rows therefore always collected) in the same season/page order.
urls = [
    f"https://www.oddsportal.com/soccer/england/{season}/results/#/page/{page}"
    for season in (
        "premier-league",
        "premier-league-2019-2020",
        "premier-league-2018-2019",
    )
    for page in range(1, 10)
]
if __name__ == '__main__':
    # Scrape every configured page, then combine the per-page frames once.
    frames = []
    try:
        for url in urls:
            game_data = parse_data(url)
            if game_data is None:
                # Page did not contain the expected table header; skip it.
                continue
            # The instance attributes are the columns of the per-page frame.
            frames.append(pd.DataFrame(vars(game_data)))
    finally:
        # Always release the browser process, even if a page fails.
        browser.quit()

    # A single concat replaces repeated DataFrame.append calls, which were
    # removed in pandas 2.0 and re-copied all rows on every iteration.
    results = pd.concat(frames, ignore_index=True) if frames else None
    print(results)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement