Advertisement
PyNoob1

Oddsportal Scraper

Oct 31st, 2022 (edited)
859
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.66 KB | Sports | 0 0
  1. import os
  2. import pandas as pd
  3. from selenium import webdriver
  4. from bs4 import BeautifulSoup as bs
  5.  
  6. browser = webdriver.Chrome()
  7.  
  8. class GameData:
  9.  
  10.     def __init__(self):
  11.         self.date = []
  12.         self.time = []
  13.         self.game = []
  14.         self.score = []
  15.         self.home_odds = []
  16.         self.draw_odds = []
  17.         self.away_odds = []
  18.         self.country = []
  19.         self.league = []
  20.  
  21.  
  22. def parse_data(url):
  23.     while True:
  24.         try:
  25.             browser.get(url)
  26.             df = pd.read_html(browser.page_source)[0]
  27.             break
  28.         except KeyError:
  29.             browser.quit()
  30.             continue
  31.     html = browser.page_source
  32.     soup = bs(html, "lxml")
  33.     cont = soup.find('div', {'id': 'wrap'})
  34.     content = cont.find('div', {'id': 'col-content'})
  35.     content = content.find('table', {'class': 'table-main'}, {'id': 'tournamentTable'})
  36.     main = content.find('th', {'class': 'first2 tl'})
  37.     if main is None:
  38.         return None
  39.     count = main.findAll('a')
  40.     country = count[1].text
  41.     league = count[2].text
  42.     game_data = GameData()
  43.     game_date = None
  44.     for row in df.itertuples():
  45.         if not isinstance(row[1], str):
  46.             continue
  47.         elif ':' not in row[1]:
  48.             game_date = row[1].split('-')[0]
  49.             continue
  50.         game_data.date.append(game_date)
  51.         game_data.time.append(row[1])
  52.         game_data.game.append(row[2])
  53.         game_data.score.append(row[3])
  54.         game_data.home_odds.append(row[4])
  55.         game_data.draw_odds.append(row[5])
  56.         game_data.away_odds.append(row[6])
  57.         game_data.country.append(country)
  58.         game_data.league.append(league)
  59.     return game_data
  60.  
  61. # Your urls go here. You can put in as many as you like
  62.  
  63. urls = {
  64. "https://www.oddsportal.com/soccer/england/premier-league/results/",
  65. "https://www.oddsportal.com/soccer/england/premier-league/results/#/page/2/",
  66. }
  67.  
  68. if __name__ == '__main__':
  69.  
  70.     results = None
  71.  
  72.     for url in urls:
  73.         try:
  74.             game_data = parse_data(url)
  75.             if game_data is None:
  76.                 continue
  77.             result = pd.DataFrame(game_data.__dict__)
  78.             if results is None:
  79.                 results = result
  80.             else:
  81.                 results = results.append(result, ignore_index=True)
  82.         except ValueError:
  83.             game_data = parse_data(url)
  84.             if game_data is None:
  85.                 continue
  86.             result = pd.DataFrame(game_data.__dict__)
  87.             if results is None:
  88.                 results = result
  89.             else:
  90.                 results = results.append(result, ignore_index=True)
  91.  
  92. results.to_csv(data.csv)
  93.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement