Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrape the FiveThirtyEight Premier League predictions page and collect, for
# every match container found, the two team identifiers and the home / draw /
# away probabilities into the ``matchFrame`` DataFrame.
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import re

# Browser-like User-Agent sent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/574.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
url = 'https://projects.fivethirtyeight.com/soccer-predictions/premier-league/'

# requests applies no timeout by default, so an unresponsive server would
# block this script forever; bound the wait explicitly.
requestResult = requests.get(url, headers=headers, timeout=30)
soup = BeautifulSoup(requestResult.content, "lxml")

# One entry per match, kept in page order so the lists stay aligned.
homeProbs = []
drawProbs = []
awayProbs = []
homeTeams = []
awayTeams = []

# NOTE(review): convertToDecimal is not defined in this snippet; it must be
# defined or imported before this loop runs. Presumably it turns the
# probability text shown on the page into a number -- confirm.
matches = soup.find_all('div', class_='match-container')
for match in matches:
    # The "match-top" element holds both the first team's probability and
    # the tie probability, so look it up once per match.
    topRow = match.find(class_="match-top")
    bottomRow = match.find(class_="match-bottom")

    homeProbs.append(convertToDecimal(topRow.find(class_="prob").text))
    drawProbs.append(convertToDecimal(topRow.find(class_="tie-prob").text))
    awayProbs.append(convertToDecimal(bottomRow.find(class_="prob").text))

    # Team identifiers are read from the container's data attributes.
    homeTeams.append(match['data-team1'])
    awayTeams.append(match['data-team2'])

# Assemble the result in a single step; column order matches the order of
# the keys below.
matchFrame = pd.DataFrame({
    'homeTeam': homeTeams,
    'awayTeam': awayTeams,
    'homeProb': homeProbs,
    'drawProb': drawProbs,
    'awayProb': awayProbs,
})
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement