Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- import datetime
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt
- from collections import Counter, OrderedDict
# Result files to load, one CSV per season (named season-0910 ... season-1819).
all_files = [
    'season-0910.csv',
    'season-1011.csv',
    'season-1112.csv',
    'season-1213.csv',
    'season-1314.csv',
    'season-1415.csv',
    'season-1516.csv',
    'season-1617.csv',
    'season-1718.csv',
    'season-1819.csv',
]

# Years used further down to build the date windows; each entry lines up with
# the first two digits of the corresponding file name (0910 -> 2009, ...).
year_list = [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018]

# Stack every season into a single frame. A forward slash is used as the
# separator because it is accepted on both Windows and POSIX, whereas the
# previous hard-coded backslash only resolved on Windows.
df = pd.concat(pd.read_csv("archive/" + f) for f in all_files)

# NOTE(review): if the CSVs store dates day-first (e.g. dd/mm/yy), this call
# needs dayfirst=True (or an explicit format=) to avoid swapping day and
# month on ambiguous dates -- confirm against the actual files.
df['Date'] = pd.to_datetime(df['Date'])
# Split the matches into two disjoint buckets for every year in year_list:
#   festive - 1 December of `year` through 31 January of `year + 1`, inclusive
#   normal  - strictly after 31 January and strictly before 1 December of `year`
# The normal bounds are exclusive so that a match played on a boundary day is
# counted once (as festive) instead of landing in both buckets.
festive = []
normal = []
match_dates = df['Date']
for year in year_list:
    festive_start = pd.Timestamp(year=year, month=12, day=1)
    festive_end = pd.Timestamp(year=year + 1, month=1, day=31)
    previous_festive_end = pd.Timestamp(year=year, month=1, day=31)

    festive.append(df[(match_dates >= festive_start) & (match_dates <= festive_end)])
    normal.append(df[(match_dates > previous_festive_end) & (match_dates < festive_start)])

# Dates on or before 31 January of year_list[0] fall in neither bucket; the
# original author chose not to handle anything before the 2009 festive period.
# NOTE(review): dates after 31 January of year_list[-1] + 1 are also in
# neither bucket -- confirm that dropping them is intended.
festive_df = pd.concat(festive)
normal_df = pd.concat(normal)
def get_cs(df):
    """Return the teams credited with a clean sheet, one entry per clean sheet.

    Rows are visited in order. For each row, the value of ``AwayTeam`` is
    emitted when ``FTHG`` equals 0, followed by the value of ``HomeTeam``
    when ``FTAG`` equals 0, so a single row can contribute zero, one or two
    entries.

    ``FTHG`` / ``FTAG`` are presumably the full-time home / away goal
    counts -- confirm against the CSV schema.

    Args:
        df: DataFrame with ``HomeTeam``, ``AwayTeam``, ``FTHG`` and ``FTAG``
            columns.

    Returns:
        A list of team names; a team appears once per clean sheet it kept.
    """
    # (column holding the goals scored by one side,
    #  column naming the opposing side that gets the clean sheet)
    checks = (('FTHG', 'AwayTeam'), ('FTAG', 'HomeTeam'))
    return [
        match[keeper_col]
        for _, match in df.iterrows()
        for goals_col, keeper_col in checks
        if match[goals_col] == 0
    ]
# One list entry per clean sheet kept in each bucket.
festive_clean_sheets = get_cs(festive_df)
normal_clean_sheets = get_cs(normal_df)

# Report clean sheets per match as a percentage. A row where both FTHG and
# FTAG are 0 contributes two entries, so the figure can exceed 100.
festive_matches = festive_df.shape[0]
festive_pct = (len(festive_clean_sheets) / festive_matches) * 100
print("Festive CS: {}%".format(festive_pct))

normal_matches = normal_df.shape[0]
normal_pct = (len(normal_clean_sheets) / normal_matches) * 100
print("Normal CS: {}%".format(normal_pct))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement