Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import nfl_data_py as nfl
- import numpy as np
- import pandas as pd
- from sklearn.linear_model import LinearRegression
- import matplotlib.pyplot as plt
- import seaborn as sns
# Seasons to analyse: 2018 through 2022 (the end of range() is exclusive).
years = list(range(2018, 2023))
nfl_data = nfl.import_pbp_data(years)

# Keep only rows flagged as a pass or a rush, then drop rows with no recorded
# offensive personnel. The final copy() gives an independent frame so columns
# can be added to it later without touching nfl_data.
pbp = (
    nfl_data.loc[(nfl_data['pass'] == 1) | (nfl_data['rush'] == 1)]
    .dropna(subset=['offense_personnel'])
    .copy()
)
def extract_offense_personnel(personnel):
    """Return a personnel code (RB count followed by TE count) for a package.

    The input is read as a comma-separated list of "<count> <position>"
    entries, for example "1 RB, 1 TE, 3 WR", which yields "11".

    Args:
        personnel: Personnel description string.

    Returns:
        str: The total RB count immediately followed by the total TE count.
        Entries that are not exactly a count and a position, or whose count
        is not an integer, are ignored. Positions other than RB/TE do not
        affect the result, so an empty string yields "00".
    """
    rb_te_counts = {'RB': 0, 'TE': 0}

    for entry in personnel.split(','):
        # Split on any run of whitespace: a doubled space or a tab inside an
        # entry must not cause that entry to be dropped.
        tokens = entry.split()
        if len(tokens) != 2:
            continue  # Malformed entry (including an empty one): skip it.
        count_text, position = tokens

        # Substring match, RB checked before TE, as in the original logic.
        group = next((key for key in rb_te_counts if key in position), None)
        if group is None:
            continue

        try:
            rb_te_counts[group] += int(count_text)
        except ValueError:
            # Best-effort parsing: a non-numeric count is ignored, not fatal.
            continue

    return f"{rb_te_counts['RB']}{rb_te_counts['TE']}"
# Tag every play with the code extract_offense_personnel builds from its
# offense_personnel string (RB count followed by TE count).
pbp['personnel'] = pbp['offense_personnel'].apply(extract_offense_personnel)
# Mean EPA per team per game, then the mean of those game-level values per
# team-season, so each game carries equal weight in the season figure.
epa_data_by_game = (
    pbp.groupby(['posteam', 'week', 'season', 'game_id'])['epa']
    .mean()
    .reset_index()
)
epa_data = (
    epa_data_by_game.groupby(['posteam', 'season'])['epa']
    .mean()
    .reset_index()
)

# Author's note: this measures the offense's points, not total points.
# NOTE(review): the mean is taken over every play in the team-season, so it is
# an average of the posteam_score values across plays rather than a per-game
# points figure -- confirm that is the intended measure.
winning_data = (
    pbp.groupby(['posteam', 'season'])['posteam_score']
    .mean()
    .reset_index()
)

# x and y are paired by row position; both frames come from grouping the same
# plays by ['posteam', 'season'].
x = winning_data[['posteam_score']].to_numpy()
y = epa_data[['epa']].to_numpy()

# Fit season EPA as a linear function of the mean score and report the fit.
regression = LinearRegression()
regression.fit(x, y)
model = regression.predict(x)

print('Coefficients:', regression.coef_)
print('Intercept:', regression.intercept_)
print('R^2 value:', regression.score(x, y))
# Keys identifying one team's offense in one game; adding 'personnel' narrows
# that to a single personnel package within the game.
game_keys = ['posteam', 'season', 'game_id', 'week']
package_keys = game_keys + ['personnel']

# Mean EPA per personnel package, per game and per season.
efficiency = pbp.groupby(package_keys)['epa'].mean().reset_index()
season_efficiency = pbp.groupby(['posteam', 'season', 'personnel'])['epa'].mean().reset_index()

# Play counts per package within a game, and per game overall.
plays = pbp.groupby(package_keys)['play_id'].count().reset_index()
total_plays = pbp.groupby(game_keys)['play_id'].count().reset_index()

# plays has one row per package per game while total_plays has one row per
# game, so dividing the two columns directly would pair unrelated rows by
# index label and leave NaN past the end of total_plays. Attach each game's
# total to its package rows by key instead. A left many-to-one merge keeps
# the row order and row count of plays.
plays_with_totals = plays.merge(
    total_plays,
    on=game_keys,
    how='left',
    suffixes=('', '_total'),
    validate='many_to_one',
)
usage_rate = plays_with_totals['play_id'] / plays_with_totals['play_id_total']

# efficiency and plays are built from the same groupby keys over the same
# frame, so their rows line up one-to-one by position.
efficiency['usage_rate'] = usage_rate
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement