Advertisement
Guest User

Untitled

a guest
Dec 7th, 2023
198
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.39 KB | Sports | 0 0
  1. import nfl_data_py as nfl
  2. import numpy as np
  3. import pandas as pd
  4. from sklearn.linear_model import LinearRegression
  5. import matplotlib.pyplot as plt
  6. import seaborn as sns
  7.  
  8. years = list(range(2018, 2023))
  9.  
  10. nfl_data = nfl.import_pbp_data(years)
  11.  
  12. pbp = nfl_data[(nfl_data['pass'] == 1) | (nfl_data['rush'] == 1)]
  13.  
  14. pbp = pbp.dropna(subset=['offense_personnel']).copy()
  15.  
  16. def extract_offense_personnel(personnel):
  17.     rb_te_counts = {'RB': 0, 'TE': 0} # Creating a dictionary for how personnel packages are named
  18.     personnel = personnel.strip() #Make sure the "personnel" has all other unneeded characters removed
  19.     positions = personnel.split(',') #Split by comma to show different positions
  20.     for position in positions:
  21.         try:
  22.             count, position = position.strip().split(' ')
  23.             if 'RB' in position:
  24.                 rb_te_counts['RB'] += int(count) #Count how many RBs in a formation/package
  25.             elif 'TE' in position:
  26.                 rb_te_counts['TE'] += int(count) #Count how many TEs in a formation/package
  27.         except ValueError:
  28.             pass
  29.     return f"{rb_te_counts['RB']}{rb_te_counts['TE']}" #name the personnel
  30.  
  31. pbp['personnel'] = pbp['offense_personnel'].apply(lambda x: extract_offense_personnel(x))
  32.  
  33. epa_data_by_game = pbp.groupby(['posteam', 'week', 'season', 'game_id'])['epa'].mean().reset_index()
  34.  
  35. epa_data = epa_data_by_game.groupby(['posteam', 'season'])['epa'].mean().reset_index()
  36.  
  37. winning_data = pbp.groupby(['posteam', 'season'])['posteam_score'].mean().reset_index() #This is calculating offense points not total points
  38.  
  39. x = winning_data[['posteam_score']].values
  40. y = epa_data[['epa']].values
  41.  
  42. regression = LinearRegression()
  43. regression.fit(x, y)
  44. model = regression.predict(x)
  45.  
  46. print('Coefficients:', regression.coef_)
  47. print('Intercept:', regression.intercept_)
  48. print('R^2 value:', regression.score(x, y))
  49.  
  50. efficiency = pbp.groupby(['posteam', 'season', 'game_id', 'week', 'personnel'])['epa'].mean().reset_index()
  51.  
  52. season_efficiency = pbp.groupby(['posteam', 'season', 'personnel'])['epa'].mean().reset_index()
  53.  
  54. plays = pbp.groupby(['posteam',  'season', 'game_id', 'week', 'personnel'])['play_id'].count().reset_index()
  55.  
  56. total_plays = pbp.groupby(['posteam',  'season', 'game_id', 'week'])['play_id'].count().reset_index()
  57.  
  58. usage_rate = plays['play_id']/total_plays['play_id']
  59.  
  60. efficiency['usage_rate'] = usage_rate
  61.  
  62.  
  63.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement