samipote

Untitled

Mar 19th, 2024
390
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.27 KB | None | 0 0
  1. import pandas as pd
  2. from sklearn.model_selection import train_test_split
  3. from sklearn.ensemble import RandomForestClassifier
  4. from sklearn.metrics import accuracy_score
  5.  
  6. # Load data
  7. season_stats = pd.read_csv("data/cbbm/2010.csv")
  8. tournament_results = pd.read_csv("data/kaggle/MNCAATourneyCompactResults.csv")
  9. seeds = pd.read_csv("data/kaggle/MNCAATourneySeeds.csv")
  10. team_names = pd.read_csv("data/kaggle/MTeams.csv")
  11.  
  12. # Merge team names with seeds
  13. seeds = pd.merge(seeds, team_names, on="TeamID")
  14.  
  15. # Merge team names with tournament results for winning and losing teams
  16. tournament_results = pd.merge(tournament_results, team_names, left_on="WTeamID", right_on="TeamID")
  17. tournament_results = pd.merge(tournament_results, team_names, left_on="LTeamID", right_on="TeamID", suffixes=("_W", "_L"))
  18.  
  19. # Feature engineering (if needed)
  20.  
  21. # Merge seeds with season stats
  22. season_stats = pd.merge(season_stats, seeds, left_on="TeamName", right_on="TeamName")
  23.  
  24. # Define features and target
  25. features = ['Games Won', 'Win %', 'SoS', 'Pts / Game', 'Opp Pts / Game', 'FG %', '3Pt FG %',
  26.             'Free Throw %', 'Offense Rating', 'Defense Rating', 'Adj. Score Margin', 'Rebound %',
  27.             'Off. Rebound %', 'Effective FG %', 'True Shooting %', 'Opp. True Shoot %', 'Pace',
  28.             'Turnover %', 'Opp. Turnover %', 'Turnover Margin', 'Assist %', 'Assists / Turnover',
  29.             'FT / FGA', 'Opp. FT / FGA']
  30. target = 'WinningTeam'  # Assuming you want to predict the winning team
  31.  
  32. # Prepare data for modeling
  33. # Create a label for winning team
  34. season_stats['WinningTeam'] = 1
  35.  
  36. # Create a label for losing team
  37. tournament_results['WinningTeam'] = 0
  38.  
  39. # Combine winning and losing teams data
  40. all_data = pd.concat([season_stats[features + [target]], tournament_results[features + [target]]])
  41.  
  42. # Split data into training and testing sets
  43. X_train, X_test, y_train, y_test = train_test_split(all_data[features], all_data[target], test_size=0.2, random_state=42)
  44.  
  45. # Train Random Forest Classifier
  46. clf = RandomForestClassifier(n_estimators=100, random_state=42)
  47. clf.fit(X_train, y_train)
  48.  
  49. # Evaluate model
  50. y_pred = clf.predict(X_test)
  51. accuracy = accuracy_score(y_test, y_pred)
  52. print("Accuracy:", accuracy)
  53.  
  54. # Now you can use this model to predict game results for the current season.
  55.  
Advertisement
Add Comment
Please, Sign In to add comment