Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Train a random forest to predict NCAA tournament game winners.

Each training example is one tournament game, described by the difference
between the two teams' regular-season statistics.  Every game is used twice:
once as (winner - loser) with label 1 and once as (loser - winner) with
label 0, so the classifier sees both outcomes.

NOTE(review): the column names used below ("Season", "TeamID", "WTeamID",
"LTeamID", "TeamName") are assumed from how the script uses the files --
confirm them against the actual CSV headers.
"""

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Season covered by the stats file.  Seeds and results are filtered to this
# season so that stats from one year are never paired with another year's games.
SEASON = 2010


def mirrored_examples(winner_minus_loser, target):
    """Label each game from both teams' points of view.

    Args:
        winner_minus_loser: DataFrame of per-game feature differences
            (winner's value minus loser's value), one row per game.
        target: Name of the label column to add.

    Returns:
        DataFrame with two rows per game: the original difference labelled 1
        and its negation labelled 0, so both classes are equally represented.
    """
    from_winner = winner_minus_loser.copy()
    from_winner[target] = 1
    from_loser = -winner_minus_loser
    from_loser[target] = 0
    return pd.concat([from_winner, from_loser], ignore_index=True)


# Load data
season_stats = pd.read_csv(f"data/cbbm/{SEASON}.csv")
tournament_results = pd.read_csv("data/kaggle/MNCAATourneyCompactResults.csv")
seeds = pd.read_csv("data/kaggle/MNCAATourneySeeds.csv")
team_names = pd.read_csv("data/kaggle/MTeams.csv")

# Keep only the season the stats describe; without this, a team seeded in
# several seasons would be duplicated when joined onto the stats table.
seeds = seeds[seeds["Season"] == SEASON]
tournament_results = tournament_results[tournament_results["Season"] == SEASON]

# Merge team names with seeds
seeds = pd.merge(seeds, team_names, on="TeamID")

# Merge team names with tournament results for winning and losing teams.
# The second merge suffixes the overlapping name columns with _W / _L.
tournament_results = pd.merge(
    tournament_results, team_names, left_on="WTeamID", right_on="TeamID"
)
tournament_results = pd.merge(
    tournament_results,
    team_names,
    left_on="LTeamID",
    right_on="TeamID",
    suffixes=("_W", "_L"),
)

# Merge seeds with season stats; this attaches TeamID to each stats row and
# keeps only teams that were seeded in the tournament.
# NOTE(review): relies on the stats file spelling team names exactly like
# MTeams.csv -- teams whose names differ are silently dropped here.
season_stats = pd.merge(season_stats, seeds, on="TeamName")

# Define features and target
features = [
    'Games Won', 'Win %', 'SoS', 'Pts / Game', 'Opp Pts / Game', 'FG %',
    '3Pt FG %', 'Free Throw %', 'Offense Rating', 'Defense Rating',
    'Adj. Score Margin', 'Rebound %', 'Off. Rebound %', 'Effective FG %',
    'True Shooting %', 'Opp. True Shoot %', 'Pace', 'Turnover %',
    'Opp. Turnover %', 'Turnover Margin', 'Assist %', 'Assists / Turnover',
    'FT / FGA', 'Opp. FT / FGA',
]
target = 'WinningTeam'  # 1 if the first team of the matchup won, else 0

# Prepare data for modeling
# One stats row per team, addressable by TeamID.
stats_by_team = (
    season_stats.drop_duplicates(subset="TeamID").set_index("TeamID")[features]
)

# Only games where both teams have stats can be turned into examples.
known_teams = stats_by_team.index
games = tournament_results[
    tournament_results["WTeamID"].isin(known_teams)
    & tournament_results["LTeamID"].isin(known_teams)
]
if games.empty:
    raise ValueError(
        f"No {SEASON} tournament games could be matched to season stats; "
        "check that team names agree between the stats file and MTeams.csv."
    )

# Per-game feature differences, winner minus loser.
winner_stats = stats_by_team.loc[games["WTeamID"]].to_numpy(dtype=float)
loser_stats = stats_by_team.loc[games["LTeamID"]].to_numpy(dtype=float)
game_diffs = pd.DataFrame(winner_stats - loser_stats, columns=features)

# Split data into training and testing sets.  The split is done per game,
# before mirroring, so a game and its negated twin never end up on opposite
# sides of the split (which would leak the answer into the test set).
train_games, test_games = train_test_split(
    game_diffs, test_size=0.2, random_state=42
)
train_data = mirrored_examples(train_games, target)
test_data = mirrored_examples(test_games, target)
X_train, y_train = train_data[features], train_data[target]
X_test, y_test = test_data[features], test_data[target]

# Train Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate model
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Now you can use this model to predict game results for the current season:
# pass clf.predict the feature differences (team A stats minus team B stats);
# a prediction of 1 means team A is expected to win.
Advertisement
Add Comment
Please, Sign In to add comment