# Untitled: random-forest model for NCAA tournament results, built from per-team season stats.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Season covered by the stats file; the Kaggle tables are filtered down to it.
SEASON = 2010

# Per-team stat columns (from the season stats file) used to describe a matchup.
features = ['Games Won', 'Win %', 'SoS', 'Pts / Game', 'Opp Pts / Game', 'FG %', '3Pt FG %',
            'Free Throw %', 'Offense Rating', 'Defense Rating', 'Adj. Score Margin', 'Rebound %',
            'Off. Rebound %', 'Effective FG %', 'True Shooting %', 'Opp. True Shoot %', 'Pace',
            'Turnover %', 'Opp. Turnover %', 'Turnover Margin', 'Assist %', 'Assists / Turnover',
            'FT / FGA', 'Opp. FT / FGA']
# 1 when the first team of the matchup won the game, 0 when it lost.
target = 'WinningTeam'


def build_matchup_rows(games, team_stats, feature_cols, label_col="WinningTeam"):
    """Turn game results into labelled matchup rows for a classifier.

    Each game produces two rows that share the game's index label:

    * winner's stats minus loser's stats, labelled 1
    * loser's stats minus winner's stats, labelled 0

    Mirroring keeps the two classes balanced and gives the label a real
    meaning ("did the first team win?").

    Args:
        games: DataFrame with ``WTeamID`` and ``LTeamID`` columns, one row
            per game.
        team_stats: DataFrame with a ``TeamID`` column and every column in
            ``feature_cols``. Feature values must be convertible to float.
            If a team appears more than once, its first row is used.
        feature_cols: Names of the stat columns to difference.
        label_col: Name of the output label column.

    Returns:
        DataFrame with ``feature_cols`` followed by ``label_col``. Games in
        which either team has no stats row, or has a missing feature value,
        are left out.
    """
    feature_cols = list(feature_cols)
    # One stats row per team so that reindexing by team id is unambiguous.
    per_team = team_stats.drop_duplicates("TeamID").set_index("TeamID")[feature_cols]

    # Teams without stats come back as all-NaN rows and are dropped below.
    winner_stats = per_team.reindex(index=games["WTeamID"].to_numpy()).to_numpy(dtype=float)
    loser_stats = per_team.reindex(index=games["LTeamID"].to_numpy()).to_numpy(dtype=float)

    margin = pd.DataFrame(winner_stats - loser_stats, index=games.index, columns=feature_cols)
    first_team_won = margin.assign(**{label_col: 1})
    first_team_lost = (-margin).assign(**{label_col: 0})

    rows = pd.concat([first_team_won, first_team_lost])
    return rows.dropna(subset=feature_cols)


# Guarded so that importing this file (e.g. to reuse build_matchup_rows) does
# not read files or train a model. Names assigned below are still module-level
# globals when the file is run as a script or in a notebook.
if __name__ == "__main__":
    # Load data
    season_stats = pd.read_csv(f"data/cbbm/{SEASON}.csv")
    tournament_results = pd.read_csv("data/kaggle/MNCAATourneyCompactResults.csv")
    seeds = pd.read_csv("data/kaggle/MNCAATourneySeeds.csv")
    team_names = pd.read_csv("data/kaggle/MTeams.csv")

    # The stats file covers a single season, so keep only that season's seeds
    # and games. Otherwise a team is duplicated once per tournament appearance
    # and games from other years are described with the wrong season's stats.
    # NOTE(review): assumes the Kaggle tables carry a "Season" column - confirm.
    if "Season" in seeds.columns:
        seeds = seeds[seeds["Season"] == SEASON]
    if "Season" in tournament_results.columns:
        tournament_results = tournament_results[tournament_results["Season"] == SEASON]

    # Merge team names with seeds
    seeds = pd.merge(seeds, team_names, on="TeamID")

    # Merge team names with tournament results for winning and losing teams
    tournament_results = pd.merge(tournament_results, team_names, left_on="WTeamID", right_on="TeamID")
    tournament_results = pd.merge(tournament_results, team_names, left_on="LTeamID", right_on="TeamID",
                                  suffixes=("_W", "_L"))

    # Merge seeds with season stats. This keeps only seeded teams and attaches
    # their TeamID. Teams whose names are spelled differently in the two
    # sources do not match and are dropped here.
    season_stats = pd.merge(season_stats, seeds, left_on="TeamName", right_on="TeamName")

    # Prepare data for modeling: one mirrored pair of rows per tournament game.
    all_data = build_matchup_rows(tournament_results, season_stats, features, target)
    if all_data.empty:
        raise ValueError(
            "No tournament games could be matched to season stats; "
            "check SEASON and the team-name spelling in both sources."
        )

    # Split by game, not by row: the two mirrored rows of a game are exact
    # negatives of each other, so putting one in each set would leak the answer.
    game_ids = all_data.index.unique().to_numpy()
    train_ids, test_ids = train_test_split(game_ids, test_size=0.2, random_state=42)
    in_train = all_data.index.isin(train_ids)
    X_train, y_train = all_data.loc[in_train, features], all_data.loc[in_train, target]
    X_test, y_test = all_data.loc[~in_train, features], all_data.loc[~in_train, target]

    # Train Random Forest Classifier
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Evaluate model
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # To predict a new game, pass clf the same feature differences
    # (first team's stats minus second team's stats) for that matchup.