# Untitled

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
import numexpr as ne

# Record the wall-clock start so the total run time can be reported later.
start_time = time.time()

# Widen pandas' console output limits (rows, columns, line width, cell width).
for _option_name, _option_value in (
    ('display.max_rows', 30000),
    ('display.max_columns', 500),
    ('display.width', 1000),
    ('display.max_colwidth', None),
):
    pd.set_option(_option_name, _option_value)

# Calculating Selection Thresholds
# Input CSV locations, given as string paths. As written they are resolved
# relative to the current working directory; point them at the real data
# directory if the files live elsewhere.
predictions_archive_file = "predictions_archive.csv"
train_data_file = "historical_data_cleaned.csv"
selection_weights_file = "selection.csv"

# Load the three inputs into DataFrames.
train_data = pd.read_csv(train_data_file)
selection_weights = pd.read_csv(selection_weights_file)
predictions_archive = pd.read_csv(predictions_archive_file)

# Parse the timestamp columns so both frames hold datetime values in
# 'datetime' / 'match_datetime' rather than raw CSV strings.
train_data['datetime'] = pd.to_datetime(train_data['datetime'])
predictions_archive['match_datetime'] = pd.to_datetime(predictions_archive['match_datetime'])
# print(train_data.head())

# Print Missing Dates
# Create a date range for the past year
# start_date = datetime.now() - timedelta(days=365)
# end_date = datetime.now()
# date_range = pd.date_range(start=start_date, end=end_date)
#
# # Get the missing dates
# missing_dates = set(date_range.date) - set(train_data['datetime'].dt.date)
#
# # Print out the missing dates
# if missing_dates:
#     print("The following dates are missing:")
#     for date in sorted(missing_dates):
#         print(date.strftime('%Y-%m-%d'))
# else:
#     print("All dates for the past year are present in the dataframe.")

# Setting Selection Weights
# Build one row per distinct (country, league) pair present in train_data.
# Rows with a missing country or league are dropped and the pairs are sorted,
# which mirrors what a groupby on those two keys yields, without relying on
# groupby.apply reading the grouping columns.
grouped_train_data = (
    train_data[['country', 'league']]
    .dropna()
    .drop_duplicates()
    .sort_values(['country', 'league'])
    .reset_index(drop=True)
)
print("Setting default selection Weights")
# Every league starts from the same default thresholds.
grouped_train_data = grouped_train_data.assign(Win=1.1, DNB=0.7, O_1_5=3.2, U_4_5=2.2)
# One-off seeding of the weights file for the backtest (kept disabled once done):
# grouped_train_data.to_csv(selection_weights_file, index=False)

# Attach the actual home_score / away_score from train_data to every archived
# prediction. Rows are matched on kick-off time, country, league and both team
# names; a left join keeps predictions that have no matching historical row.
# train_data's own 'datetime' key column is dropped again after the join.
predicted = pd.merge(
    predictions_archive,
    train_data[['datetime', 'home_score', 'away_score', 'country', 'league', 'home_team', 'away_team']],
    how='left',
    left_on=['match_datetime', 'country', 'league', 'home_team', 'away_team'],
    right_on=['datetime', 'country', 'league', 'home_team', 'away_team'],
).drop(columns='datetime')

# "merging and adding columns from grouped_train_data to predicted"

# Look up the per-league thresholds for every prediction row (left join on
# country and league), then copy just the four threshold columns onto predicted.
merged_data = predicted.merge(grouped_train_data, how='left', on=['country', 'league'])
grouped_train_columns = ['Win', 'DNB', 'O_1_5', 'U_4_5']
predicted[grouped_train_columns] = merged_data[grouped_train_columns]

# Absolute goal margin and goal total, for the actual and the predicted score.
predicted['score_difference'] = (predicted['home_score'] - predicted['away_score']).abs()
predicted['total_score'] = predicted['home_score'] + predicted['away_score']
predicted['predicted_score_difference'] = (
    predicted['predicted_home_score'] - predicted['predicted_away_score']
).abs()
predicted['predicted_total_score'] = predicted['predicted_home_score'] + predicted['predicted_away_score']

# Outcome labels: 'home', 'away', otherwise 'draw'. Rows whose scores are
# missing (NaN) compare False both ways and therefore also end up as 'draw'.
predicted['result'] = np.select(
    [
        predicted['home_score'] > predicted['away_score'],
        predicted['home_score'] < predicted['away_score'],
    ],
    ['home', 'away'],
    default='draw',
)
predicted['predicted_result'] = np.select(
    [
        predicted['predicted_home_score'] > predicted['predicted_away_score'],
        predicted['predicted_home_score'] < predicted['predicted_away_score'],
    ],
    ['home', 'away'],
    default='draw',
)

# Flag whether the predicted outcome label equals the actual one.
predicted['result_match'] = np.where(
    predicted['result'].eq(predicted['predicted_result']), 'match', 'no match')


# Creating selection functions
def selection(row):
    """Pick the bet label for one prediction row.

    Args:
        row: Mapping or pandas row providing ``predicted_score_difference``,
            ``predicted_total_score`` and the thresholds ``Win``, ``DNB``,
            ``O_1_5`` and ``U_4_5``.

    Returns:
        One of ``"W & O 1.5"``, ``"W"``, ``"O 1.5"``, ``"DNB"``, ``"U 4.5"``
        or ``"N"``. ``None`` when no rule applies, e.g. when the inputs are
        NaN or a value sits exactly on a threshold.
    """
    margin = row["predicted_score_difference"]
    win_threshold = row["Win"]

    # A margin above the Win threshold is always a win pick; the predicted
    # goal total only decides whether "over 1.5" is added to it.
    if margin > win_threshold:
        if row["predicted_total_score"] > row["O_1_5"]:
            return "W & O 1.5"
        return "W"

    total = row["predicted_total_score"]
    if total > row["O_1_5"]:
        return "O 1.5"

    # The combined labels "O 1.5 or DNB" and "W & U 4.5" are never produced:
    # "O 1.5" and "W" are decided first and already cover those cases.
    dnb_threshold = row["DNB"]
    if dnb_threshold < margin < win_threshold:
        return "DNB"
    if total < row["U_4_5"]:
        return "U 4.5"
    if margin < dnb_threshold:
        return "N"

    # No rule matched (all comparisons were False).
    return None


def selection_match(row):
    """Grade one row's pick against the actual match outcome.

    Args:
        row: Mapping or pandas row providing ``selection``, ``home_score``,
            ``away_score``, ``total_score``, ``result`` and
            ``predicted_result``. ``result`` is expected to use the lowercase
            labels ``'home'``, ``'away'`` and ``'draw'``.

    Returns:
        ``"No Sel."`` when the pick is ``"N"``; ``"NA"`` when either actual
        score is missing; otherwise ``"Match"`` if the pick succeeded and
        ``"No Match"`` if it did not (including picks with no grading rule).
    """
    pick = row["selection"]
    if pick == "N":
        return "No Sel."

    # Missing scores must be caught before any outcome rule is evaluated,
    # so an unscored match is never graded as a win or a loss.
    if pd.isna(row["home_score"]) or pd.isna(row["away_score"]):
        return "NA"

    if pick == "U 4.5":
        hit = (row["home_score"] + row["away_score"]) < 5
    elif pick == "W":
        hit = row["result"] == row["predicted_result"]
    elif pick == "W & O 1.5":
        hit = row["result"] == row["predicted_result"] and row["total_score"] > 1
    elif pick == "O 1.5":
        hit = row["total_score"] > 1
    elif pick == "DNB":
        # Draw-no-bet also succeeds on a draw; compare with the lowercase
        # 'draw' label that the result column actually contains.
        hit = row["result"] == row["predicted_result"] or row["result"] == "draw"
    else:
        hit = False

    return "Match" if hit else "No Match"


# Derive each row's pick, then grade it against the actual outcome.
predicted['selection'] = predicted.apply(selection, axis=1)
predicted['selection_match'] = predicted.apply(selection_match, axis=1)

# Modifying selection weights
# For rows whose pick failed, move the relevant threshold just past that row's
# predicted value (by 0.02) so the same prediction would no longer trigger the
# pick. Rows that need no change keep their current threshold.
failed_pick = predicted['selection_match'] == "No Match"
pick = predicted['selection']
win_pick = pick.isin(["W", "W & O 1.5"])
wrong_side = predicted['result'] != predicted['predicted_result']
# The result column holds lowercase labels, so a draw is 'draw'.
drawn = predicted['result'] == 'draw'
raised_margin = predicted['predicted_score_difference'] + 0.02

# Over 1.5: a failed over pick where fewer than 2 goals were scored.
raise_o_1_5 = failed_pick & pick.isin(["O 1.5", "W & O 1.5"]) & (predicted['total_score'] < 2)
predicted['O_1_5'] = np.where(
    raise_o_1_5, predicted['predicted_total_score'] + 0.02, predicted['O_1_5'])

# Draw-no-bet: any failed DNB pick, or a failed win pick that called the wrong
# side in a match that was not drawn.
raise_dnb = failed_pick & ((pick == "DNB") | (win_pick & wrong_side & ~drawn))
predicted['DNB'] = np.where(raise_dnb, raised_margin, predicted['DNB'])

# Win: any failed "W" pick, or a failed "W & O 1.5" pick whose side was wrong
# or whose match was drawn.
raise_win = failed_pick & ((pick == "W") | ((pick == "W & O 1.5") & (wrong_side | drawn)))
predicted['Win'] = np.where(raise_win, raised_margin, predicted['Win'])

# Under 4.5: an under pick where more than 4 goals were scored lowers the
# threshold to just below the predicted total.
lower_u_4_5 = (predicted['total_score'] > 4) & (pick == "U 4.5")
predicted['U_4_5'] = np.where(
    lower_u_4_5, predicted['predicted_total_score'] - 0.02, predicted['U_4_5'])


# Grouping selection values
# Collapse the row-level thresholds to one set per (country, league): the
# largest Win, DNB and O_1_5 and the smallest U_4_5 seen in that league.
grouped_predicted = predicted.groupby(['country', 'league']).agg(
    Win=('Win', 'max'),
    DNB=('DNB', 'max'),
    O_1_5=('O_1_5', 'max'),
    U_4_5=('U_4_5', 'min'),
)

# Updating Selection Weights
# Key both frames by (country, league) so update() can align them, then
# overwrite the stored weights in place with the aggregated values.
selection_weights = selection_weights.set_index(['country', 'league'])
grouped_predicted = grouped_predicted.rename_axis(index=['country', 'league'])
selection_weights.update(grouped_predicted)

# Finally saving updated selection weights
# NOTE: the output location is a hard-coded absolute path.
selection_weights.to_csv(r"C:\Users\harshad\Documents\Harshad Projects\Python Projects\Football Predictor\Files\Backtest\selection.csv")


# Checking if the selection matches
# Re-derive the pick and its grade from the thresholds currently stored on
# each row of predicted.
for _column, _rule in (('selection', selection), ('selection_match', selection_match)):
    predicted[_column] = predicted.apply(_rule, axis=1)

# A bit of insurance
# Show any rows that are still graded "No Match"; otherwise confirm all is well.
still_unmatched = predicted[predicted['selection_match'] == 'No Match']
if still_unmatched.empty:
    print("All predictions matched with selection.")
else:
    print(still_unmatched)

# Number of prediction rows processed.
print(len(predicted))

# Time Taken
# Elapsed wall-clock time since start_time was recorded.
end_time = time.time()
total_time = end_time - start_time
print(f"Total time taken: {total_time:.2f} seconds")