Advertisement
PyNoob11

Untitled

Apr 21st, 2023
143
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.34 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. from datetime import datetime, timedelta
  4. import time
  5. import numexpr as ne
  6.  
  7. start_time = time.time()
  8. # Set display options
  9. pd.set_option('display.max_rows', 30000)
  10. pd.set_option('display.max_columns', 500)
  11. pd.set_option('display.width', 1000)
  12. pd.set_option('display.max_colwidth', None)
  13.  
  14. # Calculating Selection Thresholds
  15. # File Paths
  16. predictions_archive = predictions_archive.csv
  17. train_data = historical_data_cleaned.csv
  18. selection_weights_file = selection.csv
  19.  
  20. train_data = pd.read_csv(train_data)
  21. selection_weights = pd.read_csv(selection_weights_file)
  22. predictions_archive = pd.read_csv(predictions_archive)
  23. train_data['datetime'] = pd.to_datetime(train_data['datetime'])
  24. predictions_archive['match_datetime'] = pd.to_datetime(predictions_archive['match_datetime'])
  25. # print(train_data.head())
  26.  
  27. # Print Missing Dates
  28. # Create a date range for the past year
  29. # start_date = datetime.now() - timedelta(days=365)
  30. # end_date = datetime.now()
  31. # date_range = pd.date_range(start=start_date, end=end_date)
  32. #
  33. # # Get the missing dates
  34. # missing_dates = set(date_range.date) - set(train_data['datetime'].dt.date)
  35. #
  36. # # Print out the missing dates
  37. # if missing_dates:
  38. #     print("The following dates are missing:")
  39. #     for date in sorted(missing_dates):
  40. #         print(date.strftime('%Y-%m-%d'))
  41. # else:
  42. #     print("All dates for the past year are present in the dataframe.")
  43.  
  44. # Setting Selection Weights
  45. grouped_train_data = train_data.groupby(['country', 'league']).apply(
  46.     lambda x: x[['country', 'league']].iloc[0]).reset_index(drop=True)
  47. print("Setting default selection Weights")
  48. grouped_train_data = grouped_train_data.assign(Win=1.1, DNB=0.7, O_1_5=3.2, U_4_5=2.2)
  49. # Save grouped_train dataset to csv for backtest
  50. # grouped_train_data.to_csv(selection_weights_file, index=False) # Don't need to do it once done
  51.  
  52. # Merging train_data ith predictions_archive to get home_score & away_score against predictions
  53. predicted = predictions_archive.merge(
  54.     train_data[['datetime', 'home_score', 'away_score', 'country', 'league', 'home_team', 'away_team']],
  55.     left_on=['match_datetime', 'country', 'league', 'home_team', 'away_team'],
  56.     right_on=['datetime', 'country', 'league', 'home_team', 'away_team'],
  57.     how='left')
  58. predicted.drop('datetime', axis=1, inplace=True)  # Remove datetime column from train_data
  59.  
  60. # "merging and adding columns from grouped_train_data to predicted"
  61.  
  62. # Merge predicted and grouped_train_data dataframes
  63. merged_data = pd.merge(predicted, grouped_train_data, on=['country', 'league'], how='left')
  64. # Select the columns to add to predicted
  65. grouped_train_columns = ['Win', 'DNB', 'O_1_5', 'U_4_5']
  66. # Add the grouped_train_columns to predicted
  67. predicted[grouped_train_columns] = merged_data[grouped_train_columns]
  68. # Adding further columns
  69. predicted['score_difference'] = abs(predicted['home_score'] - predicted['away_score'])
  70. predicted['total_score'] = predicted['home_score'] + predicted['away_score']
  71. predicted['predicted_score_difference'] = abs(predicted['predicted_home_score'] - predicted['predicted_away_score'])
  72. predicted['predicted_total_score'] = predicted['predicted_home_score'] + predicted['predicted_away_score']
  73. predicted['result'] = np.where(predicted['home_score'] > predicted['away_score'], 'home',
  74.                                np.where(predicted['home_score'] < predicted['away_score'], 'away', 'draw'))
  75. predicted['predicted_result'] = np.where(predicted['predicted_home_score'] > predicted['predicted_away_score'], 'home',
  76.                                          np.where(predicted['predicted_home_score'] < predicted['predicted_away_score'],
  77.                                                   'away', 'draw'))
  78. predicted['result_match'] = np.where(predicted['result'] == predicted['predicted_result'], 'match', 'no match')
  79.  
  80.  
  81. # Creating selection functions
  82. def selection(row):
  83.     if row["predicted_score_difference"] > row["Win"] and row["predicted_total_score"] > row["O_1_5"]:
  84.         return "W & O 1.5"
  85.     if row["predicted_score_difference"] > row["Win"]:
  86.         return "W"
  87.     if row["predicted_total_score"] > row["O_1_5"]:
  88.         return "O 1.5"
  89.     if row["predicted_score_difference"] > row["DNB"] and row["predicted_score_difference"] < row["Win"] and row[
  90.         "predicted_total_score"] > row["O_1_5"]:
  91.         return "O 1.5 or DNB"
  92.     if row["predicted_score_difference"] > row["DNB"] and row["predicted_score_difference"] < row["Win"]:
  93.         return "DNB"
  94.     if row["predicted_score_difference"] > row["Win"] and row["predicted_total_score"] < row["U_4_5"]:
  95.         return "W & U 4.5"
  96.     if row["predicted_total_score"] < row["U_4_5"]:
  97.         return "U 4.5"
  98.     if row["predicted_score_difference"] < row["DNB"]:
  99.         return "N"
  100.  
  101.  
  102. def selection_match(row):
  103.     if row["selection"] == "N":
  104.         return "No Sel."
  105.     elif (row["home_score"] + row["away_score"]) < 5 and row["selection"] == "U 4.5":
  106.         return "Match"
  107.     elif row["result"] == row["predicted_result"] and row["selection"] == "W":
  108.         return "Match"
  109.     elif row["result"] == row["predicted_result"] and row["total_score"] > 1 and row["selection"] == "W & O 1.5":
  110.         return "Match"
  111.     elif row["total_score"] > 1 and row["selection"] == "O 1.5":
  112.         return "Match"
  113.     elif (row["result"] == row["predicted_result"] or row["result"] == 'Draw') and row["selection"] == "DNB":
  114.         return "Match"
  115.     elif pd.isna(row["home_score"]):  # Fixed
  116.         return "NA"
  117.     else:
  118.         return "No Match"
  119.  
  120.  
  121. predicted['selection'] = predicted.apply(selection, axis=1)
  122. predicted['selection_match'] = predicted.apply(selection_match, axis=1)
  123.  
  124. # Modifying selection weights
  125. predicted['O_1_5'] = predicted.apply(lambda x: x.predicted_total_score + 0.02 if x.selection_match == "No Match" and (x.selection == "O 1.5" or x.selection == "W & O 1.5") and x.total_score < 2 else x['O_1_5'], axis=1)
  126. predicted['DNB'] = predicted.apply(lambda x: x.predicted_score_difference + 0.02 if x.selection_match == "No Match" and x.selection == "DNB"else (x.predicted_score_difference + 0.02 if x.selection_match == "No Match" and x.selection == "W & O 1.5" and x.result != x.predicted_result and x.result != 'Draw'else (x.predicted_score_difference + 0.02 if x.selection_match == "No Match" and x.selection == "W" and x.result != x.predicted_result and x.result != 'Draw'else (x.predicted_score_difference + 0.02 if x.selection_match == "No Match" and x.selection == "DNB" and x.result != x.predicted_result and x.result != 'Draw' else x.DNB))), axis=1)
  127. predicted['Win'] = predicted.apply(lambda x: x.predicted_score_difference + 0.02 if x.selection_match == "No Match" and x.selection in ["W", "W & O 1.5"] and x.result != x.predicted_result and x.result != 'Draw' else(x.predicted_score_difference + 0.02 if x.selection_match == "No Match" and x.selection == "W & O 1.5" and x.result == 'Draw' else(x.predicted_score_difference + 0.02 if x.selection_match == "No Match" and x.selection == "W" and x.result != 'Draw' else(x.predicted_score_difference + 0.02 if x.selection_match == "No Match" and x.selection == "W" and x.result == 'Draw' else x.Win))), axis=1)
  128. predicted['U_4_5'] = predicted.apply(lambda x: x.predicted_total_score - 0.02 if (x.total_score > 4) and (x.selection == "U 4.5") else x['U_4_5'], axis=1)
  129.  
  130.  
  131. # Grouping selection values
  132. grouped_predicted = predicted.groupby(['country', 'league']).agg({'Win': 'max', 'DNB': 'max', 'O_1_5': 'max', 'U_4_5': 'min'})
  133.  
  134. # Updating Selection Weights
  135. selection_weights.set_index(['country', 'league'], inplace=True)
  136. grouped_predicted.index.names = ['country', 'league']
  137. selection_weights.update(grouped_predicted)
  138. # Finally saving updated selection weights
  139. selection_weights.to_csv(r"C:\Users\harshad\Documents\Harshad Projects\Python Projects\Football Predictor\Files\Backtest\selection.csv")
  140.  
  141.  
  142. # Checking if the selection matches
  143. predicted['selection'] = predicted.apply(selection, axis=1)
  144. predicted['selection_match'] = predicted.apply(selection_match, axis=1)
  145.  
  146.  
  147. # A bit of insurance
  148. if not predicted[predicted['selection_match'] == 'No Match'].empty:
  149.     print(predicted[predicted['selection_match'] == 'No Match'])
  150. else:
  151.     print("All predictions matched with selection.")
  152.  
  153.  
  154. print(predicted.shape[0])
  155. # print(sorted_df)
  156.  
  157.  
  158. # Time Taken
  159. end_time = time.time()
  160. total_time = end_time - start_time
  161. print(f"Total time taken: {total_time:.2f} seconds")
  162.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement