Advertisement
PyNoob1

Step 2 - Download Updated Data and Concatenate with Historical Data

Jun 11th, 2021
150
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.34 KB | None | 0 0
  # Step 2 of the pipeline: download the 2020-21 season workbook, merge it with
  # the locally saved historical dataset, normalise a few column types, drop
  # duplicate rows and write the combined result to disk as CSV.
  import pandas as pd
  from datetime import datetime
  import numpy as np

  # Input/output locations, kept together so they are easy to change.
  SEASON_2020_URL = r"https://football-data.co.uk/mmz4281/2021/all-euro-data-2020-2021.xlsx"
  HISTORICAL_CSV = r"C:\Users\harsh\Google Drive\sportsintel.shop\Files\Output\dataset(1993-2019).csv"
  OUTPUT_CSV = r"C:\Users\harsh\Google Drive\sportsintel.shop\Files\Output\dataset_updated.csv"

  # Junk columns removed from the historical CSV: 'Unnamed: 19' .. 'Unnamed: 33'
  # and 'Unnamed: 44' .. 'Unnamed: 54' (the same 26 names as the original list).
  UNNAMED_COLUMNS = [f'Unnamed: {n}' for n in (*range(19, 34), *range(44, 55))]

  start = datetime.now()

  print("Downloading data for Season 2020")
  # sheet_name=None loads every sheet of the workbook (one DataFrame per sheet).
  Season_2020 = pd.read_excel(SEASON_2020_URL, engine='openpyxl', sheet_name=None)
  print("Downloading data for Season 2020 completed successfully")

  print("Concatenating data for Season 2020")
  Season_2020_c = pd.concat(Season_2020, axis=0, ignore_index=True)
  print("Concatenating data for Season 2020 completed successfully")

  print("Using saved database till last season")
  Season93_20 = pd.read_csv(HISTORICAL_CSV, low_memory=False)
  print("Loading saved database complete")

  print("Filling missing team names with HT and AT columns")
  # Bracket assignment instead of attribute assignment: it always targets the
  # column and can never silently create a plain Python attribute.
  Season93_20['HomeTeam'] = Season93_20['HomeTeam'].fillna(Season93_20['HT'])
  Season93_20['AwayTeam'] = Season93_20['AwayTeam'].fillna(Season93_20['AT'])

  print("Dropping excess columns")
  # errors='ignore': a regenerated CSV with fewer 'Unnamed: N' columns must not
  # abort the run with a KeyError.
  Season93_20 = Season93_20.drop(columns=['HT', 'AT', *UNNAMED_COLUMNS], errors='ignore')
  print("Loading historical data complete")

  print("Defining list of all seasons")
  Season_list = [Season_2020_c, Season93_20]

  print("Concatenating all data into one dataframe")
  db_concat = pd.concat(Season_list)

  print("Filtering out null date rows")
  db_concat = db_concat.dropna(subset=['Date'])

  print("Defining Column Types")
  column_dict = {'HTR': str,
                 'Referee': str,
                 'BbAH': int,
                 'BbAHh': float,
                 }
  for column, dtype in column_dict.items():
      if column not in db_concat.columns:
          # astype() with a dict raises KeyError for unknown columns even with
          # errors='ignore', so absent columns are skipped explicitly.
          continue
      if dtype is str:
          # Convert only real values; a plain astype(str) would turn missing
          # entries into the literal text 'nan'.
          db_concat[column] = db_concat[column].map(str, na_action='ignore')
      else:
          # NOTE(review): the int cast cannot succeed while the column holds
          # missing values; errors='ignore' then leaves the column unchanged.
          db_concat[column] = db_concat[column].astype(dtype, errors='ignore')

  print("Dropping duplicates and cleaning dataset")
  db_concat = db_concat.drop_duplicates()

  print("Saving csv")
  # NOTE(review): the (non-unique) row index is written as the first CSV column,
  # exactly as before -- confirm whether downstream steps rely on it.
  db_concat.to_csv(OUTPUT_CSV)
  print("File Saved")
  print("Success")

  end = datetime.now()
  time_taken = end - start
  print('Time taken to complete: ', time_taken)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement