Advertisement
Guest User

Untitled

a guest
Jun 20th, 2018
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.15 KB | None | 0 0
import time
from contextlib import closing
from multiprocessing import Process, freeze_support
from urllib.error import HTTPError

import MySQLdb
import pandas as pd
from tqdm import tqdm
  7.  
  8. db = MySQLdb.connect('tcbinstance.cwudbgjghls4.us-east-1.rds.amazonaws.com', 'artvandelay', 'gumby1234', db='mlb')
  9. c = db.cursor()
  10.  
  11. c.execute("SELECT max(game_date) FROM mlb.pitchdata;")
  12. test = c.fetchall()
  13. latest_date = test[0][0]
  14.  
  15. teams_a = ['LAA', 'HOU', 'OAK', 'TOR', 'ATL', 'MIL', 'STL','CHC']
  16. teams_b = ['ARI', 'LAD', 'SF', 'CLE', 'SEA', 'MIA', 'NYM', 'WSH']
  17. teams_c = ['BAL', 'SD', 'PHI', 'PIT', 'TEX','TB', 'BOS', 'CIN']
  18. teams_d = ['COL', 'KC', 'DET', 'MIN', 'CWS', 'NYY']
  19. teams = [teams_a, teams_b, teams_c, teams_d]
  20.  
  21. def upload(file):
  22.     for team in file:
  23.         successful = False
  24.         backoff_time = 30
  25.         while not successful:
  26.             try:
  27.                 year = 2018
  28.                 link = 'https://baseballsavant.mlb.com/statcast_search/csv?all=true&hfPT=&hfAB=&hfBBT=&hfPR=&hfZ=&stadium=&hfBBL=&hfNewZones=&hfGT=&hfC=&hfSea=' + str(
  29.                     year) + '%7C&hfSit=&player_type=pitcher&hfOuts=&opponent=&pitcher_throws=&batter_stands=&hfSA=&game_date_gt=&game_date_lt=&team=' + team + '&position=&hfRO=&home_road=&hfFlag=&metric_1=&hfInn=&min_pitches=0&min_results=0&group_by=name-event&sort_col=pitches&player_event_sort=api_p_release_speed&sort_order=desc&min_abs=0&type=details&'
  30.                 data = pd.read_csv(link, low_memory=False)
  31.                 data['game_date'] = pd.to_datetime(data['game_date'])
  32.                 data = data[(data['game_date'] > latest_date)]
  33.                 print(data)
  34.                 successful = True
  35.             except (HTTPError) as e:
  36.                 # If there is an error backoff exponentially until there is no longer an error
  37.                 for i in tqdm(range(1, backoff_time), desc="Backing off " + str(backoff_time) + " seconds",
  38.                               leave=False):
  39.                     time.sleep(1)
  40.                 backoff_time = min(backoff_time * 2, 60 * 60)
  41.  
  42.  
  43. if __name__ == '__main__':
  44.     freeze_support()
  45.     for group in teams:
  46.         proc = Process(target=upload, args=(group,))
  47.         proc.start()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement