Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- import pandas as pd
- from collections import Counter
- import datetime
- from math import isclose
# Arbitrary midnight reference: only the time-of-day part of TS0 + offset is ever used.
TS0 = datetime.datetime(1, 1, 1, 0, 0, 0)


def ts_to_time(timestamp, offset_hours=1):
    """Format a millisecond timestamp as an ``HH:MM:SS`` clock string.

    Args:
        timestamp: Number of milliseconds. Only its position within a
            24-hour day influences the result.
        offset_hours: Hours added before formatting. Defaults to 1, the
            shift that used to be hard-coded here (presumably the
            UTC -> local offset of the session; NOTE(review): confirm).

    Returns:
        The shifted time of day formatted with ``'%H:%M:%S'``.
    """
    ms_per_hour = 60 * 60 * 1000
    ms_per_day = 24 * ms_per_hour
    shifted_ms = timestamp + offset_hours * ms_per_hour
    # Wrap into a single day first: TS0 is the smallest representable datetime,
    # so adding a negative shift to it directly would raise OverflowError.
    time_of_day = datetime.timedelta(milliseconds=shifted_ms % ms_per_day)
    return (TS0 + time_of_day).strftime('%H:%M:%S')
def lap_in_sec(laptime, as_string=True):
    """Convert a lap time such as ``'1:23.456'`` into seconds (``83.456``).

    Handy when the lap times are going to be plotted later.

    Args:
        laptime: Colon-separated time string. Accepted layouts are
            ``'m:ss.sss'``, plain ``'ss.sss'`` (no minutes field) and
            ``'h:mm:ss.sss'``. An empty string means "no lap time".
        as_string: When true (the default) the seconds are returned as a
            string with three decimals; otherwise as a float.

    Returns:
        ``''`` when ``laptime`` is empty (regardless of ``as_string``),
        otherwise the total number of seconds.

    Raises:
        ValueError: If a field is not numeric or there are more than
            three colon-separated fields.
    """
    if laptime == '':
        return ''

    *leading, seconds = laptime.split(':')
    if len(leading) > 2:
        raise ValueError('unrecognised lap time: %r' % (laptime,))

    laptime_float = float(seconds)
    multiplier = 60
    # Walk outwards from the seconds field: minutes weigh 60, hours 3600.
    for field in reversed(leading):
        laptime_float += multiplier * int(field)
        multiplier *= 60

    return '%.3f' % laptime_float if as_string else laptime_float
def is_laptime_right(row):
    """Sanity-check that the three sector times add up to the lap time.

    Args:
        row: Mapping-like record with the string fields ``'s1'``, ``'s2'``,
            ``'s3'`` (seconds) and ``'laptime'`` (``m:ss.sss``); a missing
            value is represented by ``''``.

    Returns:
        ``'partial'`` if any of the four fields is empty, ``'ok'`` if the
        sector sum matches the lap time to within 0.0001 s, and
        ``'PROBLEM'`` otherwise.
    """
    # Nothing to compare unless every ingredient is present.
    if row['s1'] == '' or row['s2'] == '' or row['s3'] == '' or row['laptime'] == '':
        return 'partial'

    sector_total = float(row['s1']) + float(row['s2']) + float(row['s3'])
    reported_total = lap_in_sec(row['laptime'], as_string=False)
    if isclose(sector_total, reported_total, abs_tol=0.0001):
        return 'ok'
    return 'PROBLEM'
def find_key_in_json(frame, searchkey='DR'):
    """Collect every value stored under ``searchkey`` in a nested JSON structure.

    Lists and dicts are walked depth-first in their natural order. When a
    dict contains ``searchkey`` its value is collected and that dict is not
    explored any further (neither the value nor its sibling entries), so
    nested occurrences inside a match are not reported separately.

    Args:
        frame: Decoded JSON data (any mix of lists, dicts and scalars).
        searchkey: Dictionary key to look for.

    Returns:
        A list of the matching values, in traversal order; empty when
        nothing matches.
    """
    def matches(node):
        if isinstance(node, list):
            for item in node:
                yield from matches(item)
        elif isinstance(node, dict):
            if searchkey in node:
                yield node[searchkey]
            else:
                for child in node.values():
                    yield from matches(child)
        # Scalars (strings, numbers, None, ...) cannot contain keys.

    return list(matches(frame))
# Load the complete capture into memory. The encoding is spelled out so the
# result does not depend on the platform's default locale encoding.
with open('day2-afternoon.json', 'r', encoding='utf-8') as infile:
    frames = json.load(infile)
# Every 'init' payload found in the capture is expected to carry a driver list.
# Each one is printed, and `drivers` ends up holding the mapping from the last.
drinames = find_key_in_json(frames, 'init')
for driverlist in drinames:
    ts = find_key_in_json(driverlist, 'T')[0]
    print()
    print('driver list at timestamp', ts, ts_to_time(ts))
    # Position in the list -> initials; the position is what the timing data
    # later refers to as the driver id.
    initials = (entry['Initials'] for entry in driverlist['data']['Drivers'])
    drivers = dict(enumerate(initials))
    print(drivers)

print()
print('using last one. If you see more than 1 driver list, make sure to check that they are all equal')
print('otherwise driver IDs at different timestamps will have to be converted to different initials!')
# Keep only the timing payloads; everything else is set aside for inspection.
frames2 = []
frames2_thrownout = []
for frame in frames:
    # All the timing frames seem to have exactly the keys C and M, in that order.
    if tuple(frame.keys()) == ('C', 'M'):
        # frame['M'] usually holds one sub-frame, occasionally 2 or 3.
        # Per the original notes each sub-frame has the keys H, M and A, where
        # only A carries information: a 3-element list of
        #   [feed name (SPFeed / ExtrapolatedClock / StreamingStatus),
        #    rich data,
        #    text timestamp].
        frames2.extend(
            subframe['A'][1]
            for subframe in frame['M']
            if subframe['A'][0] == 'SPFeed'
        )
    else:
        frames2_thrownout.append(frame)
# Flatten the per-driver ('DR') entries of every feed payload into one list,
# tagging each entry with its driver id, a timestamp and a warning marker.
cleaned_dr = []
last_timestamp = 0
for feed in frames2:
    timestamp = find_key_in_json(feed, 'T')
    drentries = find_key_in_json(feed, 'DR')

    if not timestamp:
        # No 'T' in this payload: fake one just after the last real timestamp.
        print('no timestamp, adding a fake one based on the previous', drentries)
        ts, warning = last_timestamp + 1, 'missing_timestamp'
    else:
        ts = last_timestamp = timestamp[0]
        warning = ''

    for drentry in drentries:
        # Early initialization frames may be weird (not a dict); ignore those.
        if isinstance(drentry, dict):
            for driverid, data in drentry.items():
                # NOTE: the entry is annotated in place, not copied.
                data['DRIVERID'] = int(driverid)
                data['TIMESTAMP'] = ts
                data['WARNING'] = warning
                cleaned_dr.append(data)
# Errors that only mean "this entry does not have the shape we are after".
# Anything else (KeyboardInterrupt, MemoryError, ...) is allowed to propagate.
_SHAPE_ERRORS = (LookupError, AttributeError, TypeError)

# Rows for the DataFrame. All 'O' rows are added first, then the tyre rows,
# each tagged with the driver id ('dri') and the timestamp ('ts').
todf = []

# Pass 1: the numbered fields stored under 'O' of each driver entry.
for frame in cleaned_dr:
    try:
        yy = frame['O'].copy()
        yy['dri'] = frame['DRIVERID']
        yy['ts'] = frame['TIMESTAMP']
    except _SHAPE_ERRORS:
        continue  # entry without a usable 'O' part
    todf.append(yy)

# Pass 2: tyre information, taken only from entries that carry both 'X' and
# 'TI'. 'X'['9'] is kept as 'tyrehist' with its first element as 'tyre'; the
# third element of 'TI' is kept as 'age' (the second one would be the stint).
for frame in cleaned_dr:
    try:
        yy = frame['X'].copy()
        zz = frame['TI']
        zz2 = zz if isinstance(zz, list) else tuple(zz.values())
        zz3 = {'age': str(zz2[2]), 'dri': frame['DRIVERID'], 'ts': frame['TIMESTAMP']}
        yy['tyrehist'] = yy.pop('9')
        yy['tyre'] = yy['tyrehist'][0]
        yy['dri'] = frame['DRIVERID']
        yy['ts'] = frame['TIMESTAMP']
    except _SHAPE_ERRORS:
        continue  # entry without complete tyre data
    todf.append(yy)
    todf.append(zz3)

# Stand-alone two-element 'TI' updates (stint, age) are deliberately not
# extracted: it's redundant information that always arrives a few seconds
# after the laptime, making it a pain to integrate in later steps. They can
# be calculated from the stint start if someone really wants them.
# Human-readable names for the numbered fields of the timing feed.
# The 'dunno*' ones have not been identified.
fieldnames = {
    '1': 'laptime', '2': 'sections',
    '3': 'dunno1', '4': 'dunno2',
    '5': 's1', '6': 's2', '7': 's3',
    '8': 'dunno3', '9': 'dunno4',
    '10': 'sp1', '11': 'sp2', '12': 'sp3', '13': 'sp0',
    '14': 'gap',
}

# One row per feed event; missing values become '' so that later steps can
# test for "no value" with a plain string comparison.
df = pd.DataFrame(todf)
df = df.fillna('')
df = df.rename(columns=fieldnames)

# Resolve driver ids to initials and timestamps to clock times.
df = df.assign(
    driver=df['dri'].apply(drivers.get),
    time=df['ts'].apply(ts_to_time),
)

# Chronological order per driver, restricted to the columns of interest.
df = df.sort_values(['driver', 'ts'])
wanted_columns = 'driver time s1 s2 s3 laptime sp0 sp1 sp2 sp3 tyre age'.split()
df = df[wanted_columns]

# Drop the events that carry none of the values we care about.
has_any_value = 's1!="" | s2!="" | s3!="" | laptime!="" | sp0!="" | sp1!="" | sp2!="" | sp3!="" | tyre!="" | age!=""'
df2 = df.query(has_any_value).reset_index(drop=True)
def _collapse_lap(rows):
    """Merge the partial rows collected for one lap into a single Series.

    Takes the column-wise maximum: a missing value is '' and sorts below any
    non-empty string, so each column keeps the value that was filled in.
    """
    return pd.DataFrame(rows).max(axis=0)


# group the sector times into full laps
all_laps = []
for driver, subdf in df2.groupby('driver'):
    driver_laps = []
    data_for_lap = []
    has_laptime = has_s1 = has_s2 = has_s3 = False

    for _, row in subdf.iterrows():
        got_s1 = row['s1'] != ''
        got_s2 = row['s2'] != ''

        # If s1 follows any other unsaved sector/laptime info, or s2 follows an
        # unsaved s2/s3/laptime, the pending rows belong to some weird in/out
        # lap: dump them as a lap of their own before starting the new one.
        # TODO: add has_tyre to get rid of a few stint opening problems. Low priority
        pending_before_s1 = got_s1 and (has_s1 or has_s2 or has_s3 or has_laptime)
        pending_before_s2 = got_s2 and (has_s2 or has_s3 or has_laptime)
        if pending_before_s1 or pending_before_s2:
            driver_laps.append(_collapse_lap(data_for_lap))
            data_for_lap = []
            has_laptime = has_s1 = has_s2 = has_s3 = False

        has_s1 = has_s1 or got_s1
        has_s2 = has_s2 or got_s2
        has_s3 = has_s3 or row['s3'] != ''
        has_laptime = has_laptime or row['laptime'] != ''
        data_for_lap.append(row)

        # Wait until both s3 and laptime are available, whichever comes later;
        # then the lap is complete and can be saved.
        if has_laptime and has_s3:
            driver_laps.append(_collapse_lap(data_for_lap))
            data_for_lap = []
            has_laptime = has_s1 = has_s2 = has_s3 = False

    # NOTE: rows still pending here (an unfinished final lap) are not emitted.

    if not driver_laps:
        # No lap was assembled for this driver (e.g. only tyre or speed rows).
        # An empty frame has no column to insert 'lapno' after, so skip it
        # instead of failing in insert() below.
        continue

    driver_all_laps = pd.DataFrame(driver_laps)
    # Number the laps 1..n, right after the first column.
    driver_all_laps.insert(1, 'lapno', range(1, driver_all_laps.shape[0] + 1))
    all_laps.append(driver_all_laps)
# Stack the per-driver lap tables into one chart with a fresh 0..n-1 index.
lapdf = pd.concat(all_laps, ignore_index=True)

# Derived columns: the lap time in seconds, and a check that s1 + s2 + s3
# agrees with the reported lap time ('ok' / 'PROBLEM' / 'partial').
lapdf['lapsecs'] = lapdf['laptime'].map(lap_in_sec)
lapdf['math'] = lapdf.apply(is_laptime_right, axis=1)

# The raw per-event table can be dumped the same way if needed:
# df.to_csv('day2-afternoon-raw.tsv', sep='\t', index=False)
lapdf.to_csv('day2-afternoon-lapchart.tsv', sep='\t', index=False)
print('finished')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement