# Untitled

import json
import pandas as pd
from collections import Counter
import datetime
from math import isclose

# Base datetime (midnight) that millisecond timestamps are added to.
# Only the time-of-day part of the result is ever formatted, so the date itself is irrelevant.
TS0 = datetime.datetime(1, 1, 1, 0, 0, 0)


def ts_to_time(timestamp, offset_hours=1):
    """Format a millisecond timestamp as an ``HH:MM:SS`` string.

    Args:
        timestamp: Number of milliseconds to add to ``TS0``.
        offset_hours: Hours added on top of the timestamp before formatting.
            Defaults to 1, which is the shift this script has always applied
            (presumably the offset to local session time -- confirm).

    Returns:
        The resulting time of day as ``'%H:%M:%S'``; it wraps around at midnight.
    """
    shifted = (
        TS0
        + datetime.timedelta(milliseconds=timestamp)
        + datetime.timedelta(hours=offset_hours)
    )
    return shifted.strftime('%H:%M:%S')

def lap_in_sec(laptime, as_string=True):
    """Convert a lap time string into seconds.

    Turns e.g. ``'1:23.456'`` into ``83.456`` so lap times can be plotted or
    compared numerically. Besides the usual ``M:SS.sss`` form, a value without
    a colon (``'59.123'``) and a value with an hours field (``'1:02:03.5'``)
    are accepted as well.

    Args:
        laptime: Colon-separated lap time string, or ``''`` for "no lap time".
        as_string: When true (default) return the seconds formatted with three
            decimals; otherwise return a float.

    Returns:
        ``''`` if ``laptime`` is empty, else the lap time in seconds as a
        ``'%.3f'`` string or a float, depending on ``as_string``.

    Raises:
        ValueError: If a field of ``laptime`` is not numeric.
    """
    if laptime == '':
        return ''

    # The last field is (fractional) seconds; every field to its left is worth
    # 60 times more than the one after it (minutes, then hours).
    *leading_fields, seconds_field = laptime.split(':')
    laptime_float = float(seconds_field)
    weight = 60
    for field in reversed(leading_fields):
        laptime_float = weight * int(field) + laptime_float
        weight *= 60

    return '%.3f' % laptime_float if as_string else laptime_float

def is_laptime_right(row):
    """Sanity-check that the three sector times of a lap add up to its lap time.

    Args:
        row: Mapping-like lap record with string fields ``'s1'``, ``'s2'``,
            ``'s3'`` (seconds) and ``'laptime'`` (as understood by ``lap_in_sec``).

    Returns:
        ``'partial'`` if any of the four fields is the empty string,
        ``'ok'`` if the sector sum matches the lap time within 0.0001 s,
        ``'PROBLEM'`` otherwise.
    """
    needed_fields = ('s1', 's2', 's3', 'laptime')
    if any(row[field] == '' for field in needed_fields):
        return 'partial'

    sector_total = float(row['s1']) + float(row['s2']) + float(row['s3'])
    reported_total = lap_in_sec(row['laptime'], as_string=False)
    if isclose(sector_total, reported_total, abs_tol=0.0001):
        return 'ok'
    return 'PROBLEM'

def find_key_in_json(frame, searchkey='DR'):
    """Collect every value stored under ``searchkey`` in a nested JSON structure.

    Lists and dicts are walked depth-first in document order. When a dict
    contains ``searchkey`` its value is collected and that dict is not searched
    any further (neither the value nor the dict's other entries). Anything that
    is not a list or dict is ignored.

    Args:
        frame: Decoded JSON data (arbitrary nesting of lists, dicts, scalars).
        searchkey: Dictionary key to look for.

    Returns:
        List of the values found, in the order they were encountered.
    """
    matches = []
    # Explicit stack; children are pushed reversed so they pop in original order.
    pending = [frame]
    while pending:
        node = pending.pop()
        if isinstance(node, list):
            children = node
        elif isinstance(node, dict):
            if searchkey in node:
                matches.append(node[searchkey])
                continue
            children = list(node.values())
        else:
            continue
        pending.extend(reversed(children))
    return matches


# Load the recorded feed. The rest of the script iterates `frames` as a list of
# message dicts. JSON is UTF-8 by specification, so do not depend on the
# platform's default text encoding.
with open('day2-afternoon.json', 'r', encoding='utf-8') as infile:
    frames = json.load(infile)


# Every 'init' entry in the recording carries a driver list. Print each one so
# they can be compared by eye; `drivers` ends up holding the mapping from the
# last list (position in the list -> initials).
drinames = find_key_in_json(frames, 'init')
if not drinames:
    # Without a driver list the ID -> initials mapping needed further down
    # cannot be built, so fail here with a clear message.
    raise ValueError("no 'init' entry with a driver list found in the input frames")
for driverlist in drinames:
    init_timestamps = find_key_in_json(driverlist, 'T')
    print()
    if init_timestamps:
        ts = init_timestamps[0]
        print('driver list at timestamp', ts, ts_to_time(ts))
    else:
        # An init entry without a 'T' value is still usable, it just cannot be dated.
        print('driver list without timestamp')
    drivers = {i: x['Initials'] for i, x in enumerate(driverlist['data']['Drivers'])}
    print(drivers)
print()
print('using last one. If you see more than 1 driver list, make sure to check that they are all equal')
print('otherwise driver IDs at different timestamps will have to be converted to different initials!')


frames2 = []  # payloads of the 'SPFeed' messages; irrelevant frames are filtered out
frames2_thrownout = []  # everything that was filtered out, kept for inspection
for frame in frames:
    # All the timing frames seem to have exactly the keys C and M. Compare as a
    # set so the check does not depend on key order, and do not choke on
    # entries that are not dicts at all.
    if not isinstance(frame, dict) or frame.keys() != {'C', 'M'}:
        frames2_thrownout.append(frame)
        continue
    multiframes = frame['M']  # usually only one element, but sometimes 2 or 3
    for subframe in multiframes:
        # subframe always has 3 keys: H, M, A. H&M are constant strings, the only information is under A.
        # subframe['A'] is a fixed list of 3:
        #   [0] SPFeed / ExtrapolatedClock / StreamingStatus (almost all SPFeed)
        #   [1] the rich data
        #   [2] a text timestamp
        if subframe['A'][0] == 'SPFeed':
            frames2.append(subframe['A'][1])


# Flatten the 'DR' entries of every SPFeed payload into one list of per-driver
# dicts, each tagged (in place) with DRIVERID, TIMESTAMP and WARNING.
cleaned_dr = []
last_timestamp = 0
for payload in frames2:
    timestamp = find_key_in_json(payload, 'T')
    drentries = find_key_in_json(payload, 'DR')

    if not timestamp:
        # No 'T' in this payload: fall back to one tick after the last real timestamp.
        print('no timestamp, adding a fake one based on the previous', drentries)
        ts = last_timestamp + 1
        warning = 'missing_timestamp'
    else:
        ts = timestamp[0]
        last_timestamp = ts
        warning = ''

    for drentry in drentries:
        # early initialization frames may be weird (not a dict), ignore those
        if isinstance(drentry, dict):
            for driverid, data in drentry.items():
                data['DRIVERID'] = int(driverid)
                data['TIMESTAMP'] = ts
                data['WARNING'] = warning
                cleaned_dr.append(data)


# Exceptions that mean "this entry does not have the shape we are looking for".
# Catching only these (instead of a bare `except`) keeps the best-effort skipping
# while letting KeyboardInterrupt / SystemExit and genuine bugs through.
_ENTRY_SHAPE_ERRORS = (KeyError, IndexError, TypeError, AttributeError)

# Rows for the raw DataFrame; each row is a dict carrying 'dri' (driver id) and 'ts' (timestamp).
todf = []

# First pass: entries with an 'O' payload contribute a copy of that payload.
for frame in cleaned_dr:
    try:
        yy = frame['O'].copy()
        yy['dri'] = frame['DRIVERID']
        yy['ts'] = frame['TIMESTAMP']
    except _ENTRY_SHAPE_ERRORS:
        continue
    todf.append(yy)

# Second pass: entries that have both an 'X' payload and 'TI' info contribute
# two rows, one from 'X' (with 'tyrehist' / 'tyre') and one holding 'age'.
for frame in cleaned_dr:
    try:
        yy = frame['X'].copy()
        zz = frame['TI']
        zz2 = zz if isinstance(zz, list) else tuple(zz.values())
        zz3 = {'age': str(zz2[2]), 'dri': frame['DRIVERID'], 'ts': frame['TIMESTAMP']}  # 'stint' : zz2[1]

        yy['tyrehist'] = yy.pop('9')
        yy['tyre'] = yy['tyrehist'][0]  # first element of the tyre history
        yy['dri'] = frame['DRIVERID']
        yy['ts'] = frame['TIMESTAMP']
    except _ENTRY_SHAPE_ERRORS:
        # Entries without the complete X + TI data are skipped on purpose.
        # A lone 'TI' update is redundant information and it always arrives a
        # few seconds after the laptime, making it a pain to integrate in later
        # steps. It can be calculated from the stint start if someone really
        # wants it.
        continue
    todf.append(yy)
    todf.append(zz3)


# Readable column names for the numeric keys of the raw rows.
# The 'dunno*' entries are fields whose meaning has not been worked out.
fieldnames = {
    '1': 'laptime',
    '2': 'sections',
    '3': 'dunno1',
    '4': 'dunno2',
    '5': 's1',
    '6': 's2',
    '7': 's3',
    '8': 'dunno3',
    '9': 'dunno4',
    '10': 'sp1',
    '11': 'sp2',
    '12': 'sp3',
    '13': 'sp0',
    '14': 'gap'
}
# Missing cells become '' so "no value" can be tested with a plain string comparison.
df = pd.DataFrame(todf).fillna('').rename(columns=fieldnames)
df['driver'] = df['dri'].apply(drivers.get)
df['time'] = df['ts'].apply(ts_to_time)
df.sort_values(['driver', 'ts'], inplace=True)

# Keep only the columns of interest. reindex (rather than .loc) creates any
# column the recording never produced as an all-'' column instead of raising
# KeyError, so the steps below always find every column they read.
selected_columns = 'driver time s1 s2 s3 laptime sp0 sp1 sp2 sp3 tyre age'.split()
df = df.reindex(columns=selected_columns, fill_value='')
# Drop rows that carry none of the selected values.
df2 = df.query('s1!="" | s2!="" | s3!="" | laptime!="" | sp0!="" | sp1!="" | sp2!="" | sp3!="" | tyre!="" | age!=""').reset_index(drop=True)


def _split_into_laps(subdf):
    """Group one driver's time-ordered rows into laps.

    Rows are collected until both s3 and the lap time have been seen; the
    collected rows are then merged into a single lap with a column-wise max
    (a non-empty string beats ''). A new s1 or s2 arriving while older sector
    data is still pending means a weird in/out lap, so the pending rows are
    emitted as their own lap first.

    Rows still pending when the driver's data ends are not emitted.

    Returns:
        List of pandas Series, one per lap, in chronological order.
    """
    laps = []
    pending_rows = []
    seen = set()  # which of 's1', 's2', 's3', 'laptime' the pending rows contain

    def flush():
        laps.append(pd.DataFrame(pending_rows).max(axis=0))
        pending_rows.clear()
        seen.clear()

    for _, row in subdf.iterrows():
        if row['s1'] != '':
            # if s1 follows any other unprinted sector/laptime info, it's some weird in/out lap
            # so first dump the previous information
            # TODO: also track tyre info to get rid of a few stint opening problems. Low priority
            if seen:
                flush()
            seen.add('s1')
        if row['s2'] != '':
            # if s2 follows any other unprinted s2/s3/laptime info, same thing, dump before continuing
            if seen & {'s2', 's3', 'laptime'}:
                flush()
            seen.add('s2')
        if row['s3'] != '':
            seen.add('s3')
        if row['laptime'] != '':
            seen.add('laptime')

        pending_rows.append(row)

        # wait until both s3 and laptime are available, whichever comes later
        if 's3' in seen and 'laptime' in seen:
            flush()

    return laps


# group the sector times into full laps
all_laps = []
for driver, subdf in df2.groupby('driver'):
    driver_laps = _split_into_laps(subdf)
    if not driver_laps:
        # No lap was completed by this driver; an empty frame has no columns,
        # so inserting 'lapno' at position 1 would fail.
        continue

    driver_all_laps = pd.DataFrame(driver_laps)
    driver_all_laps.insert(1, 'lapno', range(1, driver_all_laps.shape[0] + 1))
    all_laps.append(driver_all_laps)

lapdf = pd.concat(all_laps, axis=0, ignore_index=True)
lapdf['lapsecs'] = lapdf['laptime'].apply(lap_in_sec)  # lap time in seconds, for plotting
lapdf['math'] = lapdf.apply(is_laptime_right, axis=1)  # 'ok' / 'PROBLEM' / 'partial'


# Debugging aid: uncomment to also write the ungrouped per-message table.
# df.to_csv('day2-afternoon-raw.tsv', sep='\t', index=False)

# Write the lap chart as a tab-separated file without the index column.
lapchart_path = 'day2-afternoon-lapchart.tsv'
lapdf.to_csv(lapchart_path, index=False, sep='\t')

print('finished')