#!/usr/bin/python3 from __future__ import print_function import numpy as np import pandas as pd import struct import sys from time import sleep, time o = O = 'O' h = H = 'H' l = L = 'L' c = C = 'C' v = V = 'V' x = 'x' y = 'y' z = 'z' time_list = [] overrun_list = [] overruns = 0 class SierraFile(object): """ """ def __init__(self, filename): self.filename = str(filename) # self.tzAdjust = t imedelta(hours=+10).seconds/d2s self.tzAdjust = np.timedelta64(10, 'h') / np.timedelta64(1, 'D') self.excelDate = np.datetime64('1899-12-30') self.sizeHeader = 0x38 self.sizeRecord = 0x28 self.pos = 0 self.last = 0 def get_existing_records(self): with open(self.filename, 'rb') as fscid: fscid.read(self.sizeHeader) # discard header rows = [] ts = [] for i in range(1000000): data = fscid.read(self.sizeRecord) if data not in ('', b''): d = struct.unpack('d4f4I', data) dt = d[0] + self.tzAdjust ts.append(self.excelDate + np.timedelta64(int(dt)) + (np.timedelta64(int(round((dt - int(dt)) * 86400)), 's'))) datarow = [d[1], d[2], d[3], d[4], 0, 0, 0, 0] rows.append(datarow) else: break self.pos = self.last = fscid.tell() return (ts, rows) def get_record(self): global overruns, overrun_list with open(self.filename, 'rb') as fscid: fscid.seek(0, 2) # Go to the end of the file self.last = fscid.tell() if self.last == self.pos: # no new data >> nothing to do return (-999, 0, 0) else: # data to collect if self.pos < self.last - self.sizeRecord: # > 1 record print('Overrun', self.last - self.pos, (self.last - self.pos) / self.sizeRecord) overruns += 1 overrun_list.append(np.datetime64('now')) late_flag = True else: late_flag = False fscid.seek(self.pos, 0) self.pos += self.sizeRecord data = fscid.read(self.sizeRecord) d = struct.unpack('d4f4I', data) dt = d[0] + self.tzAdjust new_time = (self.excelDate + np.timedelta64(int(dt)) + (np.timedelta64(int(round((dt - int(dt)) * 86400)), 's'))) datarow = [d[1], d[2], d[3], d[4], 0, 0, 0, 0] return (new_time, datarow, late_flag) def save_existing_records(self, dataframe): with open(self.filename, 'wb') as fscid: header = b'SCID8\x00\x00\x00(\x00\x00\x00\x01\x00' fscid.write(header) for i in range(21): fscid.write(b'\x00\x00') for i in range(dataframe.end): da = ((dataframe.df.index.values[i] - self.excelDate) / np.timedelta64(1, 'D') - self.tzAdjust) db, dc, dd, de, df, dg, dh, di = dataframe.df.iloc[i] di = 0x11100111 wt = struct.pack('d4f4I', da, db, dc, dd, de, df, dg, dh, di) fscid.write(wt) def save_record(self, dataframe): with open(self.filename, 'ab') as fscid: i = dataframe.end - 1 da = ((dataframe.df.index.values[i] - self.excelDate) / np.timedelta64(1, 'D') - self.tzAdjust) db, dc, dd, de, df, dg, dh, di = dataframe.df.iloc[i] di = 0x88300388 record = struct.pack('d4f4I', da, db, dc, dd, de, df, dg, dh, di) fscid.write(record) class SierraFrame(object): """ """ def __init__(self, time_series, data): self.df = pd.DataFrame(data, index=time_series, columns=[O, H, L, C, V, x, y, z]) self.end = len(self.df) self.pos = 0 self.extend_frame() def extend_frame(self): ''' Create a 4999 row array from last time in self.df and append it to self.df Remove lunch break from array ''' print('Extending DataFrame Now') s5 = np.timedelta64(5, 's') h1 = np.timedelta64(1, 'h') sl = np.datetime64('today') + np.timedelta64(14, 'h') el = np.datetime64('today') + np.timedelta64(15, 'h') start_time = self.df.index.values[self.end - 1] dtgen = ((start_time + i * s5) for i in range(1, 5000)) dtstrip = ((i + h1 if sl <= i < el else i) for i in dtgen) dg = pd.DataFrame(index=dtstrip, columns=self.df.columns) # dg.iloc[0:5000] = 0.0 # dg[[v, x, y, z]] = dg[[v, x, y, z]].astype('int') self.df = self.df.append(dg) def add(self, new_time, datarow): if self.end > len(self.df) - 5: self.extend_frame() # not needed if first fill > day length np_time = np.datetime64(new_time) if np_time < self.df.index.values[self.end]: return # new data is earlier than current while np_time > self.df.index.values[self.end]: self.df.iloc[self.end] = self.df.iloc[self.end - 1] self.end += 1 # fill with prior row if new is later self.df.iloc[self.end] = datarow # fill when times match self.end += 1 def SierraRun(): global time_list time0 = time() filename = '/home/john/zRamdisk/SierraChart/Data/HSI-201306-HKFE-TD.scid' hsi = SierraFile(filename) time_series, data = hsi.get_existing_records() da = SierraFrame(time_series, data) wtst = SierraFile('/home/john/zRamdisk/SierraChart/Data/HSI-INPUT.scid') wtst.save_existing_records(da) print('df ready', da.end - 1, time() - time0) print(da.df[da.end - 1:da.end + 1]) print() df = da.df print('\n', np.datetime64('now'), da.end) print(df[da.end - 5:da.end + 5]) # print('next', hsi.pos, hsi.last) # jtst = SierraFile('/home/john/zRamdisk/SierraChart/Data/HSI-INPUT.scid') # time_series, data = jtst.get_existing_records() # ja = SierraFrame(time_series, data) # jf = ja.df # print('\n', ja.end) # print(df[ja.end-5:ja.end+5]) # print('next', jtst.pos, jtst.last) # return # ################### counter = 0 # sys.stdout = os.fdopen(sys.stdout.fileno(), "w", newline=None) counter_flag = False timer_no_data = time() timer_no_data_flag = False overruns = 0 overrun_list = [] while True: time0 = time() new_time, data, late_flag = hsi.get_record() if new_time != -999: da.add(new_time, data) wtst.save_record(da) if counter > 3: time_list.append(time() - time0) timer_no_data = time() # print(da.df[da.end-1:da.end], da.end) print('.', end=' ') sys.stdout.flush() if timer_no_data_flag: print('Data Restored') timer_no_data = time() timer_no_data_flag = False counter += 1 counter_flag = True if time() - timer_no_data >= 120 and not timer_no_data_flag: timer_no_data_flag = True print('Data lost for two minutes') if not late_flag: sleep_time = 0.1 - (time() - time0) if sleep_time > 0: sleep(sleep_time) if counter % 12 == 0 and counter_flag: counter_flag = False print(' Overruns:', overruns, overrun_list, end=' ') print('TimeStats', max(time_list), sum(time_list) / len(time_list), '\n', end=' ') # print(df[da.end-1:da.end]) sys.stdout.flush() # break def main(): SierraRun() if __name__ == '__main__': """ Takes a SierraChart scid file (input argument 1) and converts it to a Pandas DataFrame Timezone conversion can follow the users local timezone, or a specified integer (input l or an integer but if the default filename is being used, '' must be specified for the filename) """ print('start') sys.stdout.flush() main() print('fin') print('Overruns:', overruns, overrun_list) if time_list != []: print('TimeStats', max(time_list), sum(time_list) / len(time_list))