#!/usr/bin/python3
from __future__ import print_function
import numpy as np
import pandas as pd
import struct
import sys
from time import sleep, time
# Column labels used for the price/volume DataFrame. Each of the first five
# is available under both an upper-case and a lower-case name.
O = o = 'O'
H = h = 'H'
L = l = 'L'
C = c = 'C'
V = v = 'V'
x, y, z = 'x', 'y', 'z'

# Run-time statistics shared across the module.
time_list = []      # per-record processing durations, appended by SierraRun
overrun_list = []   # time stamps at which SierraFile.get_record saw a backlog
overruns = 0        # number of backlogs seen by SierraFile.get_record
class SierraFile(object):
    """Read and write fixed-size records of a Sierra Chart ``.scid`` file.

    The file is handled as a ``sizeHeader``-byte header followed by
    ``sizeRecord``-byte records packed as ``'d4f4I'``: one double, four
    floats and four unsigned ints.  The double is a time stamp expressed as
    a (fractional) number of days since ``excelDate``; ``tzAdjust`` (ten
    hours, as a fraction of a day) is added when reading and subtracted when
    writing.

    Attributes:
        filename: path of the file, as a string.
        pos: byte offset of the next record that has not been read yet.
        last: file size (or end of the last complete record) seen at the
            most recent read.
    """

    RECORD_FORMAT = 'd4f4I'
    # Returned by get_record() when no complete new record is available.
    NO_DATA = (-999, 0, 0)
    # Values stored in the last integer field of every written record; they
    # replace whatever the frame holds in that column.
    # NOTE(review): the meaning of these constants is not visible here.
    EXISTING_MARKER = 0x11100111
    APPENDED_MARKER = 0x88300388

    def __init__(self, filename):
        self.filename = str(filename)
        # Ten hours expressed in days, so it can be added to the day count.
        self.tzAdjust = np.timedelta64(10, 'h') / np.timedelta64(1, 'D')
        self.excelDate = np.datetime64('1899-12-30')
        self.sizeHeader = 0x38
        self.sizeRecord = 0x28
        self.pos = 0
        self.last = 0

    def _to_datetime(self, serial_days):
        """Convert a stored day count into a second-resolution datetime64."""
        dt = serial_days + self.tzAdjust
        whole_days = int(dt)
        seconds = int(round((dt - whole_days) * 86400))
        return (self.excelDate + np.timedelta64(whole_days, 'D')
                + np.timedelta64(seconds, 's'))

    def _decode(self, data):
        """Unpack one raw record into ``(time, [f1, f2, f3, f4, 0, 0, 0, 0])``.

        The four integer fields of the record are not kept; zeros are
        returned in their place.
        """
        d = struct.unpack(self.RECORD_FORMAT, data)
        return self._to_datetime(d[0]), [d[1], d[2], d[3], d[4], 0, 0, 0, 0]

    def _encode(self, dataframe, i, marker):
        """Pack row ``i`` of ``dataframe.df`` into one raw record."""
        da = ((dataframe.df.index.values[i] - self.excelDate)
              / np.timedelta64(1, 'D') - self.tzAdjust)
        db, dc, dd, de, df, dg, dh, _ = dataframe.df.iloc[i]
        # pandas may hand the integer columns back as floats (a numeric row
        # is upcast to one dtype); struct's 'I' only accepts real integers.
        return struct.pack(self.RECORD_FORMAT, da, db, dc, dd, de,
                           int(df), int(dg), int(dh), marker)

    def get_existing_records(self):
        """Read every complete record currently in the file.

        Returns:
            ``(ts, rows)``: a list of datetime64 time stamps and a parallel
            list of 8-element data rows.

        A trailing fragment shorter than one record (for example a record
        that is still being written) is ignored; ``pos`` and ``last`` are
        left at the end of the last complete record so the fragment is
        picked up by get_record() once it is complete.
        """
        ts = []
        rows = []
        with open(self.filename, 'rb') as fscid:
            fscid.read(self.sizeHeader)  # discard header
            while True:
                data = fscid.read(self.sizeRecord)
                if len(data) < self.sizeRecord:
                    break
                stamp, datarow = self._decode(data)
                ts.append(stamp)
                rows.append(datarow)
        self.pos = self.last = self.sizeHeader + len(rows) * self.sizeRecord
        return (ts, rows)

    def get_record(self):
        """Read the next unread record, if a complete one is available.

        Returns:
            ``(new_time, datarow, late_flag)`` for a record, where
            ``late_flag`` is True when more than one record was waiting, or
            ``(-999, 0, 0)`` when there is no complete new record.

        When more than one record is waiting, the module-level ``overruns``
        counter is incremented and the current time is appended to
        ``overrun_list``.
        """
        global overruns, overrun_list
        with open(self.filename, 'rb') as fscid:
            fscid.seek(0, 2)  # Go to the end of the file
            self.last = fscid.tell()
            pending = self.last - self.pos
            if pending < self.sizeRecord:
                # Nothing new, or only part of a record so far: wait.
                return self.NO_DATA
            if pending > self.sizeRecord:  # > 1 record
                print('Overrun', pending, pending / self.sizeRecord)
                overruns += 1
                overrun_list.append(np.datetime64('now'))
                late_flag = True
            else:
                late_flag = False
            fscid.seek(self.pos, 0)
            data = fscid.read(self.sizeRecord)
        if len(data) < self.sizeRecord:
            return self.NO_DATA
        # Only advance once a whole record has actually been read.
        self.pos += self.sizeRecord
        new_time, datarow = self._decode(data)
        return (new_time, datarow, late_flag)

    def save_existing_records(self, dataframe):
        """Overwrite the file with a header and rows ``0 .. end-1``.

        Args:
            dataframe: object exposing ``df`` (a DataFrame with eight
                columns and a datetime index) and ``end`` (number of
                filled rows).
        """
        header = b'SCID8\x00\x00\x00(\x00\x00\x00\x01\x00'
        with open(self.filename, 'wb') as fscid:
            # Zero-pad the header to the size the readers skip.
            fscid.write(header.ljust(self.sizeHeader, b'\x00'))
            fscid.writelines(
                self._encode(dataframe, i, self.EXISTING_MARKER)
                for i in range(dataframe.end))

    def save_record(self, dataframe):
        """Append row ``end - 1`` of ``dataframe.df`` to the file."""
        record = self._encode(dataframe, dataframe.end - 1,
                              self.APPENDED_MARKER)
        with open(self.filename, 'ab') as fscid:
            fscid.write(record)
class SierraFrame(object):
    """Pre-allocated DataFrame of price rows on a 5-second time grid.

    ``df`` holds the rows loaded at construction followed by blank rows
    whose index continues in ``STEP`` increments.  ``end`` is the number of
    filled rows, i.e. the position of the next row to fill.

    Attributes:
        df: the DataFrame, with columns ``COLUMNS`` and a datetime index.
        end: number of filled rows.
        pos: initialised to 0; not otherwise used by this class.
    """

    # Same labels as the module-level O, H, L, C, V, x, y, z names.
    COLUMNS = ('O', 'H', 'L', 'C', 'V', 'x', 'y', 'z')
    STEP = np.timedelta64(5, 's')
    EXTEND_ROWS = 4999      # blank rows added by each extend_frame() call
    HEADROOM = 5            # extend when fewer than this many rows are free
    LUNCH_START_HOUR = 14   # lunch break on today's date: [14:00, 15:00)
    LUNCH_END_HOUR = 15

    def __init__(self, time_series, data):
        """Build the frame from parallel lists of time stamps and rows.

        Args:
            time_series: index values, one per row.
            data: rows of eight values each, in ``COLUMNS`` order.
        """
        self.df = pd.DataFrame(data, index=time_series,
                               columns=list(self.COLUMNS))
        self.end = len(self.df)
        self.pos = 0
        self.extend_frame()

    def extend_frame(self):
        """Append ``EXTEND_ROWS`` blank rows continuing the time grid.

        New time stamps follow the last index entry in ``STEP`` increments.
        Slots that fall inside today's lunch break are dropped, so the index
        stays strictly increasing and jumps straight from the last slot
        before the break to the first one after it.

        Raises:
            ValueError: if the frame has no rows to continue from.
        """
        print('Extending DataFrame Now')
        if len(self.df) == 0:
            raise ValueError('cannot extend an empty frame: no start time')
        today = np.datetime64('today')
        lunch_start = today + np.timedelta64(self.LUNCH_START_HOUR, 'h')
        lunch_end = today + np.timedelta64(self.LUNCH_END_HOUR, 'h')
        lunch_steps = int((lunch_end - lunch_start) / self.STEP)
        # Continue from the last allocated slot, not from the last filled
        # one: on a re-extension some blank slots still follow `end`.
        start_time = self.df.index.values[-1]
        # Generate enough candidates that EXTEND_ROWS remain even if the
        # whole lunch break is dropped.
        steps = np.arange(1, self.EXTEND_ROWS + lunch_steps + 1)
        candidates = start_time + steps * self.STEP
        outside_lunch = (candidates < lunch_start) | (candidates >= lunch_end)
        new_index = candidates[outside_lunch][:self.EXTEND_ROWS]
        blank = pd.DataFrame(index=new_index, columns=self.df.columns)
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        self.df = pd.concat([self.df, blank])

    def _ensure_headroom(self):
        """Extend the frame when fewer than HEADROOM free rows remain."""
        if self.end > len(self.df) - self.HEADROOM:
            self.extend_frame()

    def add(self, new_time, datarow):
        """Store ``datarow`` in the slot for ``new_time``.

        A row earlier than the next free slot is ignored.  If ``new_time``
        is later than the next free slot, the intermediate slots are filled
        with a copy of the previous row first.

        Args:
            new_time: time stamp of the row (anything np.datetime64 accepts).
            datarow: eight values in ``COLUMNS`` order.
        """
        self._ensure_headroom()
        np_time = np.datetime64(new_time)
        if np_time < self.df.index.values[self.end]:
            return  # new data is earlier than current
        while np_time > self.df.index.values[self.end]:
            # fill with prior row if new is later
            self.df.iloc[self.end] = self.df.iloc[self.end - 1]
            self.end += 1
            # A long gap can use up the blank rows while filling.
            self._ensure_headroom()
        self.df.iloc[self.end] = datarow  # fill when times match
        self.end += 1
def SierraRun(
        source='/home/john/zRamdisk/SierraChart/Data/HSI-201306-HKFE-TD.scid',
        target='/home/john/zRamdisk/SierraChart/Data/HSI-INPUT.scid',
        poll_interval=0.1):
    """Mirror records from ``source`` into ``target`` until interrupted.

    All records already in ``source`` are loaded into a SierraFrame and
    written to ``target``.  The function then polls ``source`` forever:
    each new record is added to the frame and the frame's latest row is
    appended to ``target``.  Progress, data-loss notices and timing
    statistics are printed to stdout.

    Args:
        source: path of the .scid file to read.
        target: path of the .scid file to (over)write.
        poll_interval: target duration in seconds of one polling cycle; the
            pause is skipped while a backlog of records is waiting.

    This function does not return.  It resets and updates the module-level
    ``overruns``, ``overrun_list`` and ``time_list`` statistics.
    """
    # overruns / overrun_list are updated by SierraFile.get_record() at
    # module level; binding them as locals here would report stale zeros.
    global time_list, overruns, overrun_list
    time0 = time()
    hsi = SierraFile(source)
    time_series, data = hsi.get_existing_records()
    da = SierraFrame(time_series, data)
    wtst = SierraFile(target)
    wtst.save_existing_records(da)
    print('df ready', da.end - 1, time() - time0)
    print(da.df[da.end - 1:da.end + 1])
    print()
    print('\n', np.datetime64('now'), da.end)
    print(da.df[da.end - 5:da.end + 5])
    counter = 0             # records processed so far
    counter_flag = False    # True when a record arrived since last report
    timer_no_data = time()  # time the last record was seen
    timer_no_data_flag = False  # True once the data-loss notice was printed
    overruns = 0
    overrun_list = []
    while True:
        time0 = time()
        new_time, data, late_flag = hsi.get_record()
        # get_record() returns the integer -999 in place of a time stamp
        # when nothing new is available; test the type so a datetime64 is
        # never compared with an integer.
        if isinstance(new_time, np.datetime64):
            da.add(new_time, data)
            wtst.save_record(da)
            if counter > 3:
                # Skip the first few cycles when collecting timings.
                time_list.append(time() - time0)
            timer_no_data = time()
            print('.', end=' ')
            sys.stdout.flush()
            if timer_no_data_flag:
                print('Data Restored')
                timer_no_data = time()
                timer_no_data_flag = False
            counter += 1
            counter_flag = True
        if time() - timer_no_data >= 120 and not timer_no_data_flag:
            timer_no_data_flag = True
            print('Data lost for two minutes')
        if not late_flag:
            # Sleep for whatever is left of this polling cycle.
            sleep_time = poll_interval - (time() - time0)
            if sleep_time > 0:
                sleep(sleep_time)
        if counter % 12 == 0 and counter_flag:
            counter_flag = False
            print(' Overruns:', overruns, overrun_list, end=' ')
            print('TimeStats', max(time_list), sum(time_list) / len(time_list),
                  '\n', end=' ')
            sys.stdout.flush()
def main():
    """Run SierraRun() until the user interrupts it.

    SierraRun() loops forever, so Ctrl-C is the normal way to stop it.
    The interrupt is absorbed here so that the caller's code after main()
    still runs.
    """
    try:
        SierraRun()
    except KeyboardInterrupt:
        # End the line of progress dots before anything else is printed.
        print()
if __name__ == '__main__':
    # Script entry point: run main(), then print the overrun count and the
    # per-record timing statistics collected in the module-level globals.
    # No command-line arguments are read.
    print('start')
    sys.stdout.flush()
    main()
    print('fin')
    print('Overruns:', overruns, overrun_list)
    if time_list:
        print('TimeStats', max(time_list), sum(time_list) / len(time_list))