Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- from __future__ import print_function
- import numpy as np
- import pandas as pd
- import struct
- import sys
- from time import sleep, time
# Column labels for the frame built by SierraFrame.  Each of the first
# five labels is reachable under both a lower- and an upper-case name.
O = o = 'O'
H = h = 'H'
L = l = 'L'
C = c = 'C'
V = v = 'V'
x, y, z = 'x', 'y', 'z'

# Module-wide run statistics, updated while the polling loop runs.
time_list = []      # per-record processing times, in seconds
overrun_list = []   # wall-clock stamps at which an overrun was seen
overruns = 0        # number of overruns seen so far
class SierraFile(object):
    """Read and write fixed-size records of a Sierra Chart ``.scid`` file.

    The file is treated as a ``sizeHeader``-byte header followed by
    ``sizeRecord``-byte records.  Each record is packed as ``'d4f4I'``:
    one double holding a day-serial timestamp counted from ``excelDate``,
    four floats, and four unsigned integers.

    Attributes:
        filename: path of the file, as a string.
        tzAdjust: offset added to every timestamp on read and subtracted
            on write, expressed as a fraction of a day (10 hours).
        excelDate: day zero of the timestamp serial (1899-12-30).
        sizeHeader: header length in bytes (0x38).
        sizeRecord: record length in bytes (0x28).
        pos: byte offset of the next record that has not been consumed.
        last: file size observed by the most recent read.
    """

    _RECORD_FORMAT = 'd4f4I'

    def __init__(self, filename):
        self.filename = str(filename)
        self.tzAdjust = np.timedelta64(10, 'h') / np.timedelta64(1, 'D')
        self.excelDate = np.datetime64('1899-12-30')
        self.sizeHeader = 0x38
        self.sizeRecord = 0x28
        self.pos = 0
        self.last = 0

    def _decode(self, data):
        """Unpack one complete record into ``(timestamp, datarow)``.

        ``datarow`` holds the four float fields followed by four zeros;
        the integer fields stored in the file are discarded.
        """
        fields = struct.unpack(self._RECORD_FORMAT, data)
        dt = fields[0] + self.tzAdjust
        days = int(dt)
        # Round the fractional day to whole seconds so that float noise
        # in the serial does not shift the timestamp.
        seconds = int(round((dt - days) * 86400))
        stamp = (self.excelDate + np.timedelta64(days, 'D')
                 + np.timedelta64(seconds, 's'))
        datarow = [fields[1], fields[2], fields[3], fields[4], 0, 0, 0, 0]
        return stamp, datarow

    def _encode(self, dataframe, i, marker):
        """Pack row ``i`` of ``dataframe.df`` into one binary record.

        ``marker`` replaces the row's last column in the final integer
        field.  The other integer fields are cast with ``int`` because
        ``struct`` rejects float values for the ``'I'`` format, and the
        frame may hand them back as floats.
        """
        da = ((dataframe.df.index.values[i] - self.excelDate)
              / np.timedelta64(1, 'D') - self.tzAdjust)
        db, dc, dd, de, df, dg, dh, _ = dataframe.df.iloc[i]
        return struct.pack(self._RECORD_FORMAT, da, db, dc, dd, de,
                           int(df), int(dg), int(dh), marker)

    def get_existing_records(self):
        """Read every complete record currently in the file.

        Returns:
            ``(ts, rows)``: a list of ``numpy.datetime64`` timestamps and
            a parallel list of 8-element data rows.

        Side effects:
            ``self.pos`` and ``self.last`` are set to the offset just
            past the last complete record, so ``get_record`` continues
            from there.  A truncated trailing record (for instance one
            still being written) is left unread.
        """
        ts = []
        rows = []
        with open(self.filename, 'rb') as fscid:
            fscid.read(self.sizeHeader)  # discard header
            while True:
                data = fscid.read(self.sizeRecord)
                if len(data) < self.sizeRecord:
                    break  # end of file, or an incomplete record
                stamp, datarow = self._decode(data)
                ts.append(stamp)
                rows.append(datarow)
        # Derive the offset from the record count rather than tell() so
        # it always sits on a record boundary.
        self.pos = self.last = self.sizeHeader + len(rows) * self.sizeRecord
        return (ts, rows)

    def get_record(self):
        """Fetch the next unread record, if a complete one is available.

        Returns:
            ``(-999, 0, 0)`` when no complete new record exists;
            otherwise ``(new_time, datarow, late_flag)`` where
            ``late_flag`` is True when more than one record was pending.

        Side effects:
            Updates ``self.last``; advances ``self.pos`` by one record on
            success.  When more than one record is pending, prints an
            'Overrun' line and updates the module-level ``overruns`` and
            ``overrun_list``.
        """
        global overruns, overrun_list
        with open(self.filename, 'rb') as fscid:
            fscid.seek(0, 2)  # Go to the end of the file
            self.last = fscid.tell()
            pending = self.last - self.pos
            if pending < self.sizeRecord:
                # Nothing new, a half-written record, or a file shorter
                # than expected: report "no data" instead of unpacking.
                return (-999, 0, 0)
            late_flag = pending > self.sizeRecord
            if late_flag:
                print('Overrun', pending, pending / self.sizeRecord)
                overruns += 1
                overrun_list.append(np.datetime64('now'))
            fscid.seek(self.pos, 0)
            data = fscid.read(self.sizeRecord)
        if len(data) < self.sizeRecord:
            # The file shrank between the size check and the read.
            return (-999, 0, 0)
        self.pos += self.sizeRecord
        new_time, datarow = self._decode(data)
        return (new_time, datarow, late_flag)

    def save_existing_records(self, dataframe):
        """Rewrite the file with a header and the first ``end`` rows.

        Args:
            dataframe: object exposing ``df`` (a DataFrame with a
                datetime index and eight columns) and ``end`` (number of
                populated rows to write).
        """
        with open(self.filename, 'wb') as fscid:
            # 14 literal header bytes followed by zero padding.
            fscid.write(b'SCID8\x00\x00\x00(\x00\x00\x00\x01\x00')
            fscid.write(b'\x00\x00' * 21)
            for i in range(dataframe.end):
                fscid.write(self._encode(dataframe, i, 0x11100111))

    def save_record(self, dataframe):
        """Append row ``dataframe.end - 1`` to the file as one record."""
        with open(self.filename, 'ab') as fscid:
            fscid.write(self._encode(dataframe, dataframe.end - 1,
                                     0x88300388))
class SierraFrame(object):
    """Time-indexed DataFrame that is pre-extended with placeholder rows.

    Attributes:
        df: DataFrame with columns O, H, L, C, V, x, y, z.  Rows below
            ``end`` hold data; rows from ``end`` onward are empty
            placeholders spaced five seconds apart.
        end: index of the first unfilled row.
        pos: initialised to 0; not used inside this class.
    """

    _COLUMNS = ('O', 'H', 'L', 'C', 'V', 'x', 'y', 'z')

    def __init__(self, time_series, data):
        self.df = pd.DataFrame(data, index=time_series,
                               columns=list(self._COLUMNS))
        self.end = len(self.df)
        self.pos = 0
        self.extend_frame()

    def extend_frame(self):
        '''
        Append 4999 empty rows, five seconds apart, after the last
        time already present in self.df.
        Times falling in today's 14:00-15:00 window (the lunch break)
        are skipped: the sequence jumps forward one hour and continues
        from there.
        '''
        print('Extending DataFrame Now')
        s5 = np.timedelta64(5, 's')
        h1 = np.timedelta64(1, 'h')
        today = np.datetime64('today')
        sl = today + np.timedelta64(14, 'h')
        el = today + np.timedelta64(15, 'h')
        # Continue from the very last row, not from row ``end - 1``, so a
        # re-extension never repeats placeholder rows already present.
        stamp = self.df.index.values[-1]
        stamps = []
        for _ in range(4999):
            stamp = stamp + s5
            if sl <= stamp < el:
                # Carry the jump forward so that later stamps stay
                # unique and increasing.
                stamp = stamp + h1
            stamps.append(stamp)
        dg = pd.DataFrame(index=stamps, columns=self.df.columns)
        # DataFrame.append was removed in pandas 2.0; concat is the
        # equivalent that works across versions.
        self.df = pd.concat([self.df, dg])

    def _ensure_room(self):
        """Extend the frame when fewer than five unfilled rows remain."""
        if self.end > len(self.df) - 5:
            self.extend_frame()

    def add(self, new_time, datarow):
        """Store ``datarow`` in the first slot whose time is >= new_time.

        A row older than the next free slot is ignored.  Slots skipped
        on the way to ``new_time`` are filled with a copy of the row
        before them.

        Args:
            new_time: timestamp of the incoming row (anything accepted
                by ``numpy.datetime64``).
            datarow: eight values matching the frame's columns.
        """
        self._ensure_room()
        np_time = np.datetime64(new_time)
        if np_time < self.df.index.values[self.end]:
            return  # new data is earlier than current
        while np_time > self.df.index.values[self.end]:
            self.df.iloc[self.end] = self.df.iloc[self.end - 1]
            self.end += 1  # fill with prior row if new is later
            # A long gap can use up the placeholders; grow before the
            # next lookup instead of indexing past the end.
            self._ensure_room()
        self.df.iloc[self.end] = datarow
        self.end += 1
def SierraRun(
        source_path=('/home/john/zRamdisk/SierraChart/Data/'
                     'HSI-201306-HKFE-TD.scid'),
        output_path='/home/john/zRamdisk/SierraChart/Data/HSI-INPUT.scid'):
    """Mirror records from one scid file into another, forever.

    Loads every record already in ``source_path`` into a SierraFrame,
    writes them to ``output_path``, then polls the source roughly every
    0.1 s and appends each new record to the output.  Progress dots,
    data-loss notices and periodic timing/overrun statistics are
    printed to stdout.  The function never returns.

    Args:
        source_path: scid file to read from.
        output_path: scid file to (re)create and append to.
    """
    # All three are module-level accumulators.  overruns/overrun_list are
    # updated by SierraFile.get_record, so they must not be shadowed by
    # locals here or the periodic report would always show zero.
    global time_list, overruns, overrun_list
    time0 = time()
    hsi = SierraFile(source_path)
    time_series, data = hsi.get_existing_records()
    da = SierraFrame(time_series, data)
    wtst = SierraFile(output_path)
    wtst.save_existing_records(da)
    print('df ready', da.end - 1, time() - time0)
    print(da.df[da.end - 1:da.end + 1])
    print()
    df = da.df
    print('\n', np.datetime64('now'), da.end)
    print(df[da.end - 5:da.end + 5])
    counter = 0
    counter_flag = False
    timer_no_data = time()
    timer_no_data_flag = False
    overruns = 0
    overrun_list = []
    while True:
        time0 = time()
        new_time, data, late_flag = hsi.get_record()
        # get_record signals "no data" with an integer sentinel; testing
        # the type avoids comparing a datetime64 against an int.
        if isinstance(new_time, np.datetime64):
            rows_before = da.end
            da.add(new_time, data)
            if da.end > rows_before:
                # Write only when the frame accepted the row; a stale
                # tick would otherwise re-append the previous record.
                wtst.save_record(da)
            if counter > 3:
                # The first few iterations are left out of the stats.
                time_list.append(time() - time0)
            timer_no_data = time()
            print('.', end=' ')
            sys.stdout.flush()
            if timer_no_data_flag:
                print('Data Restored')
                timer_no_data = time()
                timer_no_data_flag = False
            counter += 1
            counter_flag = True
        if time() - timer_no_data >= 120 and not timer_no_data_flag:
            timer_no_data_flag = True
            print('Data lost for two minutes')
        if not late_flag:
            # Pace the loop at ~0.1 s per pass unless we are behind.
            sleep_time = 0.1 - (time() - time0)
            if sleep_time > 0:
                sleep(sleep_time)
        if counter % 12 == 0 and counter_flag:
            counter_flag = False
            print(' Overruns:', overruns, overrun_list, end=' ')
            print('TimeStats', max(time_list), sum(time_list) / len(time_list),
                  '\n', end=' ')
            sys.stdout.flush()
def main():
    """Script entry point: start the record-mirroring loop."""
    SierraRun()
if __name__ == '__main__':
    # Run the scid mirroring loop, then report the module-level overrun
    # and timing statistics once main() comes back.  No command-line
    # arguments are read.
    print('start')
    sys.stdout.flush()
    main()
    print('fin')
    print('Overruns:', overruns, overrun_list)
    if time_list:
        longest = max(time_list)
        average = sum(time_list) / len(time_list)
        print('TimeStats', longest, average)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement