Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- from __future__ import print_function
- import numpy as np
- import pandas as pd
- import struct
- import sys
- from time import sleep, time
- #from SC_Variables import *
# --- run-time statistics shared between SierraFile.read_record and SierraRun
# Per-record processing times in seconds, appended by SierraRun's polling loop.
time_list = []
# Wall-clock timestamp of every overrun reported by SierraFile.read_record.
overrun_list = []
# Number of overruns reported by SierraFile.read_record.
overruns = 0

# --- timeframe settings
# Not referenced by the visible code; presumably long / medium / short
# timeframe lengths in minutes -- TODO confirm against callers.
lt = 15
mt = 5
st = 1

# --- resampling configuration
# Column order used for every frame produced by resampling.
cols = ['O', 'H', 'L', 'C', 'V', 'x', 'y', 'z']
# Aggregation applied to each column when bars are merged into longer bars.
ohlc = {
    'O': 'first',
    'H': 'max',
    'L': 'min',
    'C': 'last',
    'V': 'sum',
    'x': 'sum',
    'y': 'sum',
    'z': 'sum',
}
class SierraFile(object):
    """
    Reader / writer for a Sierra Chart style ``.scid`` intraday file.

    Layout used by this class: a 0x38 (56) byte header followed by fixed
    0x28 (40) byte records packed as ``'d4f4I'`` -- one double holding the
    timestamp in days since 1899-12-30, four floats and four unsigned ints.
    Timestamps are shifted by a fixed +10 hours when read and shifted back
    by the same amount when written.

    Attributes:
        filename:   path of the file, always stored as ``str``.
        tzAdjust:   the 10 hour offset expressed as a fraction of a day.
        excelDate:  day zero of the serial-date timestamps (1899-12-30).
        sizeHeader: header length in bytes.
        sizeRecord: record length in bytes.
        pos:        byte offset of the next record ``read_record`` returns.
        last:       file length seen the last time the file was inspected.
    """

    def __init__(self, filename):
        self.filename = str(filename)
        self.tzAdjust = np.timedelta64(10, 'h') / np.timedelta64(1, 'D')
        self.excelDate = np.datetime64('1899-12-30')
        self.sizeHeader = 0x38
        self.sizeRecord = 0x28
        self.pos = 0
        self.last = 0

    def _decode(self, data):
        """Unpack one complete raw record into ``(timestamp, datarow)``."""
        d = struct.unpack('d4f4I', data)
        shifted = d[0] + self.tzAdjust
        whole_days = int(shifted)
        # The fractional day is rounded to the nearest whole second.
        seconds = int(round((shifted - whole_days) * 86400))
        stamp = (self.excelDate + np.timedelta64(whole_days, 'D')
                 + np.timedelta64(seconds, 's'))
        # Only the first five values after the timestamp are kept; the
        # remaining three slots of the row are always zero.
        return stamp, [d[1], d[2], d[3], d[4], d[5], 0, 0, 0]

    def _pack_row(self, dataframe, i, marker):
        """Pack row ``i`` of ``dataframe.df`` as one raw record.

        ``marker`` is stored in the last unsigned-int slot in place of the
        frame's own eighth column.
        """
        stamp = ((dataframe.df.index.values[i] - self.excelDate)
                 / np.timedelta64(1, 'D') - self.tzAdjust)
        # Slice to 0:8 so the frame may carry more than eight columns.
        (open_, high, low, close,
         volume, extra_x, extra_y, _unused) = dataframe.df.iloc[i, 0:8]
        return struct.pack('d4f4I', stamp, open_, high, low, close,
                           int(volume), int(extra_x), int(extra_y),
                           int(marker))

    def read_existing_records(self):
        """Read every complete record currently in the file.

        Returns:
            ``(ts, rows)`` -- a list of ``numpy.datetime64`` timestamps and
            a parallel list of 8-element data rows.

        ``self.pos`` and ``self.last`` are left pointing just past the last
        complete record.  A trailing partial record (the writer may be in
        the middle of appending) is not consumed, so ``read_record`` picks
        it up once it has been completed.
        """
        ts = []
        rows = []
        with open(self.filename, 'rb') as fscid:
            fscid.read(self.sizeHeader)  # discard header
            while True:
                data = fscid.read(self.sizeRecord)
                if len(data) < self.sizeRecord:
                    break  # end of file, or a record still being written
                stamp, datarow = self._decode(data)
                ts.append(stamp)
                rows.append(datarow)
        self.pos = self.last = self.sizeHeader + len(rows) * self.sizeRecord
        return (ts, rows)

    def read_record(self):
        """Read the next unread record, if a complete one is available.

        Returns:
            ``(-999, 0, 0)`` when no complete new record exists, otherwise
            ``(new_time, datarow, late_flag)`` where ``late_flag`` is True
            when more than one record was waiting (an overrun).

        Overruns are printed, counted in the module-level ``overruns`` and
        time-stamped in the module-level ``overrun_list``.
        """
        global overruns, overrun_list
        with open(self.filename, 'rb') as fscid:
            fscid.seek(0, 2)  # go to the end of the file
            self.last = fscid.tell()
            pending = self.last - self.pos
            if pending < self.sizeRecord:
                # Nothing new, or only part of a record has been written
                # so far: leave self.pos untouched and report "no data".
                return (-999, 0, 0)
            late_flag = pending > self.sizeRecord
            if late_flag:
                print('Overrun', pending, pending / self.sizeRecord)
                overruns += 1
                overrun_list.append(np.datetime64('now'))
            fscid.seek(self.pos, 0)
            data = fscid.read(self.sizeRecord)
        # Advance only after the record has actually been read.
        self.pos += self.sizeRecord
        new_time, datarow = self._decode(data)
        return (new_time, datarow, late_flag)

    def write_existing_records(self, dataframe):
        """Create / overwrite the file with the first ``dataframe.end`` rows.

        ``dataframe`` must expose ``df`` (a time-indexed DataFrame with at
        least eight columns) and ``end`` (number of populated rows).  The
        row number is written into the last unsigned-int slot of each
        record.
        """
        with open(self.filename, 'wb') as fscid:
            # 'SCID', then b'8\x00\x00\x00' == 0x38 (sizeHeader) and
            # b'(\x00\x00\x00' == 0x28 (sizeRecord), then b'\x01\x00'
            # (presumably a format version -- TODO confirm).
            fscid.write(b'SCID8\x00\x00\x00(\x00\x00\x00\x01\x00')
            # Zero padding: 14 + 21 * 2 == 56 bytes == sizeHeader.
            fscid.write(b'\x00\x00' * 21)
            for i in range(dataframe.end):
                fscid.write(self._pack_row(dataframe, i, i))

    def write_record(self, dataframe):
        """Append the last populated row (``dataframe.end - 1``) to the file.

        The last unsigned-int slot of the record is set to the constant
        0x88300388.
        """
        with open(self.filename, 'ab') as fscid:
            fscid.write(self._pack_row(dataframe, dataframe.end - 1,
                                       0x88300388))
class SierraFrame(object):
    """
    DataFrame is the basic object for analysis:
        init reads the supplied timestamps / rows into the initial frame
            (extend_frame assumes 5 second bars)
        extend_frame adds 4999 empty rows to the df because appending
            rows one at a time is slow
        add writes new data into the extended frame for real time operation
        build_tf creates a new dataframe resampled to a longer bar length
        build_htf_array creates an array showing higher timeframe bars as
            they develop for the lower timeframe array
        countfloats is a test method

    Attributes:
        df:  the frame, columns O, H, L, C, V, x, y, z, indexed by time.
        end: number of populated rows; rows from ``end`` onward are
             pre-allocated placeholders.
        pos: initialised to 0 and not used by the methods of this class.
    """

    def __init__(self, time_index, data):
        self.df = pd.DataFrame(data, index=time_index,
                               columns=['O', 'H', 'L', 'C',
                                        'V', 'x', 'y', 'z'])
        self.end = len(self.df)
        self.pos = 0

    def extend_frame(self):
        '''
        Append 4999 empty (NaN) rows at 5 second spacing after the last
        timestamp of self.df and convert the whole frame to float64.

        The hour from 14:00 to 15:00 of the current date is skipped: the
        first generated timestamp that falls inside it, and every later
        one, is moved on by one hour, so the index stays strictly
        increasing.  (Shifting only the lunch-hour stamps would collide
        with the genuine post-lunch stamps.)
        '''
        print('Extending DataFrame Now')
        s5 = np.timedelta64(5, 's')
        h1 = np.timedelta64(1, 'h')
        sl = np.datetime64('today') + np.timedelta64(14, 'h')
        el = np.datetime64('today') + np.timedelta64(15, 'h')
        # Continue from the very last index value, not from the last
        # populated row: unfilled placeholder rows may already follow
        # self.end and must not be generated a second time.
        start_time = self.df.index.values[-1]
        stamps = start_time + np.arange(1, 5000) * s5
        in_lunch = (stamps >= sl) & (stamps < el)
        if in_lunch.any():
            first = int(in_lunch.argmax())
            stamps[first:] += h1
        blank = pd.DataFrame(np.nan, index=stamps, columns=self.df.columns)
        self.df = pd.concat([self.df, blank]).astype(np.float64)

    def add(self, new_time, datarow):
        '''
        Add a row to an existing extended df but:
            extend if its within 5 of the end
            ignore it if it is earlier than the next expected bar
            fill with last bar if its not the next bar
            store every value as float for df speed of access

        The caller's datarow is not modified.
        '''
        if self.end > len(self.df) - 5:
            self.extend_frame()  # not needed if first fill > day length
        np_time = np.datetime64(new_time)
        if np_time < self.df.index.values[self.end]:
            return  # new data is earlier than current
        while np_time > self.df.index.values[self.end]:
            self.df.iloc[self.end] = self.df.iloc[self.end - 1]
            self.end += 1  # fill with prior row if new is later
            if self.end > len(self.df) - 5:
                # A long gap must not run past the pre-allocated rows.
                self.extend_frame()
        self.df.iloc[self.end] = [float(v) for v in datarow]
        self.end += 1

    def build_tf(self, ht):
        '''
        Create higher timeframe df from self.df
        with ht being the high timeframe bar length in minutes.
        Columns are aggregated as described by the module-level ohlc
        mapping and returned in the module-level cols order.
        '''
        return self.df.resample(str(ht) + 'min').agg(ohlc)[cols]

    def build_htf_array(self, st, ht):
        '''
        Map higher timeframe development on to self.df resampled to
        st minute bars.

        Every run of ht consecutive st-minute rows forms one higher
        timeframe bar.  Within a run each row shows the bar as it has
        developed so far: O is the open of the run's first row, H / L are
        the running high / low, C and V are left as resampled.
        Returns the first five columns (O, H, L, C, V).
        '''
        di = self.df.resample(str(st) + 'min').agg(ohlc)[cols]
        # Work on a copy so the writes below never hit a view of di.
        dih = di.iloc[:, 0:5].copy()
        for i in range(len(dih)):
            if i == 0 or i // ht > (i - 1) // ht:
                # first row of a new higher timeframe bar
                bO = dih.iloc[i, 0]
                bH = dih.iloc[i, 1]
                bL = dih.iloc[i, 2]
            else:
                dih.iloc[i, 0] = bO
                dih.iloc[i, 1] = bH = max(bH, dih.iloc[i, 1])
                dih.iloc[i, 2] = bL = min(bL, dih.iloc[i, 2])
        return dih

    def countfloats(self):
        '''
        Test helper: return (floats, nonfloats), the number of cells of
        self.df that are / are not instances of float.
        An empty frame gives (0, 0).
        '''
        length, width = self.df.shape
        floats = 0
        nonfloats = 0
        for i in range(length):
            for j in range(width):
                if isinstance(self.df.iloc[i, j], float):
                    floats += 1
                else:
                    nonfloats += 1
        return (floats, nonfloats)
def build_htf_array(di, ht):
    '''
    Map higher timeframe development on to input df.

    di is a bar frame whose first five columns are O, H, L, C, V.
    ht is the number of consecutive rows of di that make up one higher
    timeframe bar (equal to the bar length in minutes only when di holds
    one minute bars).

    Returns a copy of the first five columns in which every row shows the
    higher timeframe bar as developed so far: O is the open of the first
    row of the bar, H / L are the running high / low, C and V are left
    unchanged.  di itself is not modified.
    '''
    dih = di.iloc[:, 0:5].copy()
    bar_open = bar_high = bar_low = None
    for row in range(len(dih)):
        if row == 0 or row // ht > (row - 1) // ht:
            # First row of a new higher timeframe bar: restart the
            # running values from this row and leave the row as it is.
            bar_open = dih.iloc[row, 0]
            bar_high = dih.iloc[row, 1]
            bar_low = dih.iloc[row, 2]
            continue
        bar_high = max(bar_high, dih.iloc[row, 1])
        bar_low = min(bar_low, dih.iloc[row, 2])
        dih.iloc[row, 0] = bar_open
        dih.iloc[row, 1] = bar_high
        dih.iloc[row, 2] = bar_low
    return dih
def build_tf(di, ht):
    '''
    Create higher timeframe df from the time-indexed input, di,
    with ht being the high timeframe bar length in minutes.

    Each column is aggregated as given by the module-level ohlc mapping
    and the result is returned in the module-level cols order.
    '''
    # resample(..., how=...) no longer exists in current pandas; the
    # aggregation is applied to the resampler object instead.
    return di.resample(str(ht) + 'min').agg(ohlc)[cols]
def dt64_to_str(dt64):
    '''
    Convert numpy datetime64 to string in Ilya's format.

    The ISO text of dt64 (YYYY-MM-DDThh:mm:ss...) is reduced to its
    year, month, day, hour, minute and second fields, concatenated
    without separators; anything after the seconds is dropped.
    '''
    iso = str(dt64)
    # (start, stop) positions of each field inside the ISO text
    fields = ((0, 4), (5, 7), (8, 10), (11, 13), (14, 16), (17, 19))
    return ''.join(iso[start:stop] for start, stop in fields)
def str_to_dt64(s):
    '''
    Convert string in Ilya's format to numpy datetime64.

    s holds year, month, day, hour, minute and second as 14 consecutive
    digits (YYYYMMDDhhmmss).
    '''
    pieces = (s[0:4], s[4:6], s[6:8], s[8:10], s[10:12], s[12:14])
    return np.datetime64('{}-{}-{}T{}:{}:{}'.format(*pieces))
def SierraRun(filename=None, out_filename=None, debug=False):
    """
    Load an existing .scid file, mirror it to a second .scid file and then
    poll the source forever, copying each new record across as it arrives.

    Args:
        filename:     source .scid file; defaults to the HSIM13 path below.
        out_filename: .scid file that is rewritten and then appended to;
                      defaults to the HSI-INPUT path below.
        debug:        when True, drop into ipdb after loading, after the
                      initial write and whenever the record counter is a
                      non-zero multiple of 60.  ipdb is only imported when
                      this is set.

    The function does not return.  Per-record processing times go to the
    module-level time_list; overrun statistics are kept in the module-level
    overruns / overrun_list, which SierraFile.read_record updates.
    """
    # These must be the module-level objects: read_record updates the
    # module-level counters, so local copies here would always report
    # zero overruns.
    global time_list, overruns, overrun_list

    if filename is None:
        filename = ('/home/john/zRamdisk/SierraChart/Data/'
                    'HSIM13-FUT-HKFE-TD.scid')
    if out_filename is None:
        out_filename = '/home/john/zRamdisk/SierraChart/Data/HSI-INPUT.scid'

    time0 = time()
    hsi = SierraFile(filename)
    time_index, data = hsi.read_existing_records()
    da = SierraFrame(time_index, data)
    if debug:
        import ipdb
        ipdb.set_trace()
    da.extend_frame()
    wtst = SierraFile(out_filename)
    wtst.write_existing_records(da)
    print('df ready', da.end - 1, time() - time0)
    print(da.df[da.end - 1:da.end + 1])
    print()
    df = da.df
    print('\n', np.datetime64('now'), da.end)
    print(df[da.end - 5:da.end + 5])
    if debug:
        import ipdb
        ipdb.set_trace()

    counter = 0
    counter_flag = False  # True once a record arrived since the last report
    timer_no_data = time()
    timer_no_data_flag = False  # True while the "data lost" state is active
    overruns = 0
    overrun_list = []
    while True:
        time0 = time()
        new_time, data, late_flag = hsi.read_record()
        # read_record signals "nothing new" with the integer -999; test the
        # type first so a datetime64 is never compared with an int.
        no_data = isinstance(new_time, int) and new_time == -999
        if not no_data:
            da.add(new_time, data)
            sys.stdout.flush()
            wtst.write_record(da)
            if counter > 3:
                # the first few records are left out of the timing stats
                time_list.append(time() - time0)
            print('.', end=' ')
            sys.stdout.flush()
            if timer_no_data_flag:
                print('Data Restored')
            timer_no_data = time()
            timer_no_data_flag = False
            counter += 1
            counter_flag = True
        if time() - timer_no_data >= 120 and not timer_no_data_flag:
            timer_no_data_flag = True
            print('Data lost for two minutes')
        if not late_flag:
            # Not behind: pace the loop to roughly one poll per 0.1 s.
            sleep_time = 0.1 - (time() - time0)
            if sleep_time > 0:
                sleep(sleep_time)
        if counter % 12 == 0 and counter_flag:
            counter_flag = False
            print(' Overruns:', overruns, overrun_list, end=' ')
            print('TimeStats', "{:.6f} {:.6f}".format(max(time_list),
                  sum(time_list) / len(time_list)), '\n', end=' ')
            sys.stdout.flush()
        if debug and counter % 60 == 0 and counter != 0:
            import ipdb
            ipdb.set_trace()
def main():
    """Script entry point: hand control to SierraRun()."""
    SierraRun()
if __name__ == '__main__':
    # Runs main() between a 'start' and a 'fin' message and, once main()
    # has returned, prints the maximum and mean of the collected
    # processing times.
    # NOTE(review): the original notes here described a filename and a
    # timezone argument, but no command-line arguments are read anywhere
    # in the visible code -- confirm whether that interface is still
    # planned.
    print('start')
    sys.stdout.flush()
    main()
    print('fin')
    if time_list:
        slowest = max(time_list)
        mean = sum(time_list) / len(time_list)
        print('TimeStats', "{:.6f} {:.6f}".format(slowest, mean),
              '\n', end=' ')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement