Advertisement
user_137

SierraFile and SierraFrame Classes

Jun 17th, 2013
248
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.65 KB | None | 0 0
  1. #!/usr/bin/python3
  2. from __future__ import print_function
  3. import numpy as np
  4. import pandas as pd
  5. import struct
  6. import sys
  7. from time import sleep, time
  8.  
  9. o = O = 'O'
  10. h = H = 'H'
  11. l = L = 'L'
  12. c = C = 'C'
  13. v = V = 'V'
  14. x = 'x'
  15. y = 'y'
  16. z = 'z'
  17.  
  18. time_list = []
  19. overrun_list = []
  20. overruns = 0
  21.  
  22.  
  23. class SierraFile(object):
  24.     """        """
  25.     def __init__(self, filename):
  26.         self.filename = str(filename)
  27.         # self.tzAdjust = t imedelta(hours=+10).seconds/d2s
  28.         self.tzAdjust = np.timedelta64(10, 'h') / np.timedelta64(1, 'D')
  29.         self.excelDate = np.datetime64('1899-12-30')
  30.         self.sizeHeader = 0x38
  31.         self.sizeRecord = 0x28
  32.         self.pos = 0
  33.         self.last = 0
  34.  
  35.     def get_existing_records(self):
  36.         with open(self.filename, 'rb') as fscid:
  37.             fscid.read(self.sizeHeader)  # discard header
  38.             rows = []
  39.             ts = []
  40.             for i in range(1000000):
  41.                 data = fscid.read(self.sizeRecord)
  42.                 if data not in ('', b''):
  43.                         d = struct.unpack('d4f4I', data)
  44.                         dt = d[0] + self.tzAdjust
  45.                         ts.append(self.excelDate + np.timedelta64(int(dt))
  46.                                   + (np.timedelta64(int(round((dt - int(dt))
  47.                                      * 86400)), 's')))
  48.                         datarow = [d[1], d[2], d[3], d[4], 0, 0, 0, 0]
  49.                         rows.append(datarow)
  50.                 else:
  51.                     break
  52.             self.pos = self.last = fscid.tell()
  53.         return (ts, rows)
  54.  
  55.     def get_record(self):
  56.         global overruns, overrun_list
  57.         with open(self.filename, 'rb') as fscid:
  58.             fscid.seek(0, 2)  # Go to the end of the file
  59.             self.last = fscid.tell()
  60.             if self.last == self.pos:  # no new data >> nothing to do
  61.                 return (-999, 0, 0)
  62.             else:  # data to collect
  63.                 if self.pos < self.last - self.sizeRecord:  # > 1 record
  64.                     print('Overrun', self.last - self.pos,
  65.                           (self.last - self.pos) / self.sizeRecord)
  66.                     overruns += 1
  67.                     overrun_list.append(np.datetime64('now'))
  68.                     late_flag = True
  69.                 else:
  70.                     late_flag = False
  71.                 fscid.seek(self.pos, 0)
  72.                 self.pos += self.sizeRecord
  73.                 data = fscid.read(self.sizeRecord)
  74.                 d = struct.unpack('d4f4I', data)
  75.                 dt = d[0] + self.tzAdjust
  76.                 new_time = (self.excelDate + np.timedelta64(int(dt))
  77.                             + (np.timedelta64(int(round((dt - int(dt))
  78.                                * 86400)), 's')))
  79.                 datarow = [d[1], d[2], d[3], d[4], 0, 0, 0, 0]
  80.                 return (new_time, datarow, late_flag)
  81.  
  82.     def save_existing_records(self, dataframe):
  83.         with open(self.filename, 'wb') as fscid:
  84.             header = b'SCID8\x00\x00\x00(\x00\x00\x00\x01\x00'
  85.             fscid.write(header)
  86.             for i in range(21):
  87.                 fscid.write(b'\x00\x00')
  88.             for i in range(dataframe.end):
  89.                 da = ((dataframe.df.index.values[i] - self.excelDate)
  90.                       / np.timedelta64(1, 'D') - self.tzAdjust)
  91.                 db, dc, dd, de, df, dg, dh, di = dataframe.df.iloc[i]
  92.                 di = 0x11100111
  93.                 wt = struct.pack('d4f4I', da, db, dc, dd, de, df, dg, dh, di)
  94.                 fscid.write(wt)
  95.  
  96.     def save_record(self, dataframe):
  97.         with open(self.filename, 'ab') as fscid:
  98.             i = dataframe.end - 1
  99.             da = ((dataframe.df.index.values[i] - self.excelDate)
  100.                   / np.timedelta64(1, 'D') - self.tzAdjust)
  101.             db, dc, dd, de, df, dg, dh, di = dataframe.df.iloc[i]
  102.             di = 0x88300388
  103.             record = struct.pack('d4f4I', da, db, dc, dd, de, df, dg, dh, di)
  104.             fscid.write(record)
  105.  
  106.  
  107. class SierraFrame(object):
  108.     """        """
  109.     def __init__(self, time_series, data):
  110.         self.df = pd.DataFrame(data, index=time_series,
  111.                                columns=[O, H, L, C, V, x, y, z])
  112.         self.end = len(self.df)
  113.         self.pos = 0
  114.         self.extend_frame()
  115.  
  116.     def extend_frame(self):
  117.         '''
  118.        Create a 4999 row array from last time in self.df
  119.         and append it to self.df
  120.        Remove lunch break from array
  121.        '''
  122.         print('Extending DataFrame Now')
  123.         s5 = np.timedelta64(5, 's')
  124.         h1 = np.timedelta64(1, 'h')
  125.         sl = np.datetime64('today') + np.timedelta64(14, 'h')
  126.         el = np.datetime64('today') + np.timedelta64(15, 'h')
  127.         start_time = self.df.index.values[self.end - 1]
  128.         dtgen = ((start_time + i * s5) for i in range(1, 5000))
  129.         dtstrip = ((i + h1 if sl <= i < el else i) for i in dtgen)
  130.         dg = pd.DataFrame(index=dtstrip, columns=self.df.columns)
  131.         # dg.iloc[0:5000] = 0.0
  132.         # dg[[v, x, y, z]] = dg[[v, x, y, z]].astype('int')
  133.         self.df = self.df.append(dg)
  134.  
  135.     def add(self, new_time, datarow):
  136.         if self.end > len(self.df) - 5:
  137.             self.extend_frame()  # not needed if first fill > day length
  138.         np_time = np.datetime64(new_time)
  139.         if np_time < self.df.index.values[self.end]:
  140.             return  # new data is earlier than current
  141.         while np_time > self.df.index.values[self.end]:
  142.             self.df.iloc[self.end] = self.df.iloc[self.end - 1]
  143.             self.end += 1  # fill with prior row if new is later
  144.         self.df.iloc[self.end] = datarow  # fill when times match
  145.         self.end += 1
  146.  
  147.  
  148. def SierraRun():
  149.     global time_list
  150.     time0 = time()
  151.     filename = '/home/john/zRamdisk/SierraChart/Data/HSI-201306-HKFE-TD.scid'
  152.     hsi = SierraFile(filename)
  153.     time_series, data = hsi.get_existing_records()
  154.     da = SierraFrame(time_series, data)
  155.     wtst = SierraFile('/home/john/zRamdisk/SierraChart/Data/HSI-INPUT.scid')
  156.     wtst.save_existing_records(da)
  157.     print('df ready', da.end - 1, time() - time0)
  158.     print(da.df[da.end - 1:da.end + 1])
  159.     print()
  160.     df = da.df
  161.     print('\n', np.datetime64('now'), da.end)
  162.     print(df[da.end - 5:da.end + 5])
  163.     # print('next', hsi.pos, hsi.last)
  164.     # jtst = SierraFile('/home/john/zRamdisk/SierraChart/Data/HSI-INPUT.scid')
  165.     # time_series, data = jtst.get_existing_records()
  166.     # ja = SierraFrame(time_series, data)
  167.     # jf = ja.df
  168.     # print('\n', ja.end)
  169.     # print(df[ja.end-5:ja.end+5])
  170.     # print('next', jtst.pos, jtst.last)
  171.     # return  # ###################
  172.     counter = 0
  173.     # sys.stdout = os.fdopen(sys.stdout.fileno(), "w", newline=None)
  174.     counter_flag = False
  175.     timer_no_data = time()
  176.     timer_no_data_flag = False
  177.     overruns = 0
  178.     overrun_list = []
  179.     while True:
  180.         time0 = time()
  181.         new_time, data, late_flag = hsi.get_record()
  182.         if new_time != -999:
  183.             da.add(new_time, data)
  184.             wtst.save_record(da)
  185.             if counter > 3:
  186.                 time_list.append(time() - time0)
  187.                 timer_no_data = time()
  188.             # print(da.df[da.end-1:da.end], da.end)
  189.             print('.', end=' ')
  190.             sys.stdout.flush()
  191.             if timer_no_data_flag:
  192.                 print('Data Restored')
  193.                 timer_no_data = time()
  194.                 timer_no_data_flag = False
  195.             counter += 1
  196.             counter_flag = True
  197.         if time() - timer_no_data >= 120 and not timer_no_data_flag:
  198.             timer_no_data_flag = True
  199.             print('Data lost for two minutes')
  200.         if not late_flag:
  201.             sleep_time = 0.1 - (time() - time0)
  202.             if sleep_time > 0:
  203.                 sleep(sleep_time)
  204.         if counter % 12 == 0 and counter_flag:
  205.             counter_flag = False
  206.             print('   Overruns:', overruns, overrun_list, end='    ')
  207.             print('TimeStats', max(time_list), sum(time_list) / len(time_list),
  208.                   '\n', end=' ')
  209.             # print(df[da.end-1:da.end])
  210.             sys.stdout.flush()
  211.             # break
  212.  
  213.  
  214. def main():
  215.     SierraRun()
  216.  
  217. if __name__ == '__main__':
  218.     """
  219.    Takes a SierraChart scid file (input argument 1) and converts
  220.      it to a Pandas DataFrame
  221.    Timezone conversion can follow the users local timezone, or a
  222.      specified integer (input l or an integer but if the default
  223.      filename is being used, '' must be specified for the filename)
  224.    """
  225.     print('start')
  226.     sys.stdout.flush()
  227.     main()
  228.     print('fin')
  229.     print('Overruns:', overruns, overrun_list)
  230.     if time_list != []:
  231.         print('TimeStats', max(time_list), sum(time_list) / len(time_list))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement