1. #!/usr/bin/python3
  2. from __future__ import print_function
  3. import numpy as np
  4. import pandas as pd
  5. import struct
  6. import sys
  7. from time import sleep, time
  8.  
  9. o = O = 'O'
  10. h = H = 'H'
  11. l = L = 'L'
  12. c = C = 'C'
  13. v = V = 'V'
  14. x = 'x'
  15. y = 'y'
  16. z = 'z'
  17.  
  18.  
  19. class SierraFrame(object):
  20.     """        """
  21.     def __init__(self, time_series, data):
  22.         self.df = pd.DataFrame(data, index=time_series,
  23.                                columns=[O, H, L, C, V, x, y, z])
  24.         self.df.iloc[:] = 1000.0
  25.         # self.df[:][5:8] = self.df[:][5:8].astype('int')
  26.         self.end = len(self.df)
  27.         self.pos = 0
  28.  
  29.     def extend_frame1(self):
  30.         time0 = time()
  31.         dtstrip = [i for i in range(10000)]
  32.         dg = pd.DataFrame(index=dtstrip, columns=self.df.columns)
  33.         dg.iloc[:] = 0.0
  34.         self.df = self.df.append(dg)
  35.         print('Took', time() - time0, 'to extend the zeros frame')
  36.  
  37.     def extend_frame2(self):
  38.         time0 = time()
  39.         dtstrip = [i for i in range(10000)]
  40.         dg = pd.DataFrame(index=dtstrip, columns=self.df.columns)
  41.         dg.iloc[:] = 0.0
  42.         self.df = self.df.append(dg)
  43.         #
  44.         # Here is the only difference.  The last four columns are
  45.         #  converted to int64.  And as a result there is an increase
  46.         #  from 0.00004 to 0.0003  ... order of magnitued in the
  47.         #  10,000 add loops.
  48.         self.df[[v, x, y, z]] = self.df[[v, x, y, z]].astype('int')
  49.         print('Took', time() - time0, 'to extend the zeros & ints frame')
  50.  
  51.  
  52.     def add(self, new_time, datarow):
  53.         self.df.iloc[self.end] = datarow  # fill when times match
  54.         self.end += 1
  55.  
  56.  
  57. def SierraRun():
  58.     time_list = []
  59.     time0 = time()
  60.     da = SierraFrame(range(350000), np.zeros((350000,8)))
  61.     da.extend_frame1()
  62.     df = da.df
  63.     print('df ready', da.end)
  64.     print(df[da.end - 5:da.end + 5])
  65.     for i in range(10000):
  66.         time0 = time()
  67.         da.add(1, [1.0, 2.0, 3.0, 4.0, 5, 6, 7, 8])
  68.         time_list.append(time() - time0)
  69.     if time_list:
  70.         print('TimeStats', max(time_list),
  71.                 sum(time_list) / len(time_list))
  72.     print('\nEnd of NaN version')
  73.  
  74.     time_list = []
  75.     time0 = time()
  76.     da = SierraFrame(range(350000), np.zeros((350000,8)))
  77.     da.extend_frame2()
  78.     df = da.df
  79.     print('df ready', da.end)
  80.     print(df[da.end - 5:da.end + 5])
  81.     for i in range(10000):
  82.         time0 = time()
  83.         da.add(1, [1.0, 2.0, 3.0, 4.0, 5, 6, 7, 8])
  84.         time_list.append(time() - time0)
  85.     if time_list:
  86.         print('TimeStats', max(time_list),
  87.                 sum(time_list) / len(time_list))
  88.     print('\nEnd of NaN version')
  89.     import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
  90.  
  91.  
  92.  
  93. def main():
  94.     SierraRun()
  95.  
  96. if __name__ == '__main__':
  97.     """
  98.    Takes a SierraChart scid file (input argument 1) and converts
  99.      it to a Pandas DataFrame
  100.    Timezone conversion can follow the users local timezone, or a
  101.      specified integer (input l or an integer but if the default
  102.      filename is being used, '' must be specified for the filename)
  103.    """
  104.     print('start')
  105.     sys.stdout.flush()
  106.     main()
  107.     print('fin')