SHARE
TWEET

Simplified Test version

user_137 Jun 17th, 2013 44 Never
  1. #!/usr/bin/python3
  2. from __future__ import print_function
  3. import numpy as np
  4. import pandas as pd
  5. import struct
  6. import sys
  7. from time import sleep, time
  8.  
  9. o = O = 'O'
  10. h = H = 'H'
  11. l = L = 'L'
  12. c = C = 'C'
  13. v = V = 'V'
  14. x = 'x'
  15. y = 'y'
  16. z = 'z'
  17.  
  18.  
  19. class SierraFrame(object):
  20.     """        """
  21.     def __init__(self, time_series, data):
  22.         self.df = pd.DataFrame(data, index=time_series,
  23.                                columns=[O, H, L, C, V, x, y, z])
  24.         self.df.iloc[:] = 1000.0
  25.         # self.df[:][5:8] = self.df[:][5:8].astype('int')
  26.         self.end = len(self.df)
  27.         self.pos = 0
  28.  
  29.     def extend_frame1(self):
  30.         time0 = time()
  31.         dtstrip = [i for i in range(10000)]
  32.         dg = pd.DataFrame(index=dtstrip, columns=self.df.columns)
  33.         dg.iloc[:] = 0.0
  34.         self.df = self.df.append(dg)
  35.         print('Took', time() - time0, 'to extend the zeros frame')
  36.  
  37.     def extend_frame2(self):
  38.         time0 = time()
  39.         dtstrip = [i for i in range(10000)]
  40.         dg = pd.DataFrame(index=dtstrip, columns=self.df.columns)
  41.         dg.iloc[:] = 0.0
  42.         self.df = self.df.append(dg)
  43.         #
  44.         # Here is the only difference.  The last four columns are
  45.         #  converted to int64.  And as a result there is an increase
  46.         #  from 0.00004 to 0.0003  ... order of magnitued in the
  47.         #  10,000 add loops.
  48.         self.df[[v, x, y, z]] = self.df[[v, x, y, z]].astype('int')
  49.         print('Took', time() - time0, 'to extend the zeros & ints frame')
  50.  
  51.  
  52.     def add(self, new_time, datarow):
  53.         self.df.iloc[self.end] = datarow  # fill when times match
  54.         self.end += 1
  55.  
  56.  
  57. def SierraRun():
  58.     time_list = []
  59.     time0 = time()
  60.     da = SierraFrame(range(350000), np.zeros((350000,8)))
  61.     da.extend_frame1()
  62.     df = da.df
  63.     print('df ready', da.end)
  64.     print(df[da.end - 5:da.end + 5])
  65.     for i in range(10000):
  66.         time0 = time()
  67.         da.add(1, [1.0, 2.0, 3.0, 4.0, 5, 6, 7, 8])
  68.         time_list.append(time() - time0)
  69.     if time_list:
  70.         print('TimeStats', max(time_list),
  71.                 sum(time_list) / len(time_list))
  72.     print('\nEnd of NaN version')
  73.  
  74.     time_list = []
  75.     time0 = time()
  76.     da = SierraFrame(range(350000), np.zeros((350000,8)))
  77.     da.extend_frame2()
  78.     df = da.df
  79.     print('df ready', da.end)
  80.     print(df[da.end - 5:da.end + 5])
  81.     for i in range(10000):
  82.         time0 = time()
  83.         da.add(1, [1.0, 2.0, 3.0, 4.0, 5, 6, 7, 8])
  84.         time_list.append(time() - time0)
  85.     if time_list:
  86.         print('TimeStats', max(time_list),
  87.                 sum(time_list) / len(time_list))
  88.     print('\nEnd of NaN version')
  89.     import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
  90.  
  91.  
  92.  
  93. def main():
  94.     SierraRun()
  95.  
  96. if __name__ == '__main__':
  97.     """
  98.    Takes a SierraChart scid file (input argument 1) and converts
  99.      it to a Pandas DataFrame
  100.    Timezone conversion can follow the users local timezone, or a
  101.      specified integer (input l or an integer but if the default
  102.      filename is being used, '' must be specified for the filename)
  103.    """
  104.     print('start')
  105.     sys.stdout.flush()
  106.     main()
  107.     print('fin')
RAW Paste Data
Top