Want more features on Pastebin? Sign Up, it's FREE!
Guest

Simplified Test version

By: user_137 on Jun 17th, 2013  |  syntax: Python  |  size: 3.14 KB  |  views: 38  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
#!/usr/bin/python3
from __future__ import print_function

import struct
import sys
from time import sleep, time

import numpy as np
import pandas as pd
  8.  
  9. o = O = 'O'
  10. h = H = 'H'
  11. l = L = 'L'
  12. c = C = 'C'
  13. v = V = 'V'
  14. x = 'x'
  15. y = 'y'
  16. z = 'z'
  17.  
  18.  
  19. class SierraFrame(object):
  20.     """        """
  21.     def __init__(self, time_series, data):
  22.         self.df = pd.DataFrame(data, index=time_series,
  23.                                columns=[O, H, L, C, V, x, y, z])
  24.         self.df.iloc[:] = 1000.0
  25.         # self.df[:][5:8] = self.df[:][5:8].astype('int')
  26.         self.end = len(self.df)
  27.         self.pos = 0
  28.  
  29.     def extend_frame1(self):
  30.         time0 = time()
  31.         dtstrip = [i for i in range(10000)]
  32.         dg = pd.DataFrame(index=dtstrip, columns=self.df.columns)
  33.         dg.iloc[:] = 0.0
  34.         self.df = self.df.append(dg)
  35.         print('Took', time() - time0, 'to extend the zeros frame')
  36.  
  37.     def extend_frame2(self):
  38.         time0 = time()
  39.         dtstrip = [i for i in range(10000)]
  40.         dg = pd.DataFrame(index=dtstrip, columns=self.df.columns)
  41.         dg.iloc[:] = 0.0
  42.         self.df = self.df.append(dg)
  43.         #
  44.         # Here is the only difference.  The last four columns are
  45.         #  converted to int64.  And as a result there is an increase
  46.         #  from 0.00004 to 0.0003  ... order of magnitued in the
  47.         #  10,000 add loops.
  48.         self.df[[v, x, y, z]] = self.df[[v, x, y, z]].astype('int')
  49.         print('Took', time() - time0, 'to extend the zeros & ints frame')
  50.  
  51.  
  52.     def add(self, new_time, datarow):
  53.         self.df.iloc[self.end] = datarow  # fill when times match
  54.         self.end += 1
  55.  
  56.  
  57. def SierraRun():
  58.     time_list = []
  59.     time0 = time()
  60.     da = SierraFrame(range(350000), np.zeros((350000,8)))
  61.     da.extend_frame1()
  62.     df = da.df
  63.     print('df ready', da.end)
  64.     print(df[da.end - 5:da.end + 5])
  65.     for i in range(10000):
  66.         time0 = time()
  67.         da.add(1, [1.0, 2.0, 3.0, 4.0, 5, 6, 7, 8])
  68.         time_list.append(time() - time0)
  69.     if time_list:
  70.         print('TimeStats', max(time_list),
  71.                 sum(time_list) / len(time_list))
  72.     print('\nEnd of NaN version')
  73.  
  74.     time_list = []
  75.     time0 = time()
  76.     da = SierraFrame(range(350000), np.zeros((350000,8)))
  77.     da.extend_frame2()
  78.     df = da.df
  79.     print('df ready', da.end)
  80.     print(df[da.end - 5:da.end + 5])
  81.     for i in range(10000):
  82.         time0 = time()
  83.         da.add(1, [1.0, 2.0, 3.0, 4.0, 5, 6, 7, 8])
  84.         time_list.append(time() - time0)
  85.     if time_list:
  86.         print('TimeStats', max(time_list),
  87.                 sum(time_list) / len(time_list))
  88.     print('\nEnd of NaN version')
  89.     import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
  90.  
  91.  
  92.  
  93. def main():
  94.     SierraRun()
  95.  
  96. if __name__ == '__main__':
  97.     """
  98.    Takes a SierraChart scid file (input argument 1) and converts
  99.      it to a Pandas DataFrame
  100.    Timezone conversion can follow the users local timezone, or a
  101.      specified integer (input l or an integer but if the default
  102.      filename is being used, '' must be specified for the filename)
  103.    """
  104.     print('start')
  105.     sys.stdout.flush()
  106.     main()
  107.     print('fin')
clone this paste RAW Paste Data