Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
from __future__ import print_function

import struct
import sys
from time import sleep, time
from timeit import default_timer

import numpy as np
import pandas as pd
# Column labels of the benchmark frame.  Each of the first five labels is
# bound under both an upper-case and a lower-case name.
O, H, L, C, V = 'O', 'H', 'L', 'C', 'V'
o, h, l, c, v = O, H, L, C, V
x, y, z = 'x', 'y', 'z'
class SierraFrame(object):
    """Eight-column DataFrame wrapper that is padded, then filled row by row.

    The frame is built once, padded with extra rows by ``extend_frame1`` or
    ``extend_frame2``, and then filled positionally through ``add``.

    Attributes:
        df: the underlying ``pandas.DataFrame``.
        end: positional index of the next row ``add`` will overwrite.
        pos: initialised to 0; not used by any method of this class.
    """

    # Column labels, in storage order.
    COLUMNS = ('O', 'H', 'L', 'C', 'V', 'x', 'y', 'z')
    # Columns that extend_frame2 converts to an integer dtype.
    INT_COLUMNS = ('V', 'x', 'y', 'z')

    def __init__(self, time_series, data):
        """Build the frame from ``data`` indexed by ``time_series``.

        Args:
            time_series: index labels, one per row of ``data``.
            data: two-dimensional values with eight columns.
        """
        self.df = pd.DataFrame(data, index=time_series,
                               columns=list(self.COLUMNS))
        # Every cell is overwritten, so ``data`` only fixes the shape and
        # dtype of the frame.  Presumably 1000.0 makes the initial rows
        # easy to tell apart from the zero padding -- TODO confirm.
        self.df.iloc[:] = 1000.0
        self.end = len(self.df)
        self.pos = 0

    def _append_zero_rows(self, n_rows):
        """Append ``n_rows`` rows of 0.0 to ``self.df``.

        The new rows are labelled 0..n_rows-1, which may duplicate labels
        already in the index; ``add`` addresses rows by position, so the
        duplicates do not matter there.
        """
        padding = pd.DataFrame(index=list(range(n_rows)),
                               columns=self.df.columns)
        padding.iloc[:] = 0.0
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported equivalent.
        self.df = pd.concat([self.df, padding])

    def extend_frame1(self, n_rows=10000):
        """Pad the frame with ``n_rows`` zero rows and print the time taken.

        ``self.end`` is left unchanged, so the next ``add`` writes into the
        first padding row.
        """
        time0 = time()
        self._append_zero_rows(n_rows)
        print('Took', time() - time0, 'to extend the zeros frame')

    def extend_frame2(self, n_rows=10000):
        """Pad like ``extend_frame1``, then cast the last four columns to int.

        Original author's note: the cast is the only difference from
        ``extend_frame1``, and it raised the mean ``add`` time from about
        0.00004 s to 0.0003 s over the 10,000-add loop.
        """
        time0 = time()
        self._append_zero_rows(n_rows)
        int_columns = list(self.INT_COLUMNS)
        self.df[int_columns] = self.df[int_columns].astype('int')
        print('Took', time() - time0, 'to extend the zeros & ints frame')

    def add(self, new_time, datarow):
        """Overwrite the row at position ``self.end`` and advance ``self.end``.

        Args:
            new_time: accepted for interface compatibility; currently unused.
            datarow: eight values, one per column.

        Raises:
            IndexError: if ``self.end`` is past the last row of the frame,
                i.e. the frame was not extended far enough beforehand.
        """
        self.df.iloc[self.end] = datarow
        self.end += 1
def _time_adds(frame, n_adds):
    """Time ``n_adds`` consecutive ``frame.add`` calls; return the durations."""
    durations = []
    for _ in range(n_adds):
        started = default_timer()
        frame.add(1, [1.0, 2.0, 3.0, 4.0, 5, 6, 7, 8])
        durations.append(default_timer() - started)
    return durations


def _run_case(extend, n_rows, n_adds, closing_message):
    """Build a frame, pad it with ``extend``, time the adds, print a report."""
    frame = SierraFrame(range(n_rows), np.zeros((n_rows, 8)))
    extend(frame)
    print('df ready', frame.end)
    # Show the rows on both sides of the boundary between the initial rows
    # and the padding, before any row has been added.
    print(frame.df.iloc[frame.end - 5:frame.end + 5])
    durations = _time_adds(frame, n_adds)
    if durations:
        print('TimeStats', max(durations), sum(durations) / len(durations))
    print(closing_message)


def SierraRun(n_rows=350000, n_adds=10000):
    """Benchmark ``SierraFrame.add`` on a float frame and on a float/int frame.

    Two frames of ``n_rows`` rows are built in turn.  The first is padded
    with ``extend_frame1``, the second with ``extend_frame2`` (which also
    casts four columns to int).  For each, ``n_adds`` rows are written with
    ``add`` and the maximum and mean per-call time are printed.

    Args:
        n_rows: number of initial rows in each frame.
        n_adds: number of timed ``add`` calls per frame.  It must not exceed
            the number of rows appended by ``extend_frame1``/``extend_frame2``
            (10000 by default), otherwise ``add`` raises IndexError.
    """
    _run_case(SierraFrame.extend_frame1, n_rows, n_adds,
              '\nEnd of NaN version')
    _run_case(SierraFrame.extend_frame2, n_rows, n_adds,
              '\nEnd of int version')
def main():
    """Run the SierraFrame benchmark via ``SierraRun``."""
    SierraRun()
if __name__ == '__main__':
    # Script entry point: runs the SierraFrame benchmark through main().
    # No command-line arguments are read and no input file is opened.
    print('start')
    # Flush so that 'start' is written out before the benchmark begins,
    # even when stdout is buffered.
    sys.stdout.flush()
    main()
    print('fin')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement