#!/usr/bin/python3
from __future__ import print_function
import numpy as np
import pandas as pd
import struct
import sys
from time import sleep, time
# Column-label constants. O, H, L, C, V, x, y and z are passed (in that order)
# as the eight DataFrame column labels in SierraFrame.__init__; v, x, y and z
# select the columns that SierraFrame.extend_frame2 casts to int.
# NOTE(review): O/H/L/C/V presumably stand for open/high/low/close/volume --
# confirm. The lower-case aliases o, h, l and c are not used in the code
# visible in this file.
o = O = 'O'
h = H = 'H'
l = L = 'L'
c = C = 'C'
v = V = 'V'
x = 'x'
y = 'y'
z = 'z'
class SierraFrame(object):
    """Pre-allocated DataFrame with a cursor for filling rows by position.

    Attributes:
        df: the underlying ``pandas.DataFrame`` with columns
            ``[O, H, L, C, V, x, y, z]``.
        end: positional index of the next row that ``add`` will overwrite.
            Initialised to the number of rows passed to the constructor.
        pos: initialised to 0; not read by any method of this class.
    """

    def __init__(self, time_series, data):
        """Build the frame from ``data`` indexed by ``time_series``.

        Args:
            time_series: index labels, one per row of ``data``.
            data: 2-D values with eight columns.

        Every cell is overwritten with 1000.0 after construction, so only
        the shape (and dtype) of ``data`` matters.
        """
        self.df = pd.DataFrame(data, index=time_series,
                               columns=[O, H, L, C, V, x, y, z])
        self.df.iloc[:] = 1000.0
        # self.df[:][5:8] = self.df[:][5:8].astype('int')
        self.end = len(self.df)
        self.pos = 0

    def _append_zero_rows(self, n_rows):
        """Append ``n_rows`` rows of 0.0, labelled 0..n_rows-1, to ``self.df``."""
        # Built empty and then filled (rather than from a zeros array) so the
        # resulting column dtypes are the same as in the original code.
        filler = pd.DataFrame(index=list(range(n_rows)),
                              columns=self.df.columns)
        filler.iloc[:] = 0.0
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # documented replacement and also works on older pandas versions.
        self.df = pd.concat([self.df, filler])

    def extend_frame1(self, n_rows=10000):
        """Append ``n_rows`` zero rows and print how long it took.

        Args:
            n_rows: number of rows to append (default 10000).
        """
        time0 = time()
        self._append_zero_rows(n_rows)
        print('Took', time() - time0, 'to extend the zeros frame')

    def extend_frame2(self, n_rows=10000):
        """Append ``n_rows`` zero rows, cast V/x/y/z to int, print the time.

        Args:
            n_rows: number of rows to append (default 10000).
        """
        time0 = time()
        self._append_zero_rows(n_rows)
        #
        # Here is the only difference. The last four columns are
        # converted to int64. And as a result there is an increase
        # from 0.00004 to 0.0003 ... an order of magnitude in the
        # 10,000 add loops.
        self.df[[v, x, y, z]] = self.df[[v, x, y, z]].astype('int')
        print('Took', time() - time0, 'to extend the zeros & ints frame')

    def add(self, new_time, datarow):
        """Overwrite the row at position ``self.end`` and advance the cursor.

        Args:
            new_time: accepted for interface compatibility; currently unused
                (the row's index label is left unchanged).
            datarow: eight values, one per column.

        Raises:
            IndexError: from pandas, if ``self.end`` is past the last row;
                call ``extend_frame1``/``extend_frame2`` first to make room.
        """
        self.df.iloc[self.end] = datarow  # fill when times match
        self.end += 1
def _time_row_inserts(extend, label, base_rows=350000, inserts=10000):
    """Build a frame, extend it with ``extend``, and time ``inserts`` adds.

    Args:
        extend: unbound ``SierraFrame`` method used to append spare rows
            (``SierraFrame.extend_frame1`` or ``SierraFrame.extend_frame2``).
        label: text inserted into the closing "End of ... version" banner.
        base_rows: number of rows in the initial frame.
        inserts: number of ``add`` calls to time.
    """
    frame = SierraFrame(range(base_rows), np.zeros((base_rows, 8)))
    extend(frame)
    print('df ready', frame.end)
    # Show the boundary between the original rows and the appended ones.
    print(frame.df[frame.end - 5:frame.end + 5])

    time_list = []
    for _ in range(inserts):
        time0 = time()
        frame.add(1, [1.0, 2.0, 3.0, 4.0, 5, 6, 7, 8])
        time_list.append(time() - time0)
    if time_list:
        print('TimeStats', max(time_list),
              sum(time_list) / len(time_list))
    print('\nEnd of ' + label + ' version')


def SierraRun():
    """Time per-row inserts for both frame-extension variants.

    Runs the same benchmark twice: once after ``extend_frame1`` and once
    after ``extend_frame2`` (which additionally casts four columns to int),
    printing the maximum and mean duration of a single ``add`` call.
    """
    _time_row_inserts(SierraFrame.extend_frame1, 'NaN')
    # The second banner used to repeat 'NaN' (copy-paste); this run is the
    # int-column variant. The leftover ipdb breakpoint that followed it was
    # removed so the script can run unattended without ipdb installed.
    _time_row_inserts(SierraFrame.extend_frame2, 'int')
def main():
    """Script entry point: run the SierraRun benchmark."""
    SierraRun()
if __name__ == '__main__':
    # Runs the row-insert benchmark via main(), bracketed by 'start'/'fin'
    # markers on stdout. No command-line arguments are read.
    # NOTE(review): the note that used to sit here described converting a
    # SierraChart .scid file (argument 1) to a pandas DataFrame with an
    # optional timezone argument; nothing visible in this file does that.
    print('start')
    # Flush so the marker appears before the (slow) benchmark output.
    sys.stdout.flush()
    main()
    print('fin')