Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- from datetime import datetime
- from fastparquet import write
def compute_vwap(df):
    """Return a copy of *df* with a constant ``vwap`` column added.

    The value is the average of ``df['price']`` weighted by
    ``df['foreignNotional']``, i.e. ``sum(price * notional) / sum(notional)``,
    and is broadcast to every row of the returned frame.

    Args:
        df: DataFrame with ``price`` and ``foreignNotional`` columns.

    Returns:
        A new DataFrame holding the columns of *df* plus ``vwap``.  The
        input frame is left untouched.  ``vwap`` is NaN when the total
        notional is zero (including an empty frame).

    Raises:
        KeyError: if either required column is missing.
    """
    notional = df['foreignNotional']
    total_notional = np.sum(notional)
    if total_notional == 0:
        # No weight to average over; make the undefined result explicit
        # instead of relying on a 0/0 division and its runtime warning.
        vwap = np.nan
    else:
        vwap = np.sum(df['price'] * notional) / total_notional
    # assign() returns a new frame: this function is used inside
    # groupby.apply, where mutating the group that was passed in is
    # not supported by pandas.
    return df.assign(vwap=vwap)
def ohlc(df):
    """Collapse *df* into a single-row open/high/low/close bar.

    The returned row is the last row of *df* (keeping its index label and
    all of its existing columns) with four columns added or overwritten:

    * ``open``  - first value of ``df['price']``
    * ``high``  - maximum of ``df['price']``
    * ``low``   - minimum of ``df['price']``
    * ``close`` - last value of ``df['price']``

    Args:
        df: Non-empty DataFrame with a ``price`` column, in the row order
            that defines "first" and "last".

    Returns:
        A new one-row DataFrame.  The input frame is not modified.

    Raises:
        IndexError: if *df* has no rows.
        KeyError: if the ``price`` column is missing.
    """
    price = df['price']
    # Build the bar from the last row only, rather than writing the four
    # values onto every row of the caller's frame and then slicing.
    return df.iloc[-1:].assign(
        open=price.iloc[0],
        high=price.max(),
        low=price.min(),
        close=price.iloc[-1],
    )
# Build dollar bars for XBTUSD from daily trade CSVs and save them as parquet.
#
# Each bar covers `dollars_per_bar` of cumulative `foreignNotional`; for every
# bar the script keeps the bar's last trade row, extended with the bar's VWAP
# and open/high/low/close prices.

paths = ['data/20181205.csv','data/20181206.csv','data/20181207.csv', 'data/20181208.csv', 'data/20181209.csv']

# Read every day, keep only XBTUSD rows, and concatenate once at the end.
# (DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every iteration.)
frames = []
for path in ['data/20181204.csv'] + paths:
    df = pd.read_csv(path)
    frames.append(df[df.symbol == 'XBTUSD'])
data = pd.concat(frames)

# timestamp parsing: the last three characters are dropped before parsing,
# exactly as before, so that the fractional part fits the %f directive.
data['timestamp'] = pd.to_datetime(data['timestamp'].str[:-3], format="%Y-%m-%dD%H:%M:%S.%f")
data.set_index('timestamp', inplace=True)
data.sort_index(inplace=True)

# Running total of traded notional, in time order.
data_cm_dollar = data.assign(cmDollar=data['foreignNotional'].cumsum())
total_dollars = data_cm_dollar.cmDollar.values[-1]
dollars_per_bar = 2e6
print('Total dollars:', total_dollars)
print('Dollars per bar:', dollars_per_bar)

# Rows whose running total falls in the same `dollars_per_bar`-wide bucket
# share a grpId and therefore form one bar.
data_dollar_grp = data_cm_dollar.assign(grpId=lambda row: row.cmDollar // dollars_per_bar)
dollar_bars = data_dollar_grp.groupby('grpId')
print('Number of dollar bars:', dollar_bars.ngroups)

data_dollar_ohlc = dollar_bars.apply(lambda x: ohlc(compute_vwap(x)))
# NOTE(review): assumes apply() prepends grpId as the outer index level, so
# dropping level 0 leaves the timestamp index - confirm on the pandas version
# in use.
data_dollar_ohlc.index = data_dollar_ohlc.index.droplevel()
# Several bars can end on the same timestamp; keep only the first of them so
# the index is unique.
data_dollar_ohlc = data_dollar_ohlc[~data_dollar_ohlc.index.duplicated(keep='first')]

# save to file
write('data_dollar_ohlc.pq', data_dollar_ohlc)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement