Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def get_data(symbol='spy', interval='1d', first_date='max', last_date='max'):
    """
    Download price history from yfinance and return it with derived columns.

    Params:   (symbol, interval, first_date, last_date).
    Defaults: ('SPY' , '1d'    , 'max'     , 'max'    ).

    symbol     -- ticker in any case; upper-cased before the request.
    interval   -- bar size in any case; lower-cased before the request.
                  Valid Intervals:
                  1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
    first_date -- str 'YYYY-MM-DD', or 'max' for no lower bound.
    last_date  -- str 'YYYY-MM-DD', or 'max' for no upper bound.

    Returns a DataFrame with the following columns:
    [symbol, interval, Open, High, Low, Close, Volume, log, log_return,
     lnrange, lnvolume, log_return_volume, range]
    Rows containing NaN (at least the first bar, which has no predecessor
    to difference against) are dropped.
    """
    # Imported locally, as the original did, so the module can be imported
    # without yfinance installed.
    import yfinance as yf

    ticker = symbol.upper()
    bar = interval.lower()
    open_start = first_date == 'max'
    open_end = last_date == 'max'

    if open_start and open_end:
        # no dates case
        data = yf.download(ticker, interval=bar, period='max')
    elif open_end:
        # first date only case: leave `end` unset so yfinance runs through
        # the latest available bar. The previous hand-computed
        # "last business day" end date cut off the most recent bars.
        data = yf.download(ticker, interval=bar, start=first_date)
    else:
        # last date given; when there is no first date, fall back to the
        # fixed early start the original used.
        start = '1970-01-02' if open_start else first_date
        data = yf.download(ticker, interval=bar, start=start, end=last_date)

    return _add_derived_columns(data, ticker, bar.upper())


def _add_derived_columns(data, symbol, interval):
    """Return a copy of an OHLCV frame with log/range columns and labels."""
    import numpy as np

    df = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
    # NOTE(review): newer yfinance releases appear to return two-level
    # columns (field, ticker) even for a single symbol; keep only the field
    # level so each df['Close'] etc. is a Series. Assumes the default
    # column-grouped layout -- confirm against the installed version.
    if df.columns.nlevels > 1:
        df.columns = df.columns.get_level_values(0)

    df['log'] = np.log(df['Close'])
    # A log return is the difference of consecutive log prices, not the
    # percentage change of the log price.
    df['log_return'] = df['log'].diff()
    # Log of the High/Low ratio; only the simple `range` below subtracts 1.
    df['lnrange'] = np.log(df['High'] / df['Low'])
    df['lnvolume'] = np.log(df['Volume'])
    df['log_return_volume'] = df['lnvolume'].diff()
    df['range'] = df['High'] / df['Low'] - 1
    df = df.dropna()

    # Label columns go first so every row is self-describing.
    df.insert(0, 'symbol', symbol)
    df.insert(1, 'interval', interval)
    return df
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement