Advertisement
Guest User

Untitled

a guest
Jul 27th, 2022
32
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.79 KB | None | 0 0
  def _previous_business_day_str():
      """Return the closest weekday strictly before today as 'YYYY-MM-DD'.

      Only weekends are skipped; exchange holidays are not considered.
      """
      from datetime import datetime, timedelta

      today = datetime.today()
      # Days to step back: Monday -> 3 (Friday), Sunday -> 2 (Friday),
      # every other day -> 1.
      offset = max(1, (today.weekday() + 6) % 7 - 3)
      return (today - timedelta(days=offset)).strftime('%Y-%m-%d')


  def _download_window(first_date, last_date):
      """Translate the two date arguments into ``yf.download`` keyword args."""
      if first_date == 'max' and last_date == 'max':
          return {'period': 'max'}
      if first_date == 'max':
          # No lower bound requested: start from a fixed early date.
          return {'start': '1970-01-02', 'end': last_date}
      if last_date == 'max':
          # NOTE(review): yfinance documents `end` as exclusive, so the bar of
          # the previous business day itself is presumably not returned --
          # confirm this is intended.
          return {'start': first_date, 'end': _previous_business_day_str()}
      return {'start': first_date, 'end': last_date}


  def _build_frame(data, symbol, interval):
      """Add the derived columns and the symbol/interval labels to raw OHLCV data."""
      import numpy as np

      # Work on an explicit copy so the column assignments below never write
      # into a view of the downloaded frame.
      df = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy()

      df['log'] = np.log(df['Close'])
      # NOTE(review): this is the percentage change of the log price, not the
      # difference of log prices usually meant by "log return". Kept unchanged
      # so existing callers see the same values -- confirm intent.
      df['log_return'] = df['log'].pct_change()
      # NOTE(review): the trailing "- 1" mirrors the 'range' column below and
      # looks unintended for a log range. Kept unchanged -- confirm intent.
      df['lnrange'] = np.log(df['High'] / df['Low']) - 1
      df['lnvolume'] = np.log(df['Volume'])
      df['log_return_volume'] = df['lnvolume'].pct_change()
      df['range'] = (df['High'] / df['Low']) - 1

      # pct_change() leaves NaN in the first row, so at least that row is dropped.
      df.dropna(inplace=True)

      df.insert(0, 'symbol', symbol.upper())
      df.insert(1, 'interval', interval.upper())
      return df


  def get_data(symbol='spy', interval='1d', first_date='max', last_date='max'):
      """Download price history with yfinance and add derived columns.

      Args:
          symbol: Ticker symbol; upper-cased before the download.
          interval: Bar size; lower-cased before the download. Valid values:
              1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo.
          first_date: Start date as 'YYYY-MM-DD', or 'max' for no lower bound.
          last_date: End date as 'YYYY-MM-DD', or 'max' for no upper bound.

      Date handling:
          * both 'max': the full available history (``period='max'``).
          * only ``first_date`` given: the end date is the closest weekday
            strictly before today.
          * only ``last_date`` given: the start date is '1970-01-02'.
          * both given: passed through as ``start`` / ``end``.

      Returns:
          A DataFrame with the columns, in order: symbol, interval, Open, High,
          Low, Close, Volume, log, log_return, lnrange, lnvolume,
          log_return_volume, range. Rows containing NaN are dropped.
      """
      import yfinance as yf

      # The interval is normalised for every date combination; previously only
      # the no-dates case lower-cased it.
      data = yf.download(symbol.upper(), interval=interval.lower(),
                         **_download_window(first_date, last_date))
      return _build_frame(data, symbol, interval)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement