Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from selenium import webdriver
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.common.by import By
- from selenium.common.exceptions import TimeoutException
- from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
- from selenium.webdriver.chrome.options import Options
- from bs4 import BeautifulSoup
- from datetime import datetime, timedelta
- import pandas as pd
- import sqlite3
- import yfinance as yf
- chrome_options = Options()
- chrome_options.add_argument("--headless")
- chrome_options.add_argument('log-level=3')
- driver = webdriver.Chrome(options=chrome_options)
- delay = 1
- conn = sqlite3.connect('earnings.db')
- cur = conn.cursor()
- class calendar():
- def __init__(self):
- # start reading the calendar at a date
- self.read_date = datetime.strptime('2018-02-22', '%Y-%m-%d')
- while self.read_date > datetime.strptime('2015-01-01', '%Y-%m-%d'):
- print(self.read_date)
- self.get_estimize_data()
- # iterate through the dates between 2016 and 2019
- self.read_date = self.read_date - timedelta(days=1)
- while self.read_date.weekday()>=5: # exclude weekends
- self.read_date = self.read_date - timedelta(days=1)
- def get_yahoo_historical(self, symbols):
- try:
- # check if we're already gotten the history previously
- # if we already have, just return
- for symbol in symbols:
- query = "select * from price_history where Symbol = '%s'" % symbol
- cur.execute(query)
- results = cur.fetchall()
- if results != []:
- symbols.remove(symbol)
- except:
- pass
- # query yahoo finance for the historical data
- print(' '.join(symbols))
- company = yf.Ticker(' '.join(symbols))
- hist = company.history(period='5y', auto_adjust=False)
- hist['Symbol'] = symbol
- # store in the database
- hist.to_sql('price_history', conn, if_exists='append', index=False)
- def get_estimize_data(self):
- # request the estimize website for data
- url = 'https://www.estimize.com/calendar?tab=equity&date=' + self.read_date.strftime('%Y-%m-%d')
- driver.get(url)
- # check if there are no companies reporting earnings
- myElem = WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.CLASS_NAME , 'dAViVi')))
- companies_reporting_div = driver.find_element_by_class_name('dAViVi')
- if '0 Events' in companies_reporting_div.text:
- print('No companies reporting')
- return
- # check if earnings already in database
- symbol_href = driver.find_element_by_class_name('lfkTWp')
- symbol = symbol_href.text
- report_date = driver.find_element_by_class_name('dybmdC')
- report_date = report_date.text.replace('\n', '')
- # Check if already exists
- query = 'select * from estimize_eps where "Date Reported" == "%s" and Symbol == "%s"' % (report_date, symbol)
- cur.execute(query)
- results = cur.fetchall()
- if results != []:
- print('Already found')
- return
- # method to extra the ticker symbols from the webpage
- tickers = self.get_tickers(driver)
- # method to get the historical data from yahoo
- #self.get_yahoo_historical(tickers)
- # read the table and make a dataframe out of it
- eps_df = pd.read_html(driver.page_source)[0]
- print(eps_df)
- print(len(eps_df), len(tickers))
- eps_df['Symbol'] = tickers
- # select only certain columns
- eps_df = eps_df.iloc[:, [2,3,5,6,7,8,9,10,12]]
- # rename columns
- eps_df.columns = ['Date Reported', 'Num of Estimates', 'Delta', 'Surprise', 'Historical Beat Rate', 'Wall St', 'Estimize', 'Actual', 'Symbol']
- # same as above, but for revenues table instead of EPS table
- url = 'https://www.estimize.com/calendar?tab=equity&metric=revenue&date=' + self.read_date.strftime('%Y-%m-%d')
- driver.get(url)
- myElem = WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.TAG_NAME , 'table')))
- revenue_df = pd.read_html(driver.page_source)[0]
- tickers = self.get_tickers(driver)
- revenue_df['Symbol'] = tickers
- revenue_df = revenue_df.iloc[:, [2,3,5,6,7,8,9,10,12]]
- revenue_df.columns = ['Date Reported', 'Num of Estimates', 'Delta', 'Surprise', 'Historical Beat Rate', 'Wall St', 'Estimize', 'Actual', 'Symbol']
- # store in the database
- print(eps_df)
- print(revenue_df)
- print()
- eps_df.to_sql('estimize_eps', conn, if_exists='append', index=False)
- revenue_df.to_sql('estimize_revenue', conn, if_exists='append', index=False)
- def get_tickers(self,driver):
- # extract ticker symbopls from the html source
- soup = BeautifulSoup(driver.page_source)
- ticker_links = soup.findAll('a', attrs={'class': 'lfkTWp'})
- # create list of symbols that were extracted
- tickers = []
- for ticker in ticker_links:
- tickers.append(ticker.contents[0])
- return tickers
- # create list of dates
- # start the program
- calendar()
Advertisement
Add Comment
Please, Sign In to add comment