Guest User

Untitled

a guest
Jan 12th, 2020
370
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.36 KB | None | 0 0
  1. from selenium import webdriver
  2. from selenium.webdriver.support.ui import WebDriverWait
  3. from selenium.webdriver.support import expected_conditions as EC
  4. from selenium.webdriver.common.by import By
  5. from selenium.common.exceptions import TimeoutException
  6. from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
  7. from selenium.webdriver.chrome.options import Options
  8.  
  9.  
  10. from bs4 import BeautifulSoup
  11. from datetime import datetime, timedelta
  12. import pandas as pd
  13. import sqlite3
  14. import yfinance as yf
  15.  
  16. chrome_options = Options()
  17. chrome_options.add_argument("--headless")
  18. chrome_options.add_argument('log-level=3')
  19. driver = webdriver.Chrome(options=chrome_options)
  20. delay = 1
  21.  
  22. conn = sqlite3.connect('earnings.db')
  23. cur = conn.cursor()
  24.  
  25. class calendar():
  26. def __init__(self):
  27. # start reading the calendar at a date
  28. self.read_date = datetime.strptime('2018-02-22', '%Y-%m-%d')
  29. while self.read_date > datetime.strptime('2015-01-01', '%Y-%m-%d'):
  30. print(self.read_date)
  31. self.get_estimize_data()
  32.  
  33. # iterate through the dates between 2016 and 2019
  34. self.read_date = self.read_date - timedelta(days=1)
  35. while self.read_date.weekday()>=5: # exclude weekends
  36. self.read_date = self.read_date - timedelta(days=1)
  37.  
  38.  
  39. def get_yahoo_historical(self, symbols):
  40. try:
  41. # check if we're already gotten the history previously
  42. # if we already have, just return
  43. for symbol in symbols:
  44. query = "select * from price_history where Symbol = '%s'" % symbol
  45. cur.execute(query)
  46. results = cur.fetchall()
  47. if results != []:
  48. symbols.remove(symbol)
  49. except:
  50. pass
  51.  
  52. # query yahoo finance for the historical data
  53. print(' '.join(symbols))
  54. company = yf.Ticker(' '.join(symbols))
  55. hist = company.history(period='5y', auto_adjust=False)
  56. hist['Symbol'] = symbol
  57.  
  58. # store in the database
  59. hist.to_sql('price_history', conn, if_exists='append', index=False)
  60.  
  61.  
  62. def get_estimize_data(self):
  63. # request the estimize website for data
  64. url = 'https://www.estimize.com/calendar?tab=equity&date=' + self.read_date.strftime('%Y-%m-%d')
  65. driver.get(url)
  66.  
  67. # check if there are no companies reporting earnings
  68. myElem = WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.CLASS_NAME , 'dAViVi')))
  69. companies_reporting_div = driver.find_element_by_class_name('dAViVi')
  70. if '0 Events' in companies_reporting_div.text:
  71. print('No companies reporting')
  72. return
  73.  
  74. # check if earnings already in database
  75. symbol_href = driver.find_element_by_class_name('lfkTWp')
  76. symbol = symbol_href.text
  77.  
  78. report_date = driver.find_element_by_class_name('dybmdC')
  79. report_date = report_date.text.replace('\n', '')
  80.  
  81. # Check if already exists
  82. query = 'select * from estimize_eps where "Date Reported" == "%s" and Symbol == "%s"' % (report_date, symbol)
  83. cur.execute(query)
  84. results = cur.fetchall()
  85. if results != []:
  86. print('Already found')
  87. return
  88.  
  89. # method to extra the ticker symbols from the webpage
  90. tickers = self.get_tickers(driver)
  91.  
  92. # method to get the historical data from yahoo
  93. #self.get_yahoo_historical(tickers)
  94.  
  95. # read the table and make a dataframe out of it
  96. eps_df = pd.read_html(driver.page_source)[0]
  97. print(eps_df)
  98. print(len(eps_df), len(tickers))
  99. eps_df['Symbol'] = tickers
  100.  
  101. # select only certain columns
  102. eps_df = eps_df.iloc[:, [2,3,5,6,7,8,9,10,12]]
  103.  
  104. # rename columns
  105. eps_df.columns = ['Date Reported', 'Num of Estimates', 'Delta', 'Surprise', 'Historical Beat Rate', 'Wall St', 'Estimize', 'Actual', 'Symbol']
  106.  
  107. # same as above, but for revenues table instead of EPS table
  108. url = 'https://www.estimize.com/calendar?tab=equity&metric=revenue&date=' + self.read_date.strftime('%Y-%m-%d')
  109. driver.get(url)
  110. myElem = WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.TAG_NAME , 'table')))
  111.  
  112. revenue_df = pd.read_html(driver.page_source)[0]
  113. tickers = self.get_tickers(driver)
  114. revenue_df['Symbol'] = tickers
  115. revenue_df = revenue_df.iloc[:, [2,3,5,6,7,8,9,10,12]]
  116. revenue_df.columns = ['Date Reported', 'Num of Estimates', 'Delta', 'Surprise', 'Historical Beat Rate', 'Wall St', 'Estimize', 'Actual', 'Symbol']
  117.  
  118. # store in the database
  119. print(eps_df)
  120. print(revenue_df)
  121. print()
  122. eps_df.to_sql('estimize_eps', conn, if_exists='append', index=False)
  123. revenue_df.to_sql('estimize_revenue', conn, if_exists='append', index=False)
  124.  
  125.  
  126. def get_tickers(self,driver):
  127. # extract ticker symbopls from the html source
  128. soup = BeautifulSoup(driver.page_source)
  129. ticker_links = soup.findAll('a', attrs={'class': 'lfkTWp'})
  130.  
  131. # create list of symbols that were extracted
  132. tickers = []
  133. for ticker in ticker_links:
  134. tickers.append(ticker.contents[0])
  135.  
  136. return tickers
  137.  
  138. # create list of dates
  139.  
  140. # start the program
  141. calendar()
Advertisement
Add Comment
Please, Sign In to add comment