Advertisement
Guest User

EdgarScrape

a guest
Mar 18th, 2016
187
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 12.07 KB | None | 0 0
  1. #Import necessary modules
  2. import smtplib
  3. import datetime
  4. from twilio.rest import TwilioRestClient
  5. import feedparser
  6. import openpyxl
  7. import xml.etree.ElementTree as ET
  8. import requests
  9. import bs4
  10. import time
  11.  
  12. #Initialize lists that we will record our data in
  13. CompanyNameList = []
  14. TickerList = []
  15. ProcessedList = []
  16. ReportingOwnerRelationshipList = []
  17. TransactionSharesList = []
  18. PricePerShareList = []
  19. TotalValueList = []
  20. transactionCodeList = []
  21. DorIList = []
  22. portfolio = []
  23. bought_price = []
  24. stocks_sent = []
  25. checked = []
  26.  
  27.  
  28. #----------------------------------------------------------------------------------#
  29. #PULL FROM STOCK SCREEN EXCEL AND CURRENT PORTFOLIO
  30.  
  31. wb = openpyxl.load_workbook(filename = 'stock_screenv2.xlsx')
  32. sheet = wb.active
  33.  
  34. print('Getting info from cells...')
  35. for row in range(2, sheet.max_row + 1):
  36.     company_name      = sheet['A' + str(row)].value
  37.     ticker            = sheet['B' + str(row)].value
  38.     CompanyNameList.append(company_name)
  39.     TickerList.append(ticker)
  40.  
  41. wb.save('stock_screenv2.xlsx')
  42.  
  43. with open('portfolio.txt', 'r') as f:
  44.     stocks = f.readlines()
  45.     for item in stocks:
  46.         item = item.strip()
  47.         portfolio.append(item)
  48.  
  49. with open('bought_price.txt', 'r') as f:
  50.     price = f.readlines()
  51.     for item in price:
  52.         item = item.strip()
  53.         bought_price.append(item)
  54.  
  55. #----------------------------------------------------------------------------------#
  56. #COMMUNICATION FUNCTIONS
  57.  
  58. def email(tradingSymbol, link):
  59.     today = datetime.datetime.today()
  60.     today = today.strftime('%m/%d/%Y %I:%M %p')
  61.     smtpObj = smtplib.SMTP('smtp.gmail.com', 587)
  62.     smtpObj.ehlo()
  63.     smtpObj.starttls()
  64.     smtpObj.login('***********************@gmail.com', '**********')
  65.     print(smtpObj.sendmail('***************@gmail.com',\
  66.                      '***************@gmail.com',\
  67.                      'Subject: ' + str(today) + ' | Stock order: ' + str(tradingSymbol) + '.\nBuy this stock and heres the address + ' + str(link) + '\n'))
  68.     smtpObj.quit()
  69.  
  70. def text_phone(tradingSymbol):
  71.     accountSID = '***********************************'
  72.     authToken = '***********************************'
  73.     twilioCli = TwilioRestClient(accountSID, authToken)
  74.     myTwilioNumber = '**************'
  75.     myCellPhone = '*****************'
  76.     message = twilioCli.messages.create(body='Yo, buy this stock: ' + str(tradingSymbol), from_=myTwilioNumber, to=myCellPhone)
  77.  
  78.  
  79. #----------------------------------------------------------------------------------#
  80. #SCAN EDGAR, SCRAPE XML, AND CHECK PRICES
  81.  
  82. #Gets the link to the XML of the relevant insider buy and looks for pre-defined characteristics on the Form 4
  83. def scrape_xml(link):
  84.     TotalValue = 0
  85.     transactionCodeList = []
  86.     DorIList = []
  87.     today = datetime.datetime.today()
  88.     today = today.strftime('%m/%d/%Y %I:%M %p')
  89.  
  90.     headers = {
  91.     "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36",
  92.     "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
  93.     "accept-charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
  94.     "accept-encoding": "gzip, deflate, sdch",
  95.     "accept-language": "en-US,en;q=0.8",
  96.     }
  97.     res = requests.get(link, headers=headers)
  98.     soup = bs4.BeautifulSoup(res.text, 'html.parser')
  99.     try:
  100.         for a in soup.find_all('a'):
  101.             if 'Archives' in a['href'] and 'xml' in a.getText():
  102.                 address = 'http://www.sec.gov' + a['href']
  103.                 print ('Scraping XML on ' + str(today) + ' at ' + str(link))
  104.                 res = requests.get(address, headers=headers)
  105.                 tree = ET.fromstring(res.text)
  106.                 isOfficer = tree.find('reportingOwner/reportingOwnerRelationship/isOfficer')
  107.                 if isOfficer == None:
  108.                     isOfficer = ''
  109.                 transactionCode = tree.findall('nonDerivativeTable/nonDerivativeTransaction/transactionCoding/transactionCode')
  110.                 if transactionCode == None:
  111.                     transactionCode = []
  112.                 tradingSymbol = tree.find('issuer/issuerTradingSymbol')
  113.                 transactionShares = tree.findall('nonDerivativeTable/nonDerivativeTransaction/transactionAmounts/transactionShares/value')
  114.                 if transactionShares == None:
  115.                     transactionShares = []
  116.                 transactionPricePerShare = tree.findall('nonDerivativeTable/nonDerivativeTransaction/transactionAmounts/transactionPricePerShare/value')
  117.                 if transactionShares == None:
  118.                     transactionShares = []
  119.                 DorI = tree.findall('nonDerivativeTable/nonDerivativeTransaction/ownershipNature/directOrIndirectOwnership/value')
  120.                 if DorI == None:
  121.                     DorI = []
  122.                 for price, shares, direct, code in zip(transactionPricePerShare, transactionShares, DorI, transactionCode):
  123.                     if direct.text == 'D' and code.text == 'P':
  124.                         TotalValue = TotalValue + float(shares.text)*float(price.text)
  125.                 for code in transactionCode:
  126.                     transactionCodeList.append(code.text)
  127.                 for item in DorI:
  128.                     DorIList.append(item.text)
  129.                 print (isOfficer.text)
  130.                 print(transactionCodeList)
  131.                 print (TotalValue)
  132.                 print (DorIList)
  133.                 print (tradingSymbol.text)  
  134.                 if isOfficer != None:
  135.                     if isOfficer.text == str(1) and 'P' in transactionCodeList and TotalValue > 10000 and 'D' in DorIList and tradingSymbol.text not in portfolio:
  136.                         print ('Stock found.')
  137.                         print (today)
  138.                         print (tradingSymbol.text)
  139.                         with open('portfolio.txt', 'a') as f:
  140.                             f.write(tradingSymbol.text + '\n')
  141.                         ticker = tradingSymbol.text.lower()
  142.                         res = requests.get('http://finance.yahoo.com/q?s=' + ticker)
  143.                         soup = bs4.BeautifulSoup(res.text, 'html.parser')
  144.                         elems = soup.select('#yfs_l84_'+str(ticker))
  145.                         current_price = elems[0].getText()
  146.                         with open('bought_price.txt', 'a') as f:
  147.                             f.write(current_price + '\n')
  148.                         email (tradingSymbol.text, link)
  149.                         text_phone (tradingSymbol.text)
  150.                         text_scott (tradingSymbol.text)
  151.                         print ('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
  152.     except Exception as e:
  153.         print (address)
  154.         print (e)
  155.         print (datetime.datetime.today())
  156.         pass
  157.  
  158. #Scrapes every entry on the RSS feed of the SEC's "Lastest Filings"
  159. def edgar_feed(url):
  160.     try:
  161.         d = feedparser.parse(url)
  162.         lower = [x.lower() for x in CompanyNameList]
  163.         for entry in range(0,100):
  164.             company_name = d.entries[entry].title.lower()
  165.             company_name = company_name.split('- ')
  166.             company_name = company_name[1].split(' (')
  167.             company_name = company_name[0]
  168.             if company_name in lower and d.entries[entry].title[0:1:] == '4':
  169.                 link = d.entries[entry].link
  170.                 stocks_sent.append(link)
  171.                 if link not in stocks_sent:
  172.                     scrape_xml(link)
  173.             else:
  174.                 pass
  175.     except Exception as e:
  176.         print (e)
  177.         print (datetime.datetime.today())
  178.         pass
  179.  
  180. #Checks prices in current portfolio and notifies to sell if +2% or -5%
  181. def check_price():
  182.     for stock, price in zip(portfolio, bought_price):
  183.         ticker = stock.lower()
  184.         res = requests.get('http://finance.yahoo.com/q?s=' + ticker)
  185.         soup = bs4.BeautifulSoup(res.text, 'html.parser')
  186.         elems = soup.select('#yfs_l84_'+str(ticker))
  187.         current_price = elems[0].getText()
  188.         ticker = stock.upper()
  189.         if float(current_price) > 1.02*float(price) and stock not in checked:
  190.         #Email
  191.             today = datetime.datetime.today()
  192.             today = today.strftime('%m/%d/%Y %I:%M %p')
  193.             smtpObj = smtplib.SMTP('smtp.gmail.com', 587)
  194.             smtpObj.ehlo()
  195.             smtpObj.starttls()
  196.             smtpObj.login('**************@gmail.com', 'password')
  197.             print(smtpObj.sendmail('*****************@gmail.com',\
  198.                              '*********************@gmail.com',\
  199.                              'Subject: ' + str(today) + ' | Stock to sell after 2% gains: ' + str(ticker) + '.\nSell this stock' + '\n'))
  200.             smtpObj.quit()
  201.             #Text me
  202.             accountSID = '*******************************'
  203.             authToken = '*******************************'
  204.             twilioCli = TwilioRestClient(accountSID, authToken)
  205.             myTwilioNumber = '***************'
  206.             myCellPhone = '**************'
  207.             message = twilioCli.messages.create(body='Yo, sell this stock (2% gain): ' + str(ticker), from_=myTwilioNumber, to=myCellPhone)
  208.             portfolio.remove(stock)
  209.             bought_price.remove(price)
  210.             checked.append(stock)
  211.             f = open("portfolio.txt","r+")
  212.             d = f.readlines()
  213.             f.seek(0)
  214.             for i in d:
  215.                 if i != str(stock + '\n'):
  216.                     f.write(i)
  217.             f.truncate()
  218.             f.close()
  219.             f = open("bought_price.txt","r+")
  220.             d = f.readlines()
  221.             f.seek(0)
  222.             for i in d:
  223.                 if i != str(price + '\n'):
  224.                     f.write(i)
  225.             f.truncate()
  226.             f.close()
  227.             checked.append(stock)
  228.         elif float(current_price) < .95*float(price) and stock not in checked:
  229.         #Email
  230.             today = datetime.datetime.today()
  231.             today = today.strftime('%m/%d/%Y %I:%M %p')
  232.             smtpObj = smtplib.SMTP('smtp.gmail.com', 587)
  233.             smtpObj.ehlo()
  234.             smtpObj.starttls()
  235.             smtpObj.login('*****************@gmail.com', '*****************')
  236.             print(smtpObj.sendmail('***********@gmail.com',\
  237.                              '**************@gmail.com',\
  238.                              'Subject: ' + str(today) + ' | Stock to sell after 5% losses: ' + str(ticker) + '.\nSell this stock' + '\n'))
  239.             smtpObj.quit()
  240.         #Text me
  241.             accountSID = '**********************'
  242.             authToken = '**********************'
  243.             twilioCli = TwilioRestClient(accountSID, authToken)
  244.             myTwilioNumber = '+13607270127'
  245.             myCellPhone = '+13605626329'
  246.             message = twilioCli.messages.create(body='Yo, sell this stock (5% losses): ' + str(ticker), from_=myTwilioNumber, to=myCellPhone)
  247.         #Remove from portfolio
  248.             portfolio.remove(stock)
  249.             bought_price.remove(price)
  250.             stock = stock.upper()
  251.             f = open("portfolio.txt","r+")
  252.             d = f.readlines()
  253.             f.seek(0)
  254.             for i in d:
  255.                 if i != str(stock + '\n'):
  256.                     f.write(i)
  257.             f.truncate()
  258.             f.close()
  259.             f = open("bought_price.txt","r+")
  260.             d = f.readlines()
  261.             f.seek(0)
  262.             for i in d:
  263.                 if i != str(price + '\n'):
  264.                     f.write(i)
  265.             f.truncate()
  266.             f.close()
  267.             checked.append(stock)
  268.         else:
  269.             pass
  270.  
  271. #-------------------------------------------------------------------------------------#
  272. #SCRIPT BODY
  273.  
  274. #Has a "while True" to make sure both functions (edgar_feed and check_price) run constantly    
  275. url = 'http://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type=&company=&dateb=&owner=only&start=0&count=100&output=atom'
  276. print ('monitoring feed...')
  277. run_counter = 0
  278. def job():
  279.     global run_counter
  280.     time.sleep(10)
  281.     run_counter += 1
  282.     if run_counter % 100 == 0:
  283.         print ('Completed ' + str(run_counter) + ' passes.')
  284.     edgar_feed(url)
  285.     check_price()
  286.  
  287. while True:
  288.     job()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement