elkclone

stockscraper3.py

Feb 27th, 2014
#!/usr/bin/python
"""Your mission, should you choose to accept it: use the urllib2 module to
scrape the daily summary page source for each stock symbol, and write() the
source pages to local files until the end of the list is reached. These files
can later be read() into re-based modules or Scrapy for raw data extraction.
Future steps ----> read() a symbol list from a file or user-supplied input
             ----> an re module to pluck out some image/graph data
(a sketch of both future steps follows the script)"""
# version 3 update added.

import re  # unused for now; reserved for the future regex step (see docstring)
import urllib2
from BeautifulSoup import BeautifulSoup

# Identify as a browser; some sites reject urllib2's default user agent.
# addheaders must be a list of tuples, and the header name is 'User-Agent'.
opener = urllib2.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]


def urldog():
    # Valid Yahoo tickers: Apple is "aapl" and Intel is "intc".
    symbolslist = ["aapl", "goog", "csco", "intc", "spy", "fb"]
    for i, symbol in enumerate(symbolslist):
        urlScrape = "http://finance.yahoo.com/q?s=" + symbol + "&ql=1"
        uf = opener.open(urlScrape)  # go through the opener so the User-Agent is sent
        x = uf.read()
        soup = BeautifulSoup(x)
        body = soup.body.text       # page text with the markup stripped
        stuffgot = soup.link        # first <link> tag in the page
        body = body.encode("ascii", "ignore")
        fqdat = open("goodiesdat" + str(i) + ".txt", "w")
        f = open("goodies" + str(i) + ".txt", "w")
        f.write(body)
        fqdat.write(str(stuffgot))
        f.close()
        uf.close()
        fqdat.close()
    print "The URLs of the entered stock symbols have been scraped and saved to goodies*.txt files, after dumping the header junk and snagging the first link."
    print "These files can later be read into a regex or custom module for data extraction (see the sketch below)."
# set up main
def main():
    urldog()

# call main
if __name__ == '__main__':
    main()
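
As a follow-up, here is a minimal sketch of the two "future steps" from the docstring: reading the symbol list from a file, and using re to pluck figures out of the saved page dumps. It assumes a symbols.txt file with one ticker per line and the goodies*.txt files written by the script above; the symbols.txt name and the price pattern are illustrative assumptions, not anything from the original paste.

#!/usr/bin/python
"""Hypothetical sketch of the docstring's future steps: read a symbol
list from a file, then re.findall() over the saved page dumps.
symbols.txt and the figure pattern are assumptions for illustration."""

import glob
import re


def read_symbols(path="symbols.txt"):
    # One ticker per line; skip blanks. symbols.txt is an assumed file.
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]


def pluck_figures():
    # Illustrative pattern: pull anything shaped like a dollar figure out
    # of the text dumps. Yahoo's actual page layout may differ.
    figure_pat = re.compile(r"\d+\.\d{2}")
    # goodies[0-9]*.txt avoids also matching the goodiesdat*.txt link files.
    for fname in sorted(glob.glob("goodies[0-9]*.txt")):
        with open(fname) as f:
            hits = figure_pat.findall(f.read())
        print fname, hits[:5]  # first few candidate figures per file


if __name__ == '__main__':
    print read_symbols()
    pluck_figures()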