#!/usr/bin/python
""" Your mission, should you choose to accept it: use the urllib2 module to
scrape the daily summary page source for a list of stock symbols, and write()
each page source to a local file until the end of the list is reached. These
files can later be read() into other regex modules or Scrapy for raw data
extraction.
Future steps ----> read() a symbol list from a file or a user-input list
            ----> a regex module to pluck out some image/graph data
(Sketches of both future steps appear below.)"""
# version 3 update added.
import re
import urllib2
from BeautifulSoup import BeautifulSoup
# Build an opener that sends a browser-like User-Agent header;
# addheaders must be a list of (name, value) tuples.
opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
def urldog():
    # Yahoo ticker symbols for Apple, Google, Cisco, Intel, SPY, and Facebook.
    symbolslist = ["aapl", "goog", "csco", "intc", "spy", "fb"]
    for i, symbol in enumerate(symbolslist):
        urlScrape = "http://finance.yahoo.com/q?s=" + symbol + "&ql=1"
        # Open through the opener built above so the User-Agent header is sent.
        uf = opener.open(urlScrape)
        x = uf.read()
        soup = BeautifulSoup(x)
        body = soup.body.text        # visible page text, markup stripped
        stuffgot = soup.link         # first <link> tag in the page
        body = body.encode("ascii", "ignore")
        fqdat = open("goodiesdat" + str(i) + ".txt", "w")
        f = open("goodies" + str(i) + ".txt", "w")
        f.write(body)
        fqdat.write(str(stuffgot))
        f.close()
        uf.close()
        fqdat.close()
- print "The Url's of entered stock sympols have been scraped and saved to goodie.txt files after dumping the headder crap and snaging the first link"
- print "These files can be later read into a regex or custom module for data extraction. "
# set up main
def main():
    urldog()

# call main
if __name__ == '__main__':
    main()
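
# --- Sketch: future step, symbol list from a file or user input -------------
# A minimal sketch of the docstring's first future step. The filename
# "symbols.txt" (one ticker per line) is an assumption for illustration;
# the file is not created by this script.
def read_symbols(path="symbols.txt"):
    symbols = []
    fh = open(path, "r")
    for line in fh:
        line = line.strip().lower()
        if line:                      # skip blank lines
            symbols.append(line)
    fh.close()
    return symbols

def ask_symbols():
    # Comma-separated tickers typed by the user, e.g. "aapl,goog".
    raw = raw_input("Enter symbols, comma-separated: ")
    return [s.strip().lower() for s in raw.split(",") if s.strip()]

# urldog() could take the output of either helper in place of its
# hard-coded symbolslist.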