stockscraper3.py

#!/usr/bin/python
#[email protected]
""" Your mission should you choose to accept it:... use urllib2() module to
scrape daily summay page source of stock symbols. write() the source pages
 to local files until the list len() is reached. These files can later be
 read() into other regex() modules or scapy() for raw data extraction
Future steps---->read() a symbol list from a file or user inputed list
            ---->regex() module to pluck out some image/graph data"""
#version 3 updat added.

import sys
import os
import re
import urllib2
from BeautifulSoup import BeautifulSoup

opener= urllib2.build_opener()
opener.addheaders = {('User_agent', 'Mozzilla/5.0')}


def urldog():
    symbolslist = ["appl","goog","csco","intel","spy","fb"]
    i=0
    while i<len(symbolslist):
        urlScrape = "http://finance.yahoo.com/q?s=" + symbolslist[i]+"&ql=1"
        uf = urllib2.urlopen(urlScrape)
        x = uf.read()
        soup = BeautifulSoup(x)
        body = soup.body.text
        stuffgot = soup.link
        body = body.encode("ascii", "ignore")
        fqdat = open("goodiesdat"+str(i)+".txt","w")
        f = open("goodies"+str(i)+".txt","w")
        f.write(body)
        fqdat.write(str(stuffgot))
        f.close()
        uf.close()
        fqdat.close()
        i+=1
    print "The Url's of entered stock sympols have been scraped and saved to goodie.txt files after dumping the headder crap and snaging the first link"
    print "These files can be later read into a regex or custom  module for data extraction. "
# set up main
def main():
    urldog()

#call main
if __name__ == '__main__':
    main()