# Untitled

import urllib;
import bs4;


# Root of the SEC EDGAR FTP tree; filing paths are appended to this prefix.
secFtpBase = 'ftp://ftp.sec.gov/edgar/'

# Credentials passed along with each download request.
secUserName = 'anonymous'
secPassWord = 'tmo8145@gmail.com'


"""
Downloads the SEC filing via FTP and returns an uncleaned HTML document object model (DOM) as a str

Cik is used to map a company to a unique id
Accession Number is a number unique to the filing document

"""
def downloadSecDocument(cik, accessionNum):
	"""
	Download one SEC filing over FTP and return its raw, uncleaned text.

	Args:
		cik: Central Index Key identifying the company. Converted with
			str() when the path is built, so an int is accepted too.
		accessionNum: Accession number identifying the filing, with or
			without dashes.

	Returns:
		The filing contents as a str.

	Raises:
		ftplib.Error, OSError, EOFError: if connecting, logging in, or
			retrieving the file fails.
	"""
	# Function-scope imports: the file-level `import urllib` does not load
	# urllib.parse on Python 3, and ftplib is not imported at file level.
	from ftplib import FTP
	from urllib.parse import urlsplit

	# The directory name uses the undashed form; the file name uses the
	# dashed form, which is laid out 10-2-6 (filer id, year, sequence).
	accessionNum = str(accessionNum).replace('-', '')
	dashedAccessionNum = '-'.join((accessionNum[:10], accessionNum[10:12], accessionNum[12:]))
	# NOTE(review): '.txt' selects the complete submission text file, which
	# is the file that starts with the header getBasicFileData describes —
	# confirm this is the document wanted.
	documentFtpLink = (secFtpBase + 'data/' + str(cik) + '/' + accessionNum
		+ '/' + dashedAccessionNum + '.txt')

	# NOTE(review): the SEC announced retirement of ftp.sec.gov — confirm the
	# host in secFtpBase is still reachable before relying on this.
	linkParts = urlsplit(documentFtpLink)
	chunks = []
	with FTP() as ftpConn:
		# Port 0 tells ftplib to use the default FTP port.
		ftpConn.connect(linkParts.hostname, linkParts.port or 0)
		ftpConn.login(secUserName, secPassWord)
		ftpConn.retrbinary('RETR ' + linkParts.path, chunks.append)

	# Undecodable bytes are replaced rather than raising, since the document
	# is documented as "uncleaned"; assumes UTF-8/ASCII content — TODO confirm.
	return b''.join(chunks).decode('utf-8', errors='replace')


"""
Returns the basic file data located at the top of the filing before the html document starts

Returned data includes: accession number, acceptance datetime, form type, report period, company name, cik
"""
def getBasicFileData(fileStr):
	"""
	Read the basic filing data from the header at the top of a filing.

	Each value is found by its header label rather than by a fixed column
	offset, so the amount of whitespace after the label does not matter.
	Scanning stops at the end of the header; when a label occurs more than
	once, the first occurrence wins.

	Args:
		fileStr: The filing text as a str, or an object exposing
			iter_lines() (lines may be str or bytes). None is treated as
			an empty document.

	Returns:
		A dict with the keys 'acceptanceDateTime', 'accessionNumber',
		'submissionType', 'reportPeriod', 'fileDate', 'companyName' and
		'cik'. Values are stripped strings, or None when the label was not
		found.
	"""
	# NOTE(review): labels follow the EDGAR submission header layout —
	# confirm against a real filing.
	headerLabels = (
		('<ACCEPTANCE-DATETIME>', 'acceptanceDateTime'),
		('ACCESSION NUMBER:', 'accessionNumber'),
		('CONFORMED SUBMISSION TYPE:', 'submissionType'),
		('CONFORMED PERIOD OF REPORT:', 'reportPeriod'),
		('FILED AS OF DATE:', 'fileDate'),
		('COMPANY CONFORMED NAME:', 'companyName'),
		('CENTRAL INDEX KEY:', 'cik'),
	)
	headerEndTags = ('</SEC-HEADER>', '</IMS-HEADER>')

	fileData = dict.fromkeys(key for _, key in headerLabels)
	if fileStr is None:
		return fileData

	if hasattr(fileStr, 'iter_lines'):
		fileLines = fileStr.iter_lines()
	else:
		fileLines = fileStr.splitlines()

	for fileLine in fileLines:
		if isinstance(fileLine, bytes):
			fileLine = fileLine.decode('utf-8', errors='replace')
		# Header lines are indented with tabs inside nested sections.
		fileLine = fileLine.strip()
		if fileLine.startswith(headerEndTags):
			break
		for label, key in headerLabels:
			if fileData[key] is None and fileLine.startswith(label):
				fileData[key] = fileLine[len(label):].strip()
				break

	return fileData


"""
Main function for parsing a 10k/Q

fileStr is an uncleaned html/css str containing the filing document

"""

def parsefile(fileStr):
	"""
	Parse the raw markup of a 10-K/10-Q filing into a BeautifulSoup DOM.

	Args:
		fileStr: Uncleaned HTML/CSS markup of the filing, as a str.

	Returns:
		The bs4.BeautifulSoup object built from fileStr.
	"""
	# The file imports the bs4 package itself, so the class is reached through
	# the package. Naming the parser keeps the result independent of which
	# optional parsers happen to be installed.
	fileDom = bs4.BeautifulSoup(fileStr, 'html.parser')
	return fileDom


"""
Extracts tables from a beautiful soup obj and returns them in dictionary form

"""
def parseTables(fileDom):
	"""
	Extract every <table> found in a parsed document.

	Args:
		fileDom: Object exposing find_all(name), e.g. a BeautifulSoup DOM.

	Returns:
		A dict mapping each table's position in the find_all('table')
		result (0-based) to whatever parseTable returns for that table.
		Empty when the document has no tables.
		NOTE(review): the positional key is an assumption; the original
		only says "dictionary form" — confirm the wanted key.
	"""
	tableObjs = fileDom.find_all('table')
	return {
		tableIndex: parseTable(tableObj)
		for tableIndex, tableObj in enumerate(tableObjs)
	}

"""
Given a table in html format, creates a list of lists that represents the table

"""
def parseTable(tableObj):
	"""
	Convert an HTML table into a list of lists of cell text.

	Args:
		tableObj: Object exposing find_all(name) for its <tr> rows, e.g. a
			BeautifulSoup <table> tag.

	Returns:
		One inner list per <tr>, holding the stripped text of each <th>
		and <td> cell in the order find_all returns them. A row without
		cells yields an empty list; a table without rows yields [].
	"""
	tableRows = tableObj.find_all('tr')

	parsedRows = []
	for tableRow in tableRows:
		# A row may mix <th> headers and <td> data (row headers), so both
		# tag names go into one query instead of two separate ones, which
		# would lose the order of the cells relative to each other.
		rowCells = tableRow.find_all(['th', 'td'])
		parsedRows.append([rowCell.get_text(strip=True) for rowCell in rowCells])

	return parsedRows