# Untitled

from BeautifulSoup import BeautifulSoup
from string import capwords, Template
from urllib import urlencode
from urllib2 import urlopen
from sys import exit

# Values accepted for the `p_name_type` request parameter.
NAME_TYPES = (
    'A',  # active
    '%',  # all
)

# Values accepted for the `p_search_type` request parameter.
SEARCH_TYPES = (
    'BEGINS',
    'CONTAINS',
    'PARTIAL',
)

# Prefix prepended to every endpoint name and to each result link's href.
BASE_URL = 'http://appext9.dos.state.ny.us/corp_public/'

# Report layout printed for a single entity.  Each $placeholder must match a
# key of the dict returned by corpinfo(), i.e. a table header label with its
# trailing colon and all spaces removed.
output_template = Template('''
$CurrentEntityName

Filing date:    $InitialDOSFilingDate
County:         $County
Jurisdiction:   $Jurisdiction
Entity Type:    $EntityType
Status:         $CurrentEntityStatus

Process Address:
$DOSProcess

Chairman's Address:
$ChairmanorChiefExecutiveOfficer

Executive's Address:
$PrincipalExecutiveOffice

Registered Agent:
$RegisteredAgent
''')

def corpsearch(name, name_type='A', search_type='CONTAINS'):
    """Search for entities by name and return the matching result links.

    Sends the search parameters to the ``CORPSEARCH.SELECT_ENTITY`` endpoint
    under BASE_URL and collects every anchor in the response whose href
    contains ``CORPSEARCH.ENTITY_INFORMATION``.

    Args:
        name: Entity name (or fragment) to search for.
        name_type: One of NAME_TYPES.
        search_type: One of SEARCH_TYPES.

    Returns:
        A list of ``(link_text, url)`` tuples, where ``url`` is BASE_URL
        joined with the anchor's href.  Empty when nothing matched.

    Raises:
        ValueError: If ``name_type`` or ``search_type`` is not an accepted
            value.
    """
    if name_type not in NAME_TYPES or search_type not in SEARCH_TYPES:
        raise ValueError('Invalid parameters')

    data = urlencode({'p_entity_name': name, 'p_name_type': name_type, 'p_search_type': search_type})
    # Passing a data payload makes urlopen send the request as a POST.
    response = urlopen(BASE_URL + 'CORPSEARCH.SELECT_ENTITY', data)
    try:
        bs = BeautifulSoup(response)
    finally:
        # The soup is fully built from the response at construction time,
        # so the connection can be released right away.
        response.close()

    def is_entity_link(href):
        # The matcher receives None for anchors that carry no href at all
        # (e.g. named anchors); those must be rejected, not tested with `in`.
        return href is not None and 'CORPSEARCH.ENTITY_INFORMATION' in href

    return [(a.text, BASE_URL + a['href']) for a in bs.findAll('a', href=is_entity_link)]

def corpinfo(url):
    """Fetch an entity page and return its header/value pairs as a dict.

    For every ``<th>`` in the page, the key is the header's leading text
    with surrounding whitespace, trailing colons and all spaces removed; the
    value is built from the contents of the next ``<td>`` in the document.
    Within that cell, each node that has a string contributes its stripped,
    capwords-normalised text, and each node without one (such as a line
    break tag) contributes a newline.

    Headers that have no leading text, or that are not followed by any
    ``<td>``, are skipped instead of aborting the whole parse.

    Args:
        url: Address of the entity information page.

    Returns:
        A dict mapping normalised header labels to cell text.
    """
    response = urlopen(url)
    try:
        bs = BeautifulSoup(response)
    finally:
        # The soup is fully built from the response at construction time,
        # so the connection can be released right away.
        response.close()

    fields = {}
    for th in bs('th'):
        # `.string` yields the text for a text node (and for a tag wrapping a
        # single text node), or None when no usable text is available.
        heading = th.contents[0].string if th.contents else None
        cell = th.findNext('td')
        if heading is None or cell is None:
            continue
        label = heading.strip().rstrip(':').replace(' ', '')
        value = ''.join('\n' if e.string is None else capwords(e.string.strip()) for e in cell.contents)
        fields[label] = value
    return fields

if __name__ == '__main__':
    # Interactive flow: prompt for a search term, list the numbered matches,
    # prompt for one of them, then print its details using output_template.
    # Exits with status 1 when there are no matches or the selection is
    # not a valid entry number.
    srchname = raw_input('Enter search term: ')
    results = corpsearch(srchname)
    if not results:
        print('No results found.')
        exit(1)
    for i, res in enumerate(results, 1):
        print("%2d. %s" % (i, res[0]))
    try:
        # The list is shown 1-based; convert to a 0-based index.
        choice = int(raw_input('Enter a number to see its results: ')) - 1
    except ValueError:
        print('Invalid selection.')
        exit(1)
    # Reject 0 and negative entries explicitly: a negative index would
    # otherwise silently pick an entry counted from the end of the list.
    if not 0 <= choice < len(results):
        print('Invalid selection.')
        exit(1)
    fields = corpinfo(results[choice][1])
    print(output_template.substitute(**fields))