Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from BeautifulSoup import BeautifulSoup
- from string import capwords, Template
- from urllib import urlencode
- from urllib2 import urlopen
- from sys import exit
# Allowed values for the search form's p_name_type field: active, all.
NAME_TYPES = ('A', '%')
# Allowed values for the search form's p_search_type field.
SEARCH_TYPES = ('BEGINS', 'CONTAINS', 'PARTIAL')
# Common prefix of the search endpoint and of the result links.
BASE_URL = 'http://appext9.dos.state.ny.us/corp_public/'

# Report layout for one entity. Placeholder names are the labels built by
# corpinfo(): table header text with the trailing colon and all spaces removed.
output_template = Template('''
$CurrentEntityName
Filing date: $InitialDOSFilingDate
County: $County
Jurisdiction: $Jurisdiction
Entity Type: $EntityType
Status: $CurrentEntityStatus
Process Address:
$DOSProcess
Chairman's Address:
$ChairmanorChiefExecutiveOfficer
Executive's Address:
$PrincipalExecutiveOffice
Registered Agent:
$RegisteredAgent
''')
def corpsearch(name, name_type='A', search_type='CONTAINS'):
    """Search the corporation database for entities matching ``name``.

    Args:
        name: Entity name (or fragment of one) to search for.
        name_type: One of NAME_TYPES ('A' = active, '%' = all).
        search_type: One of SEARCH_TYPES.

    Returns:
        A list of ``(link text, url)`` tuples, one for every anchor on the
        result page whose href contains 'CORPSEARCH.ENTITY_INFORMATION'.
        Each url is BASE_URL followed by the anchor's href.

    Raises:
        ValueError: If ``name_type`` or ``search_type`` is not an allowed
            value.
    """
    from contextlib import closing

    if name_type not in NAME_TYPES or search_type not in SEARCH_TYPES:
        raise ValueError('Invalid parameters')

    data = urlencode({
        'p_entity_name': name,
        'p_name_type': name_type,
        'p_search_type': search_type,
    })

    # Read the whole response and close the connection before parsing, so
    # the socket is released even if parsing raises.
    with closing(urlopen(BASE_URL + 'CORPSEARCH.SELECT_ENTITY', data)) as response:
        html = response.read()
    bs = BeautifulSoup(html)

    links = bs.findAll(
        'a', href=lambda x: 'CORPSEARCH.ENTITY_INFORMATION' in x)
    return [(a.text, BASE_URL + a['href']) for a in links]
def corpinfo(url):
    """Fetch an entity detail page and collect its labelled fields.

    Every ``<th>`` on the page is treated as a field label and the next
    ``<td>`` after it as that field's value.

    Args:
        url: Address of the detail page, e.g. the second item of a tuple
            returned by corpsearch().

    Returns:
        A dict mapping each label (header text stripped, trailing colon
        removed, spaces removed) to its value text.
    """
    from contextlib import closing

    # Read the whole response and close the connection before parsing, so
    # the socket is released even if parsing raises.
    with closing(urlopen(url)) as response:
        html = response.read()
    bs = BeautifulSoup(html)

    fields = {}
    for th in bs('th'):
        label = th.contents[0].strip().rstrip(':').replace(' ', '')
        # Children with no single string (`.string is None`) become line
        # breaks; text children are stripped and passed through capwords().
        value = ''.join(
            '\n' if e.string is None else capwords(e.string.strip())
            for e in th.findNext('td').contents)
        fields[label] = value
    return fields
# Interactive entry point: search by name, list the matches, then print the
# details of the match the user picks.
if __name__ == '__main__':
    srchname = raw_input('Enter search term: ')
    results = corpsearch(srchname)
    if not results:
        print('No results found.')
        exit(1)

    # The menu is numbered from 1.
    for i, res in enumerate(results, 1):
        print("%2d. %s" % (i, res[0]))

    try:
        choice = int(raw_input('Enter a number to see its results: ')) - 1
    except ValueError:
        print('Invalid selection: please enter a number.')
        exit(1)

    # Reject out-of-range picks explicitly. Without this check an entry of 0
    # becomes index -1 and silently shows the last result.
    if not 0 <= choice < len(results):
        print('Invalid selection: enter a number between 1 and %d.' % len(results))
        exit(1)

    fields = corpinfo(results[choice][1])
    print(output_template.substitute(**fields))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement