Advertisement
eXFq7GJ1cC

Untitled

May 14th, 2012
315
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.94 KB | None | 0 0
  1. from BeautifulSoup import BeautifulSoup
  2. from string import capwords, Template
  3. from urllib import urlencode
  4. from urllib2 import urlopen
  5. from sys import exit
  6.  
  7. NAME_TYPES = ('A', '%')   # active, all
  8. SEARCH_TYPES = ('BEGINS', 'CONTAINS', 'PARTIAL')
  9. BASE_URL = 'http://appext9.dos.state.ny.us/corp_public/'
  10.  
  11. output_template = Template('''
  12. $CurrentEntityName
  13.  
  14. Filing date:    $InitialDOSFilingDate
  15. County:         $County
  16. Jurisdiction:   $Jurisdiction
  17. Entity Type:    $EntityType
  18. Status:         $CurrentEntityStatus
  19.  
  20. Process Address:
  21. $DOSProcess
  22.  
  23. Chairman's Address:
  24. $ChairmanorChiefExecutiveOfficer
  25.  
  26. Executive's Addrress:
  27. $PrincipalExecutiveOffice
  28.  
  29. Registered Agent:
  30. $RegisteredAgent
  31. ''')
  32.  
  33. def corpsearch(name, name_type='A', search_type='CONTAINS'):
  34.     if name_type not in NAME_TYPES or search_type not in SEARCH_TYPES:
  35.         raise ValueError('Invalid parameters')
  36.  
  37.     data = urlencode({'p_entity_name': name, 'p_name_type': name_type, 'p_search_type': search_type})
  38.     bs = BeautifulSoup(urlopen(BASE_URL + 'CORPSEARCH.SELECT_ENTITY', data))
  39.    
  40.     return [(a.text, BASE_URL + a['href']) for a in bs.findAll('a', href=lambda x: 'CORPSEARCH.ENTITY_INFORMATION' in x)]
  41.  
  42. def corpinfo(url):
  43.     bs = BeautifulSoup(urlopen(url))
  44.     fields = {}
  45.     for th in bs('th'):
  46.         label = th.contents[0].strip().rstrip(':').replace(' ', '')
  47.         value = ''.join('\n' if e.string is None else capwords(e.string.strip()) for e in th.findNext('td').contents)
  48.         fields[label] = value
  49.     return fields
  50.  
  51. if __name__ == '__main__':
  52.     srchname = raw_input('Enter search term: ')
  53.     results = corpsearch(srchname)
  54.     if len(results) == 0:
  55.         print 'No results found.'
  56.         exit(1)
  57.     for i, res in enumerate(results, 1):
  58.         print "%2d. %s" % (i, res[0])
  59.     choice = int(raw_input('Enter a number to see its results: ')) - 1
  60.     fields = corpinfo(results[choice][1])
  61.     print output_template.substitute(**fields)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement