SH1NU11b1

followtheMoney.py

Jul 20th, 2017
224
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.22 KB | None | 0 0
  1. import requests
  2. import json
  3. import argparse
  4. import csv
  5. import urllib
  6. import os
  7.  
  8. ap = argparse.ArgumentParser()
  9. ap.add_argument("-c","--company",    required=True,help="Pass in the exact corporate name to search for.")
  10. ap.add_argument("-j","--jurisdiction", required=True,help="Pass in a two character jurisdiction code.")
  11. ap.add_argument("-t","--timeline",     required=True,help="Timeline file, will create if doesn't exist or add to existing spreadsheet.")
  12.  
  13.  
  14. args = vars(ap.parse_args())
  15.  
  16. company       = args['company']
  17. jurisdiction  = args['jurisdiction']
  18. timeline_file = args['timeline']
  19.  
  20. #
  21. # Do an initial corporate search
  22. #
  23. def corporation_search(corporation,jurisdiction):
  24.    
  25.     url  = "https://api.opencorporates.com/v0.4/companies/search?q=%s" % urllib.quote(corporation)
  26.     url += "&jurisdiction_code=%s" % jurisdiction
  27.            
  28.     response = requests.get(url)
  29.    
  30.     if response.status_code == 200:
  31.        
  32.         result = json.loads(response.content)
  33.        
  34.         for company in result['results']['companies']:
  35.            
  36.             if corporation.lower() == company['company']['name'].lower():
  37.                
  38.                 return company['company']['company_number'], company['company']['name']
  39.                
  40.    
  41.     return None,None  
  42.  
  43. #
  44. # Retrieve corporate filings
  45. #
  46. def corporate_filings(corporation_id,jurisdiction):
  47.    
  48.     api_url = "https://api.opencorporates.com/v0.4/companies/%s/%s/filings" % (jurisdiction,corporation_id)
  49.    
  50.     response = requests.get(api_url)
  51.    
  52.     filings = []
  53.    
  54.     if response.status_code == 200:
  55.        
  56.         result = json.loads(response.content)
  57.  
  58.         filings.extend(result['results']['filings'])
  59.  
  60.         # iterate over all filings
  61.         total_pages = result['results']['total_pages']
  62.         count       = 2
  63.        
  64.         while count <= total_pages:
  65.            
  66.             url = api_url + "?page=%d" % count
  67.            
  68.             response = requests.get(url)
  69.            
  70.             if response.status_code == 200:
  71.                
  72.                 result = json.loads(response.content)
  73.                
  74.                 filings.extend(result['results']['filings'])
  75.                
  76.             count += 1
  77.            
  78.         print "[*] Retrieved %d filing records." % len(filings)
  79.        
  80.         return filings
  81.  
  82.     return None
  83.  
  84. #
  85. # Build a timeline using the TimelineJS template
  86. #
  87. def build_timeline(filings,corporate_name):
  88.    
  89.     fields = [
  90.         'Year', 'Month', 'Day', 'Time', 'End Year',
  91.         'End Month', 'End Day', 'End Time', 'Display Date',
  92.         'Headline', 'Text', 'Media', 'Media Credit', 'Media Caption',
  93.         'Media Thumbnail', 'Type', 'Group', 'Background']
  94.    
  95.     if not os.path.exists(timeline_file):
  96.         write_header = True
  97.     else:
  98.         write_header = False
  99.    
  100.     with open(timeline_file,"ab") as output:
  101.        
  102.         writer = csv.DictWriter(output,fieldnames=fields)
  103.        
  104.         if write_header is True:
  105.             writer.writeheader()
  106.        
  107.        
  108.         # add each corporate filing to the spreadsheet
  109.         for filing in filings:
  110.            
  111.             year,month,day = filing['filing']['date'].split("-")
  112.            
  113.             record = {}
  114.             record['Year']  = year
  115.             record['Month'] = month
  116.             record['Day']   = day
  117.            
  118.             record['Display Date'] = filing['filing']['date']
  119.             record['Headline']     = "%s - %s" % (corporate_name,filing['filing']['title'])
  120.             record['Text']         = "View record <a href='%s'>here</a>" % filing['filing']['opencorporates_url']
  121.             record['Group']        = corporate_name
  122.            
  123.             writer.writerow(record)
  124.            
  125.        
  126.         print "[*] Added records to spreadsheet: %s" % timeline_file
  127.  
  128.  
  129.         return
  130.    
  131. # find the company
  132. corporate_id,corporate_name = corporation_search(company,jurisdiction)
  133.  
  134. if corporate_id is not None:
  135.  
  136.     # extract all filings
  137.     filings = corporate_filings(corporate_id,jurisdiction)
  138.  
  139.     # build a timeline
  140.     build_timeline(filings,corporate_name)
  141. else:
  142.     print "[!!!] Failed to retrieve corporation. Please check the name."
Add Comment
Please, Sign In to add comment