Advertisement
wtgeographer

Untitled

Jul 9th, 2018
134
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.70 KB | None | 0 0
  1.  
  2. # coding: utf-8
  3.  
  4. # In[1]:
  5.  
  6.  
import json
import os
import re

import pandas as pd
import requests
  11.  
  12.  
  13. # In[2]:
  14.  
  15.  
  16. #https://gis.mcassessor.maricopa.gov/arcgis/rest/services/Parcels/MapServer/0/query?where=PUC+%3D+%270261%27&text=*&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=true&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&resultOffset=&resultRecordCount=100&queryByDistance=&returnExtentsOnly=false&datumTransformation=&parameterValues=&rangeValues=&f=pjson
  17. #https://gis.mcassessor.maricopa.gov/arcgis/rest/services/MaricopaDynamicQueryService/MapServer/3/query?where=PUC%3D%270261%27+AND+PHYSICAL_CITY+%3D%27CHANDLER%27&text=*&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=true&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentsOnly=false&datumTransformation=&parameterValues=&rangeValues=&f=pjson
  18. df = pd.read_csv('hoas/common_parcels_dissolved.csv')
  19.  
  20.  
  21. # In[3]:
  22.  
  23.  
  24. stopwords = ['ASSOC', 'HOMEOWNER\'S', 'INC', 'IN', 'ASSN', 'HOA', 'ASSO', 'NO', 'ASS', "H", "O", "A", "THE"]
  25. hoaList = []
  26. resultList = []
  27.  
  28. jurisdiction_code = 'us_az'
  29. key = 'dKIhgoGMyg2obcnxZdlg'
  30. api_key = ''
  31.  
  32.  
  33. if key:
  34.     api_key = '&api_token=' + key + '&per_page=100'
  35.  
  36. results =[]
  37.    
  38. def get_companies(companies):
  39.        
  40.     count = 0
  41.     company_id = None
  42.     for item in companies:
  43.         company = item['company']
  44.         #grab the first company id
  45.         company_id = company['company_number']
  46.         print company_id
  47.         count += 1
  48.  
  49.     if company_id is not None and count == 1:
  50.         results.append(company_id)
  51.     else:        
  52.         pass
  53.          
  54.     #print results
  55.  
  56.    
  57. def hasNumbers(inputString):
  58.     #return
  59.     return any(char.isdigit() for char in inputString)
  60.  
  61.  
  62. for index, row in df.iloc[499:].iterrows():
  63.     hoaList.append({'name':row['OWNER'], 'zipcode':row['ZIP_CODE']})
  64.  
  65. chkList = []
  66. queryCount = 0
  67. for elem in hoaList:
  68.     queryCount += 1
  69.     querywords = elem['name'].split()
  70.     resultwords  = [word for word in querywords if word not in stopwords]
  71.     result = ' '.join(resultwords)
  72.     #print result
  73.     if hasNumbers(result) == True:
  74.         try:
  75.             parentsub = re.split(r'(^[^\d]+)', result)[1]
  76.             wildcard = '*' + parentsub.rstrip() + '*'
  77.         except:
  78.             print ""
  79.             print '!!! ' + result + ' !!!'
  80.             print ""
  81.    
  82.     else:
  83.        
  84.         parentsub = result
  85.         wildcard = '*' + parentsub.rstrip() + '*'
  86.  
  87.     if wildcard not in chkList:
  88.         chkList.append(wildcard)
  89.         print str(queryCount) + ': ' +parentsub
  90.         PARAMS = {'q': wildcard, 'jurisdiction_code': 'us_az', 'api_token': key}
  91.         URL = "https://api.opencorporates.com/v0.4/companies/search?"
  92.         r = requests.get(url=URL, params=PARAMS)
  93.         print r.url
  94.         data = r.json()
  95.         #status = r.status_code
  96.         #print data
  97.         get_companies(data['results']['companies'])
  98.  
  99.     else:
  100.         pass
  101.  
  102. print ""
  103. print "Total matched: " + str(len(results))
  104. df['company_id'] = pd.Series(results)
  105. df.to_csv('hoas_out_2.csv')
  106.  
  107.     #print r.url
  108. #     data = r.json()
  109. # json.dump(data, companiesfile)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement