Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding: utf-8
- # In[1]:
- import pandas as pd
- import json
- import requests
- import re
- # In[2]:
- #https://gis.mcassessor.maricopa.gov/arcgis/rest/services/Parcels/MapServer/0/query?where=PUC+%3D+%270261%27&text=*&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=true&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&resultOffset=&resultRecordCount=100&queryByDistance=&returnExtentsOnly=false&datumTransformation=&parameterValues=&rangeValues=&f=pjson
- #https://gis.mcassessor.maricopa.gov/arcgis/rest/services/MaricopaDynamicQueryService/MapServer/3/query?where=PUC%3D%270261%27+AND+PHYSICAL_CITY+%3D%27CHANDLER%27&text=*&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=true&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentsOnly=false&datumTransformation=&parameterValues=&rangeValues=&f=pjson
# Parcel table the rest of the script iterates over; the code below reads its
# OWNER and ZIP_CODE columns.
df = pd.read_csv('hoas/common_parcels_dissolved.csv')
- # In[3]:
# Words dropped from an owner name before it is used as a search term.
stopwords = [
    'ASSOC', 'HOMEOWNER\'S', 'INC', 'IN', 'ASSN', 'HOA', 'ASSO', 'NO', 'ASS',
    "H", "O", "A", "THE",
]

# Accumulators filled in by the code further down the script.
hoaList = []
resultList = []
results = []

jurisdiction_code = 'us_az'

# NOTE(review): this API token is committed in source; consider loading it
# from the environment instead.
key = 'dKIhgoGMyg2obcnxZdlg'

# Pre-built query-string suffix; left empty when no token is configured.
api_key = '&api_token=' + key + '&per_page=100' if key else ''
def get_companies(companies):
    """Print every company number in a search result and keep the first one.

    Args:
        companies: Iterable of dicts, each holding a ``'company'`` dict with a
            ``'company_number'`` entry.

    Returns:
        The company number appended to the module-level ``results`` list, or
        ``None`` when nothing was appended (no items, or the first item's
        number is ``None``).

    NOTE(review): the pasted source lost its indentation. This assumes the
    ``count == 1`` check sat inside the loop ("grab the first company id").
    If it was meant to run after the loop it would only accept searches with
    exactly one hit -- confirm against the original file.
    """
    recorded = None
    for position, item in enumerate(companies):
        company_id = item['company']['company_number']
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(company_id)
        # Only the first hit is stored; later hits are printed for inspection.
        if position == 0 and company_id is not None:
            results.append(company_id)
            recorded = company_id
    return recorded
def hasNumbers(inputString):
    """Return True when ``inputString`` contains at least one digit character."""
    for character in inputString:
        if character.isdigit():
            return True
    return False
# Collect owner name / zip code for each parcel row to process, remembering the
# row label so a match can be written back to the right row afterwards.
# NOTE(review): the 499 offset is hard-coded; presumably it resumes an earlier,
# interrupted run -- confirm before re-running from scratch.
for index, row in df.iloc[499:].iterrows():
    hoaList.append({'index': index, 'name': row['OWNER'], 'zipcode': row['ZIP_CODE']})

URL = "https://api.opencorporates.com/v0.4/companies/search?"
chkList = {}          # wildcard query -> company number found (None if no hit)
row_company_ids = {}  # df row label -> company number
queryCount = 0
for elem in hoaList:
    queryCount += 1
    # A missing owner is read as NaN (a float), which has no .split().
    if pd.isnull(elem['name']):
        continue
    querywords = elem['name'].split()
    result = ' '.join(word for word in querywords if word not in stopwords)
    if hasNumbers(result):
        # Keep only the text before the first digit.
        leading = re.match(r'[^\d]+', result)
        if leading is None:
            # The name starts with a digit, so there is nothing to search for.
            print("")
            print('!!! ' + result + ' !!!')
            print("")
            continue
        parentsub = leading.group(0)
    else:
        parentsub = result
    wildcard = '*' + parentsub.rstrip() + '*'
    if wildcard == '**':
        # Nothing left after stopword removal; a bare wildcard would match
        # arbitrary companies.
        continue
    if wildcard not in chkList:
        print(str(queryCount) + ': ' + parentsub)
        PARAMS = {'q': wildcard, 'jurisdiction_code': jurisdiction_code, 'api_token': key}
        # Without a timeout a stalled connection would hang the run forever.
        r = requests.get(url=URL, params=PARAMS, timeout=30)
        print(r.url)
        # Fail with the HTTP error rather than a KeyError on an error payload.
        r.raise_for_status()
        data = r.json()
        # get_companies() appends to `results` on a hit; detect that by length
        # so this works regardless of what it returns.
        recorded = len(results)
        get_companies(data['results']['companies'])
        chkList[wildcard] = results[-1] if len(results) > recorded else None
    # Rows that share a search term share the match found for it.
    if chkList[wildcard] is not None:
        row_company_ids[elem['index']] = chkList[wildcard]

print("")
print("Total matched: " + str(len(results)))
# Assign by row label so each id lands on the parcel it was found for;
# unmatched rows are left as NaN.
df['company_id'] = pd.Series(row_company_ids, dtype=object)
df.to_csv('hoas_out_2.csv')
- #print r.url
- # data = r.json()
- # json.dump(data, companiesfile)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement