# Untitled


# coding: utf-8

# In[1]:


import json
import os
import re

import pandas as pd
import requests


# In[2]:


#https://gis.mcassessor.maricopa.gov/arcgis/rest/services/Parcels/MapServer/0/query?where=PUC+%3D+%270261%27&text=*&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=true&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&resultOffset=&resultRecordCount=100&queryByDistance=&returnExtentsOnly=false&datumTransformation=&parameterValues=&rangeValues=&f=pjson
#https://gis.mcassessor.maricopa.gov/arcgis/rest/services/MaricopaDynamicQueryService/MapServer/3/query?where=PUC%3D%270261%27+AND+PHYSICAL_CITY+%3D%27CHANDLER%27&text=*&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=*&returnGeometry=true&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentsOnly=false&datumTransformation=&parameterValues=&rangeValues=&f=pjson
# Input table; the OWNER and ZIP_CODE columns of this frame are read further down.
df = pd.read_csv(filepath_or_buffer='hoas/common_parcels_dissolved.csv')


# In[3]:


# Words removed from an owner name before it is turned into a search term.
stopwords = ['ASSOC', 'HOMEOWNER\'S', 'INC', 'IN', 'ASSN', 'HOA', 'ASSO', 'NO', 'ASS', "H", "O", "A", "THE"]
# Filled further down with one {'name', 'zipcode'} entry per input row.
hoaList = []
# NOTE(review): not referenced anywhere else in the visible source.
resultList = []

jurisdiction_code = 'us_az'
# SECURITY: an API token is committed here in plain text. It can now be
# overridden through the environment; the literal is kept only as a fallback
# so existing runs keep working. Rotate the token and drop the literal.
# NOTE(review): the environment variable name is a suggestion -- adjust to
# whatever the deployment uses.
key = os.environ.get('OPENCORPORATES_API_TOKEN', 'dKIhgoGMyg2obcnxZdlg')
# Pre-built query-string suffix. NOTE(review): the visible request code passes
# `key` through `params` instead and never reads `api_key`.
api_key = '&api_token=' + key + '&per_page=100' if key else ''

# Company numbers recorded by get_companies() for unambiguous matches.
results = []

def get_companies(companies):
    """Record the company number when a search returned exactly one company.

    Every entry's ``company_number`` is printed. When *companies* holds
    exactly one entry and its number is not ``None``, that number is appended
    to the module-level ``results`` list.

    Args:
        companies: iterable of dicts, each carrying a ``'company'`` dict with
            a ``'company_number'`` key. ``None`` or an empty iterable means
            "no match".

    Returns:
        The recorded company number, or ``None`` when nothing was recorded
        (no hit, several hits, or a missing number).
    """
    company_ids = []
    for item in companies or []:
        company_id = item['company']['company_number']
        print(company_id)
        company_ids.append(company_id)

    # Only an unambiguous search result is trusted; zero or several hits are
    # deliberately left unrecorded.
    if len(company_ids) != 1 or company_ids[0] is None:
        return None

    results.append(company_ids[0])
    return company_ids[0]


def hasNumbers(inputString):
    """Tell whether *inputString* contains at least one digit character."""
    for character in inputString:
        if character.isdigit():
            return True
    return False


# Collect owner name and ZIP code for every row from position 499 onward.
# NOTE(review): the reason for skipping the first 499 rows is not visible
# here -- confirm it is still wanted.
# The row label is stored too so that a match can be written back to the row
# it came from.
for index, row in df.iloc[499:].iterrows():
    hoaList.append({'index': index, 'name': row['OWNER'], 'zipcode': row['ZIP_CODE']})

URL = "https://api.opencorporates.com/v0.4/companies/search?"
chkList = []                # wildcard queries already sent
queryCount = 0
matched_by_wildcard = {}    # wildcard query -> recorded company number
company_id_by_row = {}      # df row label   -> recorded company number

for elem in hoaList:
    queryCount += 1
    name = elem['name']
    if pd.isnull(name):
        # Missing OWNER cells are read as NaN, which has no .split().
        continue

    resultwords = [word for word in name.split() if word not in stopwords]
    result = ' '.join(resultwords)

    if hasNumbers(result):
        # Keep only the text in front of the first digit.
        prefix = re.match(r'[^\d]+', result)
        if prefix is None:
            # The name starts with a digit, so there is nothing to search for.
            print("")
            print('!!! ' + result + ' !!!')
            print("")
            continue
        parentsub = prefix.group(0)
    else:
        parentsub = result

    search_term = parentsub.rstrip()
    if not search_term:
        # Only stopwords were present; '**' would match every company.
        continue
    wildcard = '*' + search_term + '*'

    if wildcard not in chkList:
        chkList.append(wildcard)
        print(str(queryCount) + ': ' + parentsub)
        PARAMS = {'q': wildcard, 'jurisdiction_code': jurisdiction_code, 'api_token': key}
        # A timeout keeps one stalled request from hanging the whole run.
        r = requests.get(url=URL, params=PARAMS, timeout=30)
        print(r.url)
        # Fail with the HTTP error instead of an obscure KeyError/ValueError
        # while decoding an error payload.
        r.raise_for_status()
        data = r.json()

        # get_companies() appends to `results` only for an unambiguous match,
        # so growth of the list tells us whether this query matched.
        recorded_before = len(results)
        get_companies(data['results']['companies'])
        if len(results) > recorded_before:
            matched_by_wildcard[wildcard] = results[-1]

    # Rows that reduce to the same query share its match.
    if wildcard in matched_by_wildcard:
        company_id_by_row[elem['index']] = matched_by_wildcard[wildcard]

print("")
print("Total matched: " + str(len(results)))
# Assign by row label so each id lands on the row that produced it; rows
# without a match are left empty (NaN).
df['company_id'] = pd.Series(company_id_by_row, dtype=object)
df.to_csv('hoas_out_2.csv')

    #print r.url
#     data = r.json()
# json.dump(data, companiesfile)