Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests # needs to be installed
- from urllib.request import urlopen, Request
- import json
- import urllib.parse
- import string
def remove_whitespace(in_string: str):
    """Return *in_string* with every ASCII whitespace character removed.

    Only the characters in ``string.whitespace`` are stripped (translate
    deletes exactly those, unlike ``str.isspace`` which also matches
    Unicode whitespace).
    """
    # The third argument of str.maketrans is the set of characters to delete.
    deletion_table = str.maketrans("", "", string.whitespace)
    return in_string.translate(deletion_table)
def url_encode(string: str):
    """Percent-encode a value for use in a URL query string.

    Non-str inputs (e.g. the integer page number) are converted to
    str before quoting.
    """
    return urllib.parse.quote(str(string))
def get_blight_data(start_date="05/2019", end_date="08/2019"):
    """Download every blight record from the New Orleans BlightStatus API.

    The endpoint is paginated; this fetches page after page until a page
    reports ``page_total`` of 0 (that final page's — presumably empty —
    ``features`` list is still appended, matching the original behavior).

    :param start_date: "MM/YYYY" lower bound for the search.
    :param end_date:   "MM/YYYY" upper bound for the search.
    :return: list of feature dicts accumulated across all pages.
    """
    # Fixed query parameters of the hard-coded "everything" search.
    search_term = "*"
    status = ""
    step = ""
    workflow = "code_enforcement"
    boundary_id = ""

    result_dataset = []
    page_id = 1  # the API's pages start at 1, not 0
    while True:
        # One URL template instead of the duplicated literal that the
        # original built once before the loop and again inside it.
        url = (
            "https://blightstatus.nola.gov/api/addresses/search"
            f"?search_term={url_encode(search_term)}"
            f"&status={url_encode(status)}"
            f"&step={url_encode(step)}"
            f"&workflow={url_encode(workflow)}"
            f"&start_date={url_encode(start_date)}"
            f"&end_date={url_encode(end_date)}"
            f"&boundary_id={url_encode(boundary_id)}"
            f"&page={url_encode(page_id)}"
        )
        current_json_data = requests.get(url).json()
        result_dataset += current_json_data['features']
        # An empty page marks the end of the dataset.
        if int(current_json_data['page_total']) <= 0:
            break
        page_id += 1
    return result_dataset
def get_property_data(x, y):
    """Reverse-geocode one coordinate pair against the NOLA ArcGIS service.

    :param x: first coordinate component (as found in a blight feature's
              ``geometry.coordinates``).
    :param y: second coordinate component.
    :return: the parsed JSON response dict on HTTP success, else None.
    """
    parameters = remove_whitespace(f"{x},{y}")
    url = (
        "https://gis.nola.gov/arcgis/rest/services/CompositePIN3/"
        f"GeocodeServer/reverseGeocode?location={url_encode(parameters)}&f=json"
    )
    # Headers mimic a browser request; inspect the site's XHR traffic in the
    # browser network panel to see what the service expects.
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'http://property.nola.gov',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:68.0) Gecko/20100101 Firefox/68.0',
        # Bug fix: the real HTTP header is the (historically misspelled)
        # 'Referer'; the original sent 'Referrer', an unrecognized header
        # the server would simply ignore.
        'Referer': 'http://property.nola.gov/',
    }
    response = requests.get(url, headers=headers)
    if response.ok:
        return response.json()
    return None
if __name__ == "__main__":
    # Address ids might be cross-referenced with other sources, e.g. the
    # property-viewer data.
    blight_data = get_blight_data(start_date="08/2019", end_date="08/2019")
    print("Retrieved", len(blight_data), "blight datasets")
    # enumerate(..., start=1) replaces the hand-maintained counter variable.
    for counter, elem in enumerate(blight_data, start=1):
        x, y = elem['geometry']['coordinates']
        print(f"({counter}/{len(blight_data)})({x},{y})", get_property_data(x, y))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement