Untitled

"""
I think I will need to:
	- delete line 47
	sorted_df = sorted_df.reset_index(drop=True) causes extra processing with no real benefit in this case.
	And updating the index for a large dataset can be very expensive

	-or better still, on lines 46:48, I could replace:
		sorted_df = df.sort_values(by=['distance'])
		sorted_df = sorted_df.reset_index(drop=True)
		trimmed_df = sorted_df.drop('distance', axis=1).head(n)
	with:
		df = df.nsmallest(n, 'distance').drop('distance', axis=1)
	This will handle the sorting, drop the distance column and reassign the dataframe to the df variable, instead of assigning the dataframe to a new variable trimmed_df and allocating more memory.

	- Also, I could use a mergesort instead of the default quicksort as mergesort has a worst case complexity o O(n log n), whereas quicksort has a worst case complexity of O(nxn)

	- another option is to use numpy (argsort) for sorting the distance column rather than the sort_values method. Numpy has been proven to be faster than pandas when sorting

	- another option is to utilize scipy's squareform, pdist packages for computing distance

"""

def nearest_n_with_package(self,params):
	"""
    returns a list of n coordinates in ascending order of the distance between params['x','y'] coordinate and each coordinate in the dataset
	using the pandas and shapely package
	Args:
    	params (dict): Dictionary containing x, y, n keys

	Returns:
        List: list of objects
    """
	x = float(params['x'])
	y = float(params['y'])
	n = int(params['n'])

	request_point = Point(x, y)
	df = pd.read_csv(self.dataset_path, delimiter=';')

	def distance_calc(row):
		data_point = Point(float(row['x']), float(row['y']))
		return request_point.distance(data_point)

	df['distance'] = df.apply(distance_calc, axis=1)

	sorted_df = df.sort_values(by=['distance'])
	sorted_df = sorted_df.reset_index(drop=True)
	trimmed_df = sorted_df.drop('distance', axis=1).head(n)

	json_string = trimmed_df.to_json(orient = "records")

	return json.loads(json_string)


"""Refactored Method"""
def nearest_n_with_package(self,params):
	"""
    returns a list of n coordinates in ascending order of the distance between params['x','y'] coordinate and each coordinate in the dataset
	using the pandas and shapely package
	Args:
    	params (dict): Dictionary containing x, y, n keys
	Returns:
	    List: list of objects
    """
	x = float(params['x'])
	y = float(params['y'])
	n = int(params['n'])

	request_point = Point(x, y)
	df = pd.read_csv(self.dataset_path, delimiter=';')

	def distance_calc(row):
		data_point = Point(float(row['x']), float(row['y']))
		return request_point.distance(data_point)

	df['distance'] = df.apply(distance_calc, axis=1)
	df = df.nsmallest(n, 'distance').drop('distance', axis=1)

	json_string = df.to_json(orient = "records")

	return json.loads(json_string)