Advertisement
Guest User

Untitled

a guest
Oct 31st, 2014
149
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.66 KB | None | 0 0
  1. import sys
  2. import numpy as np
  3. import math
  4. from sklearn.neighbors import KernelDensity
  5. from sqlalchemy.sql import select
  6. from sqlalchemy import func
  7. from spams.db.utils import setup_database, get_table
  8. from mpl_toolkits.basemap import Basemap
  9. import matplotlib.pyplot as plt
  10. from spams.mappings import LABEL_PLACE_MAPPING
  11. from sklearn.grid_search import GridSearchCV
  12.  
  13.  
  14. if __name__ == "__main__":
  15.     metadata, connection = setup_database()
  16.     places_location = get_table("places_location", metadata)
  17.     min_lat, max_lat, min_long, max_long = connection.execute(select([func.min(places_location.c.latitude), func.max(places_location.c.latitude), func.min(places_location.c.longitude), func.max(places_location.c.longitude)])).fetchall()[0]
  18.     estimators = {}
  19.     test_set = {}
  20.     training_set = []
  21.     for label_id in xrange(1, 11):
  22.         label = LABEL_PLACE_MAPPING[label_id]
  23.         lat_long_query = select([places_location.c.latitude, places_location.c.longitude]).where(places_location.c.place_label_int==label_id)
  24.         results = connection.execute(lat_long_query).fetchall()
  25.         result_len = len(results)
  26.         # Use 1 % of the data as test set
  27.         test_len = math.ceil(result_len * 0.1)
  28.         xy = [(float(r[0]), float(r[1])) for r in results]
  29.         test_indices = np.random.choice(result_len, test_len)
  30.         training_set =[]
  31.         test_set[label] = []
  32.         training_set = []
  33.         # Separate test and training values
  34.         for index, val in enumerate(xy):
  35.             if index in test_indices:
  36.                 test_set[label].append(val)
  37.             else:
  38.                 training_set.append(val)
  39.  
  40.         xy = np.array(training_set)
  41.         # Convert to radians
  42.         xy *= np.pi /180.
  43.         params = {'bandwidth': np.logspace(-3, 3, 20)}
  44.         # do a grid search
  45.         grid = GridSearchCV(KernelDensity(metric="haversine", algorithm="ball_tree"), params)
  46.         grid.fit(xy)
  47.         estimators[label] = grid.best_estimator_
  48.         print grid.best_estimator_.bandwidth
  49.     accurate = 0.0
  50.     counter = 0.0
  51.     for label in test_set:
  52.         for value in test_set[label]:
  53.             value = [v * np.pi / 180. for v in value]
  54.             max_prob_density =  -sys.maxint - 1
  55.             best_label = ""
  56.             counter += 1
  57.             for key in estimators:
  58.                 score = estimators[key].score(value)
  59.                 if score > max_prob_density:
  60.                     max_prob_density = score
  61.                     best_label = key
  62.             #print max_prob_density
  63.             if best_label == label:
  64.                 accurate +=1
  65.     print accurate/counter * 100
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement