Guest User

Untitled

a guest
May 27th, 2016
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.68 KB | None | 0 0
  1. # Copyright (c) 2012, Ryan Gomba
  2. # All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are met:
  6. #
  7. # 1. Redistributions of source code must retain the above copyright notice, this
  8. # list of conditions and the following disclaimer.
  9. # 2. Redistributions in binary form must reproduce the above copyright notice,
  10. # this list of conditions and the following disclaimer in the documentation
  11. # and/or other materials provided with the distribution.
  12. #
  13. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  14. # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  15. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  16. # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
  17. # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  18. # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  19. # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  20. # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  21. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  22. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. #
  24. # The views and conclusions contained in the software and documentation are those
  25. # of the authors and should not be interpreted as representing official policies,
  26. # either expressed or implied, of the FreeBSD Project.
  27.  
  28. import math
  29. import json
  30.  
  31. ################################################################################
  32. # POINT
  33. ################################################################################
  34.  
  35. class Point:
  36.  
  37. def __init__(self, latitude, longitude):
  38.  
  39. self.latitude = latitude
  40. self.longitude = longitude
  41. self.cd = None # core distance
  42. self.rd = None # reachability distance
  43. self.processed = False # has this point been processed?
  44.  
  45. # --------------------------------------------------------------------------
  46. # calculate the distance between any two points on earth
  47. # --------------------------------------------------------------------------
  48.  
  49. def distance(self, point):
  50.  
  51. # convert coordinates to radians
  52.  
  53. p1_lat, p1_lon, p2_lat, p2_lon = [math.radians(c) for c in
  54. self.latitude, self.longitude, point.latitude, point.longitude]
  55.  
  56. numerator = math.sqrt(
  57. math.pow(math.cos(p2_lat) * math.sin(p2_lon - p1_lon), 2) +
  58. math.pow(
  59. math.cos(p1_lat) * math.sin(p2_lat) -
  60. math.sin(p1_lat) * math.cos(p2_lat) *
  61. math.cos(p2_lon - p1_lon), 2))
  62.  
  63. denominator = (
  64. math.sin(p1_lat) * math.sin(p2_lat) +
  65. math.cos(p1_lat) * math.cos(p2_lat) *
  66. math.cos(p2_lon - p1_lon))
  67.  
  68. # convert distance from radians to meters
  69. # note: earth's radius ~ 6372800 meters
  70.  
  71. return math.atan2(numerator, denominator) * 6372800
  72.  
  73. # --------------------------------------------------------------------------
  74. # point as GeoJSON
  75. # --------------------------------------------------------------------------
  76.  
  77. def to_geo_json_dict(self, properties=None):
  78.  
  79. return {
  80. 'type': 'Feature',
  81. 'geometry': {
  82. 'type': 'Point',
  83. 'coordinates': [
  84. self.longitude,
  85. self.latitude,
  86. ]
  87. },
  88. 'properties': properties,
  89. }
  90.  
  91. def __repr__(self):
  92. return '(%f, %f)' % (self.latitude, self.longitude)
  93.  
  94. ################################################################################
  95. # CLUSTER
  96. ################################################################################
  97.  
  98. class Cluster:
  99.  
  100. def __init__(self, points):
  101.  
  102. self.points = points
  103.  
  104. # --------------------------------------------------------------------------
  105. # calculate the centroid for the cluster
  106. # --------------------------------------------------------------------------
  107.  
  108. def centroid(self):
  109.  
  110. return Point(sum([p.latitude for p in self.points])/len(self.points),
  111. sum([p.longitude for p in self.points])/len(self.points))
  112.  
  113. # --------------------------------------------------------------------------
  114. # calculate the region (centroid, bounding radius) for the cluster
  115. # --------------------------------------------------------------------------
  116.  
  117. def region(self):
  118.  
  119. centroid = self.centroid()
  120. radius = reduce(lambda r, p: max(r, p.distance(centroid)), self.points)
  121. return centroid, radius
  122.  
  123. # --------------------------------------------------------------------------
  124. # cluster as GeoJSON
  125. # --------------------------------------------------------------------------
  126.  
  127. def to_geo_json_dict(self, user_properties=None):
  128.  
  129. center, radius = self.region()
  130. properties = { 'radius': radius }
  131. if user_properties: properties.update(user_properties)
  132.  
  133. return {
  134. 'type': 'Feature',
  135. 'geometry': {
  136. 'type': 'Point',
  137. 'coordinates': [
  138. center.longitude,
  139. center.latitude,
  140. ]
  141. },
  142. 'properties': properties,
  143. }
  144.  
  145. ################################################################################
  146. # OPTICS
  147. ################################################################################
  148.  
  149. class Optics:
  150.  
  151. def __init__(self, points, max_radius, min_cluster_size):
  152.  
  153. self.points = points
  154. self.max_radius = max_radius # maximum radius to consider
  155. self.min_cluster_size = min_cluster_size # minimum points in cluster
  156.  
  157. # --------------------------------------------------------------------------
  158. # get ready for a clustering run
  159. # --------------------------------------------------------------------------
  160.  
  161. def _setup(self):
  162.  
  163. for p in self.points:
  164. p.rd = None
  165. p.processed = False
  166. self.unprocessed = [p for p in self.points]
  167. self.ordered = []
  168.  
  169. # --------------------------------------------------------------------------
  170. # distance from a point to its nth neighbor (n = min_cluser_size)
  171. # --------------------------------------------------------------------------
  172.  
  173. def _core_distance(self, point, neighbors):
  174.  
  175. if point.cd is not None: return point.cd
  176. if len(neighbors) >= self.min_cluster_size - 1:
  177. sorted_neighbors = sorted([n.distance(point) for n in neighbors])
  178. point.cd = sorted_neighbors[self.min_cluster_size - 2]
  179. return point.cd
  180.  
  181. # --------------------------------------------------------------------------
  182. # neighbors for a point within max_radius
  183. # --------------------------------------------------------------------------
  184.  
  185. def _neighbors(self, point):
  186.  
  187. return [p for p in self.points if p is not point and
  188. p.distance(point) <= self.max_radius]
  189.  
  190. # --------------------------------------------------------------------------
  191. # mark a point as processed
  192. # --------------------------------------------------------------------------
  193.  
  194. def _processed(self, point):
  195.  
  196. point.processed = True
  197. self.unprocessed.remove(point)
  198. self.ordered.append(point)
  199.  
  200. # --------------------------------------------------------------------------
  201. # update seeds if a smaller reachability distance is found
  202. # --------------------------------------------------------------------------
  203.  
  204. def _update(self, neighbors, point, seeds):
  205.  
  206. # for each of point's unprocessed neighbors n...
  207.  
  208. for n in [n for n in neighbors if not n.processed]:
  209.  
  210. # find new reachability distance new_rd
  211. # if rd is null, keep new_rd and add n to the seed list
  212. # otherwise if new_rd < old rd, update rd
  213.  
  214. new_rd = max(point.cd, point.distance(n))
  215. if n.rd is None:
  216. n.rd = new_rd
  217. seeds.append(n)
  218. elif new_rd < n.rd:
  219. n.rd = new_rd
  220.  
  221. # --------------------------------------------------------------------------
  222. # run the OPTICS algorithm
  223. # --------------------------------------------------------------------------
  224.  
  225. def run(self):
  226.  
  227. self._setup()
  228.  
  229. # for each unprocessed point (p)...
  230.  
  231. while self.unprocessed:
  232. point = self.unprocessed[0]
  233.  
  234. # mark p as processed
  235. # find p's neighbors
  236.  
  237. self._processed(point)
  238. point_neighbors = self._neighbors(point)
  239.  
  240. # if p has a core_distance, i.e has min_cluster_size - 1 neighbors
  241.  
  242. if self._core_distance(point, point_neighbors) is not None:
  243.  
  244. # update reachability_distance for each unprocessed neighbor
  245.  
  246. seeds = []
  247. self._update(point_neighbors, point, seeds)
  248.  
  249. # as long as we have unprocessed neighbors...
  250.  
  251. while(seeds):
  252.  
  253. # find the neighbor n with smallest reachability distance
  254.  
  255. seeds.sort(key=lambda n: n.rd)
  256. n = seeds.pop(0)
  257.  
  258. # mark n as processed
  259. # find n's neighbors
  260.  
  261. self._processed(n)
  262. n_neighbors = self._neighbors(n)
  263.  
  264. # if p has a core_distance...
  265.  
  266. if self._core_distance(n, n_neighbors) is not None:
  267.  
  268. # update reachability_distance for each of n's neighbors
  269.  
  270. self._update(n_neighbors, n, seeds)
  271.  
  272. # when all points have been processed
  273. # return the ordered list
  274.  
  275. return self.ordered
  276.  
  277. # --------------------------------------------------------------------------
  278.  
  279. def cluster(self, cluster_threshold):
  280.  
  281. clusters = []
  282. separators = []
  283.  
  284. for i in range(len(self.ordered)):
  285. this_i = i
  286. next_i = i + 1
  287. this_p = self.ordered[i]
  288. this_rd = this_p.rd if this_p.rd else float('infinity')
  289.  
  290. # use an upper limit to separate the clusters
  291.  
  292. if this_rd > cluster_threshold:
  293. separators.append(this_i)
  294.  
  295. separators.append(len(self.ordered))
  296.  
  297. for i in range(len(separators) - 1):
  298. start = separators[i]
  299. end = separators[i + 1]
  300. if end - start >= self.min_cluster_size:
  301. clusters.append(Cluster(self.ordered[start:end]))
  302.  
  303. return clusters
  304.  
  305. # LOAD SOME POINTS
  306.  
  307. points = [
  308. Point(37.769006, -122.429299), # cluster #1
  309. Point(37.769044, -122.429130), # cluster #1
  310. Point(37.768775, -122.429092), # cluster #1
  311. Point(37.776299, -122.424249), # cluster #2
  312. Point(37.776265, -122.424657), # cluster #2
  313. ]
  314.  
  315. optics = Optics(points, 100, 2) # 100m radius for neighbor consideration, cluster size >= 2 points
  316. optics.run() # run the algorithm
  317. clusters = optics.cluster(50) # 50m threshold for clustering
  318.  
  319. for cluster in clusters:
  320. print cluster.points
Add Comment
Please, Sign In to add comment