Advertisement
Guest User

Untitled

a guest
Aug 19th, 2017
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.35 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. '''
  3. Created on 21-05-2011
  4.  
  5. @author: jakub
  6. '''
  7. import numpy as np
  8. from scipy.spatial.distance import mahalanobis, euclidean
  9. #===============================================================================
  10. # CPoint
  11. # @note: A class representing a point in n-dimensional space
  12. #===============================================================================
  13. class CPoint(object):
  14. #===========================================================================
  15. # __init__
  16. # @param coords: coords of point
  17. # @param n : dimmensions of a point
  18. # @note: creates a CPoint object
  19. #===========================================================================
  20. def __init__(self, cords):
  21. self.coords = cords
  22. self.n = len(cords)
  23. #===========================================================================
  24. # __repr__
  25. # @return: string representaion of a point
  26. #===========================================================================
  27. def __repr__(self):
  28. return "P" + str(self.coords)
  29. #===============================================================================
  30. # CCluster
  31. # @note: A class representing a cluster of points in n-dimensional space
  32. #===============================================================================
  33. class CCluster(object):
  34. #===========================================================================
  35. # __init__
  36. # @param points: set of points which of is cluster made
  37. #===========================================================================
  38. def __init__(self, points):
  39. if len(points) == 0:
  40. raise Exception("Empty cluster")
  41. self.points = points
  42. self.nDim = points[0].n
  43. self.n = len(self.points)
  44. for point in points:
  45. if point.n != self.nDim:
  46. raise Exception("Wrong points dimmensions")
  47. #=======================================================================
  48. # __repr__
  49. # @return: string representaion of a cluster
  50. #=======================================================================
  51. def __repr__(self):
  52. return "C" + str(self.points)
  53. def merge(self, secondCluster):
  54. self.points.extend(secondCluster)
  55. self.n = len(self.points)
  56. if self.nDim!=secondCluster.nDim:
  57. raise Exception("Wrong points dimmensions")
  58. #===============================================================================
  59. # CMetric
  60. # @note: Class representing various metrics
  61. #===============================================================================
  62. class CMetric(object):
  63. #===========================================================================
  64. # __init__
  65. # @param name: string which tells which metric are we gonna use
  66. # "euclides", "mahalanobis"
  67. #===========================================================================
  68. def __init__(self, name):
  69. self.name = name
  70. #===========================================================================
  71. # computeDistance
  72. # @param pointA: first point
  73. # @param pointB: second point
  74. # @return: distance between points A and B in given metric
  75. #===========================================================================
  76. def computeDistance(self, pointA, pointB):
  77. if pointA.n != pointB.n:
  78. raise Exception("Wrong points dimmensions")
  79. if self.name == "mahalanobis":
  80. V = np.cov(np.concatenate((pointA.coords, pointB.coords)).T)
  81. return mahalanobis(pointA.coords, pointB.coords, V)
  82. if self.name == "euclides":
  83. return euclidean(pointA.coords, pointB.coords)
  84.  
  85. class CDistance:
  86. #===========================================================================
  87. # __init__
  88. # @param distanceString: one of the: max, min, pga (pair-group average), pgc (pair-group centroid), ward
  89. # @param metricString: one of the: euclides, mahalanobis
  90. #===========================================================================
  91. def __init__(self, distanceString, metricString):
  92. self.distanceString = distanceString
  93. self.metric = CMetric(metricString)
  94.  
  95. #===========================================================================
  96. # getDistanceBetweenPoints
  97. # @param PointA: first point of cumputing
  98. # @param PointB: second point of cumputing
  99. # @return: distance with metric given in __init__
  100. #===========================================================================
  101. def getDistanceBetweenPoints(self, PointA, PointB):
  102. if (PointA.n != PointB.n):
  103. raise Exception("Wrong number of dimensions")
  104. return self.metric.computeDistance(PointA, PointB)
  105.  
  106. #===========================================================================
  107. # __getDistanceArrayBetweenPointsFromClusters
  108. # @param ClustA: first cluster to compute
  109. # @param ClustB: second cluster to compute
  110. # @return: len of ClustA x len of ClustB array with distances between each point from them
  111. #===========================================================================
  112. def __getDistanceArrayBetweenPointsFromClusters(self, ClustA, ClustB):
  113. out = np.zeros((ClustA.n, ClustB.n))
  114. for i in range(ClustA.n):
  115. for j in range(ClustB.n):
  116. out[i][j] = self.getDistanceBetweenPoints(ClustA.points[i], ClustB.points[j])
  117. return out
  118.  
  119. #===========================================================================
  120. # __getDistanceBetweenNearestElementsOfCluster
  121. # @param ClustA: first cluster to compute
  122. # @param ClustB: second cluster to compute
  123. # @return: floating point number with distance
  124. #===========================================================================
  125. def __getDistanceBetweenNearestElementsOfCluster(self, ClustA, ClustB):
  126. distancesMatrix = self.__getDistanceArrayBetweenPointsFromClusters(ClustA, ClustB)
  127. minVal = distancesMatrix[0][0]
  128. for i in range(ClustA.n):
  129. for j in range(ClustB.n):
  130. if (distancesMatrix[i][j] < minVal):
  131. minVal = distancesMatrix[i][j]
  132. return minVal
  133.  
  134. #===========================================================================
  135. # __getDistanceBetweenFurthestElementsOfCluster
  136. # @param ClustA: first cluster to compute
  137. # @param ClustB: second cluster to compute
  138. # @return: floating point number with distance
  139. #===========================================================================
  140. def __getDistanceBetweenFurthestElementsOfCluster(self, ClustA, ClustB):
  141. distancesMatrix = self.__getDistanceArrayBetweenPointsFromClusters(ClustA, ClustB)
  142. maxVal = distancesMatrix[0][0]
  143. for i in range(ClustA.n):
  144. for j in range(ClustB.n):
  145. if (distancesMatrix[i][j] > maxVal):
  146. maxVal = distancesMatrix[i][j]
  147. return maxVal
  148. #===========================================================================
  149. # __getPairGroupAverageDistance
  150. # @param ClustA: first cluster to compute
  151. # @param ClustB: second cluster to compute
  152. # @return: floating point number with distance
  153. #===========================================================================
  154. def __getPairGroupAverageDistance(self, ClustA, ClustB):
  155. distancesMatrix = self.__getDistanceArrayBetweenPointsFromClusters(ClustA, ClustB)
  156. return np.average(distancesMatrix)
  157.  
  158. def __getPairGroupCentroidDistance(self, clusterA, clusterB):
  159. centroidPointClusterA = self.getCentroid(clusterA)
  160. centroidPointClusterB = self.getCentroid(clusterB)
  161. return self.getDistanceBetweenPoints(centroidPointClusterA, centroidPointClusterB)
  162.  
  163. def getCentroid(self, cluster):
  164. centroidPoint = []
  165. for dim in range(cluster.nDim):
  166. centroidPoint.append(0.0)
  167. for point in cluster.points:
  168. centroidPoint[dim] += float(point.coords[dim])
  169. centroidPoint[dim] = centroidPoint[dim] / len(cluster.points)
  170. centroidPoint = CPoint(centroidPoint)
  171. return centroidPoint
  172.  
  173. def __getWardDistance(self, ClustA, ClustB):
  174. PointA = self.getCentroid(ClustA)
  175. PointB = self.getCentroid(ClustB)
  176. n1 = PointA.n;
  177. n2 = PointB.n
  178. if PointA.nDim != PointB.nDim :
  179. raise Exception("Wrong dimensions of clusters")
  180. coords = np.zeros(n1)
  181. for i in range(n1):
  182. coords[i] = PointA.coords[i] - PointB.coords[i]
  183. return np.dot(coords.transpose(), coords) * n1 * n2 / (n1 + n2)
  184.  
  185. def computeDistance(self, clusterA, clusterB):
  186. if (self.distanceString == 'max'):
  187. return self.__getDistanceBetweenFurthestElementsOfCluster(clusterA, clusterB)
  188. elif (self.distanceString == 'min'):
  189. return self.__getDistanceBetweenNearestElementsOfCluster(clusterA, clusterB)
  190. elif (self.distanceString == 'pga'):
  191. return self.__getPairGroupAverageDistance(clusterA, clusterB)
  192. elif (self.distanceString == 'pgc'):
  193. return self.__getPairGroupCentroidDistance(clusterA, clusterB)
  194. elif (self.distanceString == 'ward'):
  195. return self.__getWardDistance(clusterA, clusterB)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement