Guest User

Untitled

a guest
Jun 21st, 2018
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.29 KB | None | 0 0
  function [mu, C, iters, err] = fkmeansMA(X, k, fuzz, maxIters)
  % FKMEANSMA  Find the centers of k fuzzy clusters in the data set X.
  %
  %   [mu, C, iters, err] = fkmeansMA(X, k)
  %   [mu, C, iters, err] = fkmeansMA(X, k, fuzz, maxIters)
  %
  % Inputs:
  %   X        - m x n matrix of n points, one m-dimensional point per column
  %   k        - number of desired cluster centers
  %   fuzz     - (optional) exponent applied to the memberships when the
  %              centers are re-estimated; defaults to 3.75
  %   maxIters - (optional) upper bound on the number of iterations so the
  %              loop cannot get stuck; defaults to 100
  %
  % Outputs:
  %   mu    - m x k matrix, column c holds the coordinates of center c
  %   C     - 1 x n vector, point X(:,j) is assigned to cluster C(j)
  %   iters - number of iterations that were executed
  %   err   - sum of squared Euclidean distances between every point and
  %           the center of the cluster it was assigned to

  if nargin < 3 || isempty(fuzz)
      fuzz = 3.75;
  end
  if nargin < 4 || isempty(maxIters)
      maxIters = 100;
  end

  [m, n] = size(X);
  mn = min(X(:));
  mx = max(X(:));

  % Initial guesses are uniformly distributed random numbers between the
  % minimum and maximum values in X (rand, not randn: normally distributed
  % values are unbounded and would place centers far outside the data).
  % This is probably the part that would most benefit from a slightly more
  % intelligent method.
  mu = rand(m, k) * (mx - mn) + mn;

  % Membership matrices: k x n, row c belongs to cluster c and column j
  % belongs to point X(:,j). They start out maximally different so the
  % loop below is entered at least once.
  prev = zeros(k, n);
  probs = ones(k, n);

  iters = 0;
  while ~isGoodEnough(probs, prev) && iters < maxIters
      prev = probs; % keep the memberships from the last pass for comparison

      % membership of every point in every cluster
      for j = 1:n
          dists = calcDistances(X(:, j), mu);
          probs(:, j) = clusterProb(dists);
      end

      % move each center to the weighted average of all points, using the
      % memberships that were just computed
      for c = 1:k
          mu(:, c) = weightedAverage(X, probs(c, :), fuzz);
      end

      iters = iters + 1;
  end

  % Hard assignment: the cluster with the largest membership per point.
  % The dimension is given explicitly because for k == 1 probs is a row
  % vector and max(probs) would collapse it to a single scalar.
  [~, C] = max(probs, [], 1);

  % Sum of squared distances of each point to its assigned center.
  err = 0;
  for c = 1:k
      members = X(:, C == c);                      % columns assigned to c
      offsets = bsxfun(@minus, members, mu(:, c)); % m x (#members)
      err = err + sum(offsets(:) .^ 2);
  end
  end
  57.  
  function avg = weightedAverage(points, probs, fuzz)
  % Weighted average of the columns of points.
  %
  %   points - m x n matrix, one point per column
  %   probs  - vector whose first n entries are the membership of each
  %            point in the cluster being updated
  %   fuzz   - exponent applied to every membership before it is used as
  %            a weight
  %   avg    - m x 1 weighted average, sum_j(w_j * points(:,j)) / sum_j(w_j)
  %            with w_j = probs(j)^fuzz
  %
  % Weighted average formula taken from the clustering article on Wikipedia.
  n = size(points, 2);
  weights = reshape(probs(1:n), n, 1) .^ fuzz;
  total = sum(weights);

  if total == 0
      % Every weight vanished; dividing would give 0/0 = NaN and poison
      % the center, so fall back to the unweighted mean of the points.
      avg = mean(points, 2);
  else
      avg = (points * weights) / total;
  end
  end
  72.  
  function dists = calcDistances(point, centers)
  % Distance from a single point to every center.
  %
  %   point   - the point to measure from
  %   centers - matrix with one center per column
  %   dists   - row vector; entry c is dist(point, centers(:,c))
  numCenters = size(centers, 2);
  dists = arrayfun(@(c) dist(point, centers(:, c)), 1:numCenters);
  end
  81.  
  function good = isGoodEnough(probs, prev)
  % Convergence test.
  % Two consecutive iterations count as converged when every entry of the
  % membership matrix moved by strictly less than the tolerance.
  tolerance = 1e-3;
  change = abs(probs - prev);
  good = all(change(:) < tolerance);
  end
  90.  
  function probs = clusterProb(dists)
  % Membership of a point in each cluster, derived from its distance to
  % each center. The reciprocal of the distance is used because smaller
  % distances are better; the reciprocals are normalised to sum to 1.
  %
  %   dists - vector of distances from the point to every center
  %   probs - vector of the same shape as dists holding the memberships
  onCenter = (dists == 0);
  if any(onCenter(:))
      % The point coincides with at least one center. 1/0 would give Inf
      % and Inf/Inf would give NaN, so share the full membership equally
      % among the coincident centers and give the others none.
      probs = double(onCenter) / nnz(onCenter);
      return;
  end

  invs = 1 ./ dists;
  probs = invs ./ sum(invs);
  end
  99.  
  function r = dist(x,y)
  % Euclidean distance between x and y: the square root of the sum of the
  % squared element-wise differences.
  delta = x - y;
  sumOfSquares = sum(delta .^ 2);
  r = sqrt(sumOfSquares);
  end
Add Comment
Please, Sign In to add comment