Advertisement
makispaiktis

ML - my_kmeans2 (Epsilon + plots + Voronoi for every k)

Oct 12th, 2022 (edited)
773
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
MatLab 6.65 KB | None | 0 0
  1. clear all
  2. close all
  3. clc
  4.  
  5.  
  6. % *************************************************************************
  7. % *************************************************************************
  8.  
  9. % Data - Generate random 2-D points
  10. % Define N, K, epsilon_thr
  11. N = 300;
  12. K = 4;
  13. epsilon_thr = 0.005;
  14.  
  15.  
  16. % Ns = [2, 3, 5] * 10;
  17. % means = [0.5, 1, 1.5];
  18. % stds = [0.3, 0.4, 0.5];
  19. % N = sum(Ns);
  20. % LEN = length(Ns);
  21. % [x, y] = generatePoints(Ns, means, stds);
  22. x = rand(1, N);
  23. y = rand(1, N);
  24. points = [x; y];
  25. colors1 = ["green", "blue", "yellow", "magenta", "cyan", "black"];
  26. colors2 = zeros(K, 3);
  27. for k = 1 : K
  28.     colors2(k, :) = [rand, rand, rand];
  29. end
  30.  
  31. colors = [];
  32. if K <= length(colors1)
  33.     colors = colors1;
  34. else
  35.     colors = colors2;
  36. end
  37.  
  38.  
  39.  
  40. % *************************************************************************
  41. % *************************************************************************
  42.  
  43. % Algorithm's Initialization
  44. min_x = min(x);
  45. max_x = max(x);
  46. min_y = min(y);
  47. max_y = max(y);
  48. % Initialize the centroids as some points of the dataset
  49. z = floor(linspace(1, N, K));
  50. centroids = zeros(2, K);
  51. for k = 1 : K
  52.     centroids(1, k) = points(1, z(k));
  53.     centroids(2, k) = points(2, z(k));
  54. end
  55.  
  56. figure();
  57. plot(x, y, 'bo');
  58. title("Iteration 0 - Random centroids");
  59. xlabel("x");
  60. ylabel("y");
  61. hold on
  62. plot(centroids(1, :), centroids(2, :), 'r+', 'MarkerSize', 10);
  63.  
  64.  
  65.  
  66. % *************************************************************************
  67. % *************************************************************************
  68.  
  69. % Generate 2 useful matrices: 'distances' = K x N, where K = # of centroids
  70. % and N = # of points, so each column 'j' contains the distance of j-th
  71. % point with each of the k centroids
  72. distances = zeros(K, N);
  73. % And 'groups' = 1 x N, where each element is 1, 2, ..., or 'K' and shows
  74. % to which group-centroid the element belongs
  75. groups = zeros(1, N);
  76.  
  77. % First, I have to calculate the distances matrix between the existing
  78. % centroids and the given random points
  79. for n = 1 : N
  80.     point = points(:, n);
  81.     for k = 1 : K
  82.         centroid = centroids(:, k);
  83.         distances(k, n) = distance(centroid, point);
  84.     end
  85.     % After the completion of n-th column, I can determine the index of
  86.     % centroid this item belongs to
  87.     column = distances(:, n);
  88.     [MIN, index] = min(column);
  89.     groups(n) = index;
  90. end
  91. display('***********************************************************');
  92. round = 0;
  93. disp("        Iteration " + num2str(round));
  94. centroids
  95. size(centroids);
  96. % disp("Centroids = " + mat2str(centroids));
  97. display(' ');
  98. display(' ');
  99. groups;
  100.  
  101.  
  102.  
  103.  
  104.  
  105.  
  106. % *************************************************************************
  107. % *************************************************************************
  108.  
  109. % Iterations with for-loop
  110. MAX_ROUND = 30;
  111. initial_centroids = centroids;
  112. epsilon = Inf;
  113. round = 1;
  114.  
  115. while round <= MAX_ROUND && epsilon > epsilon_thr
  116.    
  117.     initial_centroids = centroids;
  118.     centroids = zeros(2, K);
  119.     display('***********************************************************');
  120.     disp("        Iteration " + num2str(round));
  121.     round;
  122.     points_around_centroid = zeros(1, K);
  123.     coord_around_centroid = zeros(2, K);
  124.     % This vector will contain the indeces of points that belong to centroid 'k'
  125.     % I have to find out the new centroids based on the 'groups' vector
  126.     for n = 1 : N
  127.         points_around_centroid(groups(n)) = points_around_centroid(groups(n)) + 1;
  128.         coord_around_centroid(1, groups(n)) = coord_around_centroid(1, groups(n)) + points(1, n);
  129.         coord_around_centroid(2, groups(n)) = coord_around_centroid(2, groups(n)) + points(2, n);
  130.     end
  131.     points_around_centroid;
  132.     coord_around_centroid;
  133.     % Now, my 2 vectors contain for each 'k': the sum of coordinates of the
  134.     % points around this k-th centroid and the number of these points
  135.     for k = 1 : K
  136.         centroids(1, k) = coord_around_centroid(1, k) / points_around_centroid(k);
  137.         centroids(2, k) = coord_around_centroid(2, k) / points_around_centroid(k);
  138.     end
  139.     centroids;
  140.     % Maybe in some iterations a centroid has no corresponding points
  141.     % around it, so:
  142.     for k = 1 : K
  143.         if isnan(centroids(1, k)) == 1 || isnan(centroids(2, k)) == 1
  144.             disp("NaN for centroid " + num2str(k));
  145.             centroids(1, k) = points(1, floor(N/2));
  146.             centroids(2, k) = points(2, floor(N/2));
  147.         end
  148.     end
  149.     centroids
  150.     size(centroids);
  151.    
  152.    
  153.    
  154.    
  155.    
  156.    
  157.    
  158.    
  159.     % Now, new centroids have determined - Update 'distances', 'groups'
  160.     for n = 1 : N
  161.         point = points(:, n);
  162.         for k = 1 : K
  163.             centroid = centroids(:, k);
  164.             distances(k, n) = distance(centroid, point);
  165.         end
  166.         column = distances(:, n);
  167.         [MIN, index] = min(column);
  168.         groups(n) = index;
  169.     end
  170.    
  171.    
  172.    
  173.    
  174.    
  175.     figure();
  176.     for n = 1 : N
  177.         cluster_k = groups(n);
  178.         if K <= length(colors1)
  179.             plot(points(1, n), points(2, n), 'o', 'Color', colors1(cluster_k));
  180.         else
  181.             plot(points(1, n), points(2, n), 'o', 'Color', colors2(cluster_k, :));
  182.         end
  183.         hold on
  184.     end
  185.     plot(centroids(1, :), centroids(2, :),  'r+', 'MarkerSize', 10);
  186.     hold on
  187.     voronoi(centroids(1, :), centroids(2, :));
  188.     title("Iteration " + num2str(round));
  189.     epsilon = error(initial_centroids, centroids)
  190.     round = round + 1;
  191.     % disp("Centroids = " + mat2str(centroids));
  192.     display('***********************************************************');
  193.     display(' ');
  194.     display(' ');
  195.    
  196. end
  197.  
  198.  
  199.  
  200.  
  201.  
  202.  
  203.  
  204.  
  205.  
  206. % *************************************************************************
  207. % *************************************************************************
  208.  
  209. % Auxiliary Functions
  210. function plot_spots(x, y, TITLE)
  211.     figure();
  212.     plot(x, y, 'bo');
  213.     title(TITLE);
  214.     xlabel("x");
  215.     ylabel("y");
  216. end
  217.  
  218. function [x, y] = generatePoints(Ns, means, stds)
  219.     if length(Ns) ~= length(means) || length(Ns) ~= length(stds) || length(means) ~= length(stds)
  220.         x = linspace(0, 3, 4);
  221.         y = linspace(0, 3, 4);
  222.         display('Error');
  223.     end
  224.     x = [];
  225.     y = [];
  226.     for i = 1 : length(Ns)
  227.         temp_x = rand(1, Ns(i)) * stds(i) + means(i);
  228.         temp_y = rand(1, Ns(i)) * stds(i) + means(i);
  229.         x = [x temp_x];
  230.         y = [y temp_y];
  231.     end
  232. end
  233.  
  234.  
  235. function d = distance(p1, p2)
  236.     % p1 = [2 3]'    p2 = [1 4]'
  237.     d = sqrt((p1(1) - p2(1))^2 + (p1(2) - p2(2))^2);
  238. end
  239.  
  240. function epsilon = error(a, b)
  241.     % a, b = 2 x K
  242.     epsilon = norm(a - b);
  243. end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement