Advertisement
makispaiktis

ML - PCA with plots

Oct 11th, 2022 (edited)
1,076
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
MatLab 4.97 KB | None | 0 0
  1. clear all
  2. close all
  3. clc
  4.  
  5. % Data - Step 0 (page 18 from Lecture 7)
  6. display('************************************************');
  7. display('************************************************');
  8. display('        0. Initial-original Data');
  9. x11 = [2.5 0.5 2.2 1.9 3.1 2.3 2 1 1.5 1.1]'
  10. x22 = [2.4 0.7 2.9 2.2 3 2.7 1.6 1.1 1.6 0.9]'
  11. n = length(x11);
  12.  
  13. % Step 1 - Minus mean
  14. display('        1. Zero-mean data');
  15. avg1 = mean(x11);
  16. avg2 = mean(x22);
  17. x1 = x11 - avg1
  18. x2 = x22 - avg2
  19. display(' ');
  20.  
  21. figure();
  22. plot(x11, x22, 'rx');
  23. title("10 spots - Initial Data");
  24. % xlim([-2 4]);
  25. % ylim([-2 4]);
  26. hold on
  27. plot(x1, x2, 'bx');
  28. xlabel("x");
  29. ylabel("y");
  30. legend("Original Data", "Zero-mean Data");
  31.  
  32.  
  33. % Steps 2, 3 - Covariance matrix, Eigenvalues, Eigenvectors
  34. display('        2. Find the covariance matrix S');
  35. S = cov(x1, x2)
  36. display(' ');
  37.  
  38. eig_values = eig(S);
  39. l1 = eig_values(1);
  40. l2 = eig_values(2);
  41. lambda = [l1 l2];
  42. display('        3. Find the eigenvalues and eigenvectors');
  43. display(' ');
  44. disp("l_1 = " + num2str(l1));
  45. disp("l_2 = " + num2str(l2));
  46. [eig_vectors, no_need] = eig(S);
  47. u1 = eig_vectors(:, 1);
  48. u2 = -eig_vectors(:, 2);   % I put minus because I want the same results with the lecture (the u2 here and that in lecture have oppposite coordinates)
  49. eig_vectors = [u1 u2];
  50. disp("u_1 = " + mat2str(u1') + "^T");
  51. disp("u_2 = " + mat2str(u2') + "^T");
  52. display(' ');
  53.  
  54. figure();
  55. plot(x1, x2, 'x');
  56. title("Zero-mean Data with VERTICAL eigenvectors");
  57. xlabel("x");
  58. ylabel("y");
  59. hold on
  60. klisi1 = u1(1) / u1(2);
  61. klisi2 = u2(1) / u2(2);
  62. x = linspace(-2, 2.5, 101);
  63. y1 = klisi1 * x;
  64. y2 = klisi2 * x;
  65. plot(x, y1, 'green');
  66. hold on
  67. plot(x, y2, 'blue');
  68. legend("Zero-mean Data", "y = " + num2str(klisi1) + "*x", "y = " + num2str(klisi2) + "*x");
  69.  
  70.  
  71.  
  72. % Step 5 - Transpose of eigenvectors and sorting
  73. RowFeatureVector = eig_vectors';
  74. if l1 < l2
  75.     % Need to change
  76.     temp = RowFeatureVector(1, :);
  77.     RowFeatureVector(1, :) = RowFeatureVector(2, :);
  78.     RowFeatureVector(2, :) = temp;
  79. end
  80. RowFeatureVector;
  81. RowZeroMeanData = [x1 x2]';
  82. disp("        4. Calculate FinalData (they are separated by 'x' and 'y' axis)");
  83. FinalData = (RowFeatureVector * RowZeroMeanData)'
  84. display(' ');
  85. display(' ');
  86. % These data ("FinalData") are now separated by the 2 original axis "x" and
  87. % "y" and not the eigenvectors, so this was the purpose of the
  88. % transformation. WE CAN SAY THAT THE EIGENVECTORS ARE NOW THE AXIS!!!!
  89.  
  90. figure();
  91. plot(FinalData(:, 1), FinalData(:, 2), 'x');
  92. title("Data transformed with 2 eigenvectors (now eig = axis 'x', 'y')");
  93. hold on
  94. xx = linspace(-2, 2.5, 101);
  95. yy = 0 * xx;
  96. plot(xx, yy, 'blue');
  97. hold on
  98. yyy = linspace(-2, 2.5, 101);
  99. xxx = 0 * yyy;
  100. plot(xxx, yyy, 'green');
  101.  
  102.  
  103.  
  104.  
  105.  
  106.  
  107. % Retrieve the information
  108. % RowFeatureVector                                          2 x 2
  109. % RowZeroMeanData = [x1 x2]';                               2 x 10
  110. % FinalData = (RowFeatureVector * RowZeroMeanData)'         10 x 2
  111.  
  112. % Case 1 - Use 2 EIGENVECTORS
  113. display('************************************************');
  114. display('************************************************');
  115. display('        Retrieve information');
  116. display(' ');
  117. InitialData = [x11 x22]
  118. RowOriginalData = RowFeatureVector' * FinalData';       % (2x2) * (2x10) = 2 x 10
  119. RowOriginalData = RowOriginalData';                     % 10 x 2
  120. RowOriginalData(:, 1) = RowOriginalData(:, 1) + avg1;
  121. RowOriginalData(:, 2) = RowOriginalData(:, 2) + avg2;
  122. display('        When I use 2 eigenvectors (correct): ');
  123. RowOriginalData
  124. display(' ' );
  125.  
  126. % Case 2 - Use 1 EIGENVECTOR, THIS WITH LAMBDA = 1.28
  127. % COMPRESS INFO - I WILL KEEP ONLY THE 1ST ROW = 1ST EIGENVECTOR
  128. RowCompressedVector = RowFeatureVector(1, :);                   % 1 x 2
  129. FinalCompressedData = FinalData(:, 1);                          % 10 x 1
  130. RowCompressedData = RowCompressedVector' * FinalCompressedData';% (2x1) * (1x10) = 2 x 10
  131. RowCompressedData = RowCompressedData';                         % 10 x 2
  132. RowCompressedData(:, 1) = RowCompressedData(:, 1) + avg1;
  133. RowCompressedData(:, 2) = RowCompressedData(:, 2) + avg2;
  134. display('        When I use o 1 eigenvector (wrong): ');
  135. RowCompressedData
  136.  
  137.  
  138. % Calculate information loss
  139. losses = zeros(n, 2);
  140. for i = 1 : n
  141.     for j = 1 : 2
  142.         losses(i, j) = abs(RowOriginalData(i, j) - RowCompressedData(i, j)) / RowOriginalData(i, j);
  143.     end
  144. end
  145.  
  146. losses;
  147. mean_losses = mean(losses);
  148. loss1_perc = 100 * mean_losses(1);
  149. loss2_perc = 100 * mean_losses(2);
  150. display('        Information loss when using 1 eigenvector');
  151. display(' ');
  152. disp("Loss_1 = " + num2str(loss1_perc) + "%    Loss_2 = " + num2str(loss2_perc) + "%");
  153. total_loss = mean(mean_losses);
  154. disp("Total information loss = " + num2str(100 * total_loss) + "%");
  155.  
  156. figure();
  157. plot(RowOriginalData(:, 1), RowOriginalData(:, 2), 'bx');
  158. hold on
  159. plot(RowCompressedData(:, 1), RowCompressedData(:, 2), 'rx');
  160. title("Information Loss when using 1 eigenvectors");
  161. xlabel("x");
  162. ylabel("y");
  163. legend("Using u_1, u_2", "Using only u_2");
  164.  
  165.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement