Advertisement
Guest User

PCA Lab

a guest
May 19th, 2018
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Octave 4.72 KB | None | 0 0
  1. clear;
  2. close all;
  3. load("iris.mat");
  4. setosa_label = 1;
  5. virginica_label = 2;
  6. versicolor_label = 3;
  7.  
  8. setosa = [ones(size(setosa,1),1)*setosa_label setosa];
  9. virginica = [ones(size(virginica,1),1)*virginica_label virginica];
  10. versicolor = [ones(size(versicolor,1),1)*versicolor_label versicolor];
  11. data = [setosa ; virginica ; versicolor];
  12.  
  13. function [X] = centerAndScale(data)
  14.   means = mean(data);    
  15.   stdv = std(data);
  16.   X = zeros(size(data));
  17.   for i=1:size(data,2)
  18.     for j=1:size(data,1)
  19.         X(j,i) = ( means(:,i) - data(j,i))/stdv(:,i);
  20.     end
  21.   end
  22. end
  23.  
  24. data(:,2:end) = centerAndScale(data(:,2:end));
  25.  
  26. mean_orig_1 = mean(data(1:50,2:end))
  27. mean_orig_2 = mean(data(51:100,2:end))
  28. mean_orig_3 = mean(data(101:150,2:end))
  29.  
  30. [U, S, V] = svd(data(:,2:end),0)
  31. Ur = U * S;  
  32. data(:,2:end)=Ur;
  33.  
  34. mean_1 = mean(data(1:50,2:end))
  35. mean_2 = mean(data(51:100,2:end))
  36. mean_3 = mean(data(101:150,2:end))
  37.  
  38. Ur_1_cov = cov(data(1:50,2:end))
  39. Ur_2_cov = cov(data(51:100,2:end))
  40. Ur_3_cov = cov(data(101:150,2:end))
  41.  
  42. function [weights, cumulative_weights] = scree(S)
  43.   S2 = S^2;
  44.   weights = zeros(size(S,2),1);
  45.   sumS2 = sum(sum(S2));
  46.   weightsum = 0;
  47.  
  48.   for i=1:size(S,2)
  49.     weights(i) = S2(i,i)/sumS2;
  50.     weightsum = weightsum + weights(i);
  51.     cumulative_weights(i) = weightsum;
  52.   end
  53. end
  54.  
  55. [weights, cumulative_weights] = scree(S)
  56. figure;
  57. plot(weights,'x:b', "markersize", 10, "linewidth", 5, "linestyle", '-');
  58. set(gca, "fontsize", 28)
  59. xlabel("Principal Component", "Fontsize",36, "Fontweight","Bold");
  60. ylabel("Proportion Of Variance", "Fontsize",36, "Fontweight","Bold");
  61. title('Scree Plot',"Fontsize",48, "Fontweight","Bold");
  62. grid;
  63. figure;
  64. plot(cumulative_weights,'x:b', "markersize", 10, "linewidth", 5, "linestyle", '-');
  65. set(gca, "fontsize", 28)
  66. grid;
  67. xlabel("Principal Component", "Fontsize",36, "Fontweight","Bold");
  68. ylabel("Cumulative Proportion Of Variance", "Fontsize",36, "Fontweight","Bold");
  69. title('Cumulative Scree Plot',"Fontsize",48, "Fontweight","Bold");
  70.  
  71. setosa = data(1:50,:);
  72. setosa_mean = mean(setosa(:,2:end));
  73. setosa_covariance = cov(setosa(:,2:end));
  74. virginica = data(51:100,:);
  75. virginica_mean = mean(virginica(:,2:end));
  76. virginica_covariance = cov(virginica(:,2:end));
  77. versicolor = data(101:150,:);
  78. versicolor_mean = (mean(versicolor(:,2:end)));
  79. versicolor_covariance = (cov(versicolor(:,2:end)));
  80.  
  81. function [Vsquare] = squareSign(V)
  82.   for i=1:size(V,2)
  83.     for j=1:size(V,2)
  84.         Vsquare(i,j) = V(i,j)^2;
  85.         if V(i,j)<0
  86.             Vsquare(i,j) = Vsquare(i,j)*-1;
  87.         else  
  88.             Vsquare(i,j) = Vsquare(i,j)*1;
  89.         end
  90.     end
  91.   end
  92. end
  93.  
  94. function [] = plotLoading(Vsquare, idx)
  95.   figure;
  96.   bar(Vsquare(:,idx),0.5);
  97.   grid;
  98.   set(gca, "fontsize", 28)
  99.   ymin = min(Vsquare(:,idx)) + (min(Vsquare(:,idx))/10);
  100.   ymax = max(Vsquare(:,idx)) + (max(Vsquare(:,idx))/10);
  101.   axis([0 size(Vsquare, 2)+1 ymin ymax]);
  102.  
  103.   xlabel('Feature index', "fontsize", 36);
  104.  
  105.   ylabel('Importance of feature', "fontsize", 36);
  106.  
  107.   [chart_title, ERRMSG] = sprintf('Loading Vector %d',idx);
  108.  
  109.   title(chart_title, "fontsize", 48);
  110. end
  111.  
  112. [Vsquare] = squareSign(V)
  113. plotLoading(Vsquare, 1)
  114. plotLoading(Vsquare, 2)
  115. plotLoading(Vsquare, 3)
  116. plotLoading(Vsquare, 4)
  117.  
  118. function [] = plotScatter3(setosa, versicolor, virginica, idx, idy, idz, labels)
  119.   figure;
  120.  
  121.   scatter3(setosa(:,idx), setosa(:,idy), setosa(:,idz), 5, 'r')
  122.   hold on;
  123.   scatter3(versicolor(:,idx), versicolor(:,idy), versicolor(:,idz), 5, 'g')
  124.   scatter3(virginica(:,idx), virginica(:,idy), virginica(:,idz), 5, 'b')
  125.   set(gca, "fontsize", 28)
  126.   xlabel(labels{idx - 1}, "fontsize", 36)
  127.   ylabel(labels{idy - 1}, "fontsize", 36)
  128.   zlabel(labels{idz - 1}, "fontsize", 36)
  129.   title([labels{idx - 1} " vs. " labels{idy - 1} " vs. " labels{idz - 1}], "fontsize", 48)
  130.  
  131.   hold off;
  132. end
  133.  
  134. # Plot original data
  135. original_labels = cellstr(['Sepal Width'; 'Sepal Length'; 'Petal Width'; 'Petal Length']);
  136.  
  137. plotScatter3(setosa, versicolor, virginica, 2, 3, 4, original_labels);
  138. plotScatter3(setosa, versicolor, virginica, 2, 3, 5, original_labels);
  139. plotScatter3(setosa, versicolor, virginica, 2, 4, 5, original_labels);
  140. plotScatter3(setosa, versicolor, virginica, 3, 4, 5, original_labels);
  141.  
  142. data_setosa = data(1:50, :);
  143. data_versicolor = data(51:100, :);
  144. data_virginica = data(101:150, :);
  145.  
  146. # Plot PCA data
  147. pca_labels = cellstr(['PC1'; 'PC2'; 'PC3'; 'PC4']);
  148.  
  149. plotScatter3(data_setosa, data_versicolor, data_virginica, 2, 3, 4, pca_labels);
  150. plotScatter3(data_setosa, data_versicolor, data_virginica, 2, 3, 5, pca_labels);
  151. plotScatter3(data_setosa, data_versicolor, data_virginica, 2, 4, 5, pca_labels);
  152. plotScatter3(data_setosa, data_versicolor, data_virginica, 3, 4, 5, pca_labels);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement