Advertisement
Guest User

kmeans

a guest
Nov 19th, 2018
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
MatLab 2.13 KB | None | 0 0
  % Wine data preparation script (GNU Octave).
  %
  % Steps:
  %   1.   load the wine data,
  %   2.1  drop every sample that has a feature more than 3 standard
  %        deviations away from that feature's mean,
  %   2.2  rescale each feature to the range [0, 1],
  %   3.   turn the target matrix into integer class labels 1..3,
  %   4.   draw one box plot per feature, grouped by class,
  %   5.   split each class into a 20% validation part and an 80% training part.
  %
  % Input:  a MAT-file named 'wine' on the load path that defines wineInputs
  %         (one row per sample, one column per feature) and wineTargets
  %         (one row per sample; columns 1..3 are tested for the value 1).
  %         NOTE(review): wineTargets is presumably a one-hot class matrix --
  %         confirm against the data file.
  % Output: workspace variables validation_set, validation_class,
  %         training_set and training_class (plus the intermediates below).
  clc; clear all;
  pkg load statistics
  pkg load nan

  % 1. LOAD WINE DATASET
  load('wine');

  % 2.1 REMOVE OUTLIERS
  mean_values = mean(wineInputs);
  sigma = std(wineInputs);
  n = size(wineInputs, 1);
  outlier = abs(wineInputs - repmat(mean_values, n, 1)) > 3 * repmat(sigma, n, 1);
  n_outliers = sum(outlier);          % number of flagged samples per feature
  % A sample is discarded when any of its features is flagged. The same rows
  % must be removed from the targets so features and labels stay aligned.
  outlier_rows = any(outlier, 2);
  wineInputs(outlier_rows, :) = [];
  wineTargets(outlier_rows, :) = [];

  % 2.2 TRANSFORM DATA TO [0-1]
  n_features = size(wineInputs, 2);
  n_wines = size(wineInputs, 1);
  lowest = min(wineInputs, [], 1);
  difference = max(wineInputs, [], 1) - lowest;
  % A constant feature has zero range; divide by 1 instead so it maps to 0
  % rather than NaN.
  difference(difference == 0) = 1;
  normalized_features = (wineInputs - repmat(lowest, n_wines, 1)) ./ repmat(difference, n_wines, 1);
  wineInputs = normalized_features;

  % 3. DECODE TARGETS INTO CLASS LABELS
  % Column 1 set -> class 1, otherwise column 2 set -> class 2, otherwise
  % class 3. Assigning in reverse priority order keeps that precedence.
  labels = 3 * ones(n_wines, 1);
  labels(wineTargets(:, 2) == 1) = 2;
  labels(wineTargets(:, 1) == 1) = 1;

  % Features followed by the label in the last column.
  data = [wineInputs, labels];
  label_col = n_features + 1;

  % divide the matrix by their labels
  wine1 = data(data(:, label_col) == 1, :);
  wine2 = data(data(:, label_col) == 2, :);
  wine3 = data(data(:, label_col) == 3, :);

  % 4. BOX PLOT OF EVERY FEATURE, ONE BOX PER CLASS
  grid_side = ceil(sqrt(n_features));   % smallest square grid holding all plots
  for i = 1:n_features
    subplot(grid_side, grid_side, i);

    % Use the per-class matrices instead of fixed row ranges so the plot stays
    % correct whatever number of rows the outlier filter removed.
    boxplot({wine1(:, i), wine2(:, i), wine3(:, i)});
    % Presumably the three boxes sit at x = 1, 2, 3 (TODO confirm); leave half
    % a unit on each side so the outer boxes are not clipped.
    xlim([0.5 3.5]);
    ylim([0.0 1.0]);
    title(num2str(i));
  end

  % 5. PREPARE SETS OF 20% AND 80% OF EACH CATEGORY
  % The first 20% of the rows of each class (rounded down) go to validation and
  % the remaining rows go to training, so the two sets never share a sample.
  validation_fraction = 20 / 100;
  n1_20 = floor(size(wine1, 1) * validation_fraction);
  n2_20 = floor(size(wine2, 1) * validation_fraction);
  n3_20 = floor(size(wine3, 1) * validation_fraction);

  wine1_20 = wine1(1:n1_20, :);
  wine2_20 = wine2(1:n2_20, :);
  wine3_20 = wine3(1:n3_20, :);
  wine1_80 = wine1(n1_20 + 1:end, :);
  wine2_80 = wine2(n2_20 + 1:end, :);
  wine3_80 = wine3(n3_20 + 1:end, :);

  % The pieces are already numeric matrices, so they are stacked directly.
  validation_set   = [wine1_20(:, 1:n_features); wine2_20(:, 1:n_features); wine3_20(:, 1:n_features)];
  validation_class = [wine1_20(:, label_col); wine2_20(:, label_col); wine3_20(:, label_col)];
  training_set     = [wine1_80(:, 1:n_features); wine2_80(:, 1:n_features); wine3_80(:, 1:n_features)];
  training_class   = [wine1_80(:, label_col); wine2_80(:, label_col); wine3_80(:, label_col)];
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement