Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
% Wine dataset preprocessing (GNU Octave):
%   1. load the data,
%   2. drop 3-sigma outliers and min-max scale every feature to [0, 1],
%   3. draw one box plot per feature, grouped by class,
%   4. build a stratified 20% validation / 80% training split.
%
% Workspace outputs: validation_set, validation_class,
%                    training_set,   training_class.
clc; clear all;
pkg load statistics
pkg load nan

% 1. LOAD WINE DATASET
% The code below indexes wineInputs as (samples x features) and wineTargets
% as (samples x 3) one-hot rows.
% NOTE(review): confirm the 'wine' file stores the variables in that layout.
load('wine');

% 2.1 REMOVING OUTLIERS
% A sample is an outlier when any of its features lies more than three
% standard deviations away from that feature's mean.
mean_values = mean(wineInputs);
sigma = std(wineInputs);
outlier = abs(wineInputs - mean_values) > 3 * sigma;  % broadcast over rows
n_outliers = sum(outlier);                            % outlier count per feature

% Compute the row mask once and apply it to both the features and the
% targets so they stay aligned. any(..., 2) always reduces along the feature
% dimension, even when only a single sample is present.
outlier_rows = any(outlier, 2);
wineInputs(outlier_rows, :) = [];
wineTargets(outlier_rows, :) = [];

% 2.2 TRANSFORM DATA TO [0-1]
n_features = size(wineInputs, 2);
feature_min = min(wineInputs, [], 1);
difference = max(wineInputs, [], 1) - feature_min;
% A constant feature has zero range; divide by 1 instead so it maps to 0
% rather than NaN.
difference(difference == 0) = 1;
normalized_features = (wineInputs - feature_min) ./ difference;
wineInputs = normalized_features;

% 2.3 ONE-HOT TARGETS -> NUMERIC LABELS (1, 2 or 3)
% Column 1 takes precedence over column 2; every remaining row gets label 3.
n_wines = size(wineInputs, 1);
labels = 3 * ones(n_wines, 1);
labels(wineTargets(:, 2) == 1) = 2;
labels(wineTargets(:, 1) == 1) = 1;

label_col = n_features + 1;
data = [wineInputs, labels];

% divide the matrix by their labels
wine1 = data(data(:, label_col) == 1, :);
wine2 = data(data(:, label_col) == 2, :);
wine3 = data(data(:, label_col) == 3, :);

% 3. BOX PLOT OF EVERY FEATURE, ONE BOX PER CLASS
% Use the per-class matrices instead of fixed row ranges so the plot stays
% correct whatever the number of removed outliers or the row order.
grid_side = ceil(sqrt(n_features));   % 4x4 grid for the 13 wine features
for i = 1:n_features
  subplot(grid_side, grid_side, i);
  boxplot({wine1(:, i), wine2(:, i), wine3(:, i)});
  % Keep all three boxes fully inside the axes.
  % NOTE(review): assumes boxplot draws the boxes at x = 1, 2, 3.
  xlim([0.5 3.5]);
  ylim([0.0 1.0]);
  title(num2str(i));
end

% 4. prepare sets of 20% (validation) and 80% (training) of each category
% The first floor(20%) rows of each class go to validation and the remaining
% rows go to training, so the two sets never overlap.
n_val1 = floor(size(wine1, 1) * 20 / 100);
n_val2 = floor(size(wine2, 1) * 20 / 100);
n_val3 = floor(size(wine3, 1) * 20 / 100);

wine1_20 = wine1(1:n_val1, :);
wine2_20 = wine2(1:n_val2, :);
wine3_20 = wine3(1:n_val3, :);

wine1_80 = wine1(n_val1 + 1:end, :);
wine2_80 = wine2(n_val2 + 1:end, :);
wine3_80 = wine3(n_val3 + 1:end, :);

% The subsets are ordinary numeric matrices, so they are concatenated
% directly (cell2mat only accepts cell arrays).
validation_set = [wine1_20(:, 1:n_features); wine2_20(:, 1:n_features); wine3_20(:, 1:n_features)];
validation_class = [wine1_20(:, label_col); wine2_20(:, label_col); wine3_20(:, label_col)];
training_set = [wine1_80(:, 1:n_features); wine2_80(:, 1:n_features); wine3_80(:, 1:n_features)];
training_class = [wine1_80(:, label_col); wine2_80(:, label_col); wine3_80(:, label_col)];
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement