Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
function [attribute,threshold] = ChooseAttribute(features,targets)
%CHOOSEATTRIBUTE Select the attribute/threshold pair with the highest information gain.
%   [attribute,threshold] = ChooseAttribute(features,targets)
%
%   Inputs:
%     features - matrix indexed as features(sample, attribute).
%     targets  - labels, linearly indexed so that targets(k) belongs to row k
%                of features. Passed unchanged to my_entropy.
%
%   Outputs:
%     attribute - column index of features giving the best split.
%     threshold - threshold value for that attribute; samples whose value is
%                 strictly greater than the threshold go to the "right"
%                 partition, all others go to the "left" partition.
%
%   For every attribute, a fixed number of candidate thresholds is drawn from
%   a normal distribution centred on the attribute's mean with three times its
%   standard deviation. The result is therefore random unless the caller
%   seeds the generator (e.g. with rng).
%
%   Relies on my_entropy, which is defined outside this function.

    num_of_candidate_thresholds = 20;
    % size(...,1) is the sample count; length() would return the larger
    % dimension and break when there are more attributes than samples.
    num_samples = size(features,1);
    num_attributes = size(features,2);

    entrp_before = my_entropy(targets); % entropy before any split
    disp("EntrpArr "+entrp_before);

    % Row vector of the labels that line up with the rows of features, so the
    % partitions handed to my_entropy are row vectors.
    labels = reshape(targets(1:num_samples),1,[]);

    % Preallocate: one row per attribute, one column per candidate threshold.
    all_thresholds = zeros(num_attributes,num_of_candidate_thresholds);
    info_gain = -ones(num_attributes,num_of_candidate_thresholds);

    % Go through every feature/attribute
    for i = 1:num_attributes
        this_features = features(:,i);

        % Same distribution as normrnd(mu, 3*sigma, [1 n]) but without
        % requiring the Statistics Toolbox.
        candidates = mean(this_features) + 3*std(this_features)*randn(1,num_of_candidate_thresholds);
        all_thresholds(i,:) = candidates;

        for x = 1:num_of_candidate_thresholds
            % Build both partitions from scratch for every threshold. Using a
            % logical mask avoids carrying state over from the previous
            % threshold and avoids a sentinel value that could collide with a
            % real label.
            goes_right = reshape(this_features > candidates(x),1,[]);
            right = labels(goes_right);
            left = labels(~goes_right);

            if isempty(left) || isempty(right)
                % Nothing was separated, so the weighted entropy equals the
                % entropy of the unsplit data: the gain is exactly zero.
                % This also avoids calling my_entropy on an empty array,
                % whose behaviour is not visible from here.
                info_gain(i,x) = 0;
                continue;
            end

            % Information gain = entropy before - weighted entropy after.
            lftPrpr = numel(left)/num_samples;
            rhtPrpr = numel(right)/num_samples;
            info_gain(i,x) = entrp_before-(lftPrpr*my_entropy(left)+rhtPrpr*my_entropy(right));
        end
        disp("Complted attribute: "+i);
    end

    % Best gain per threshold column, then best over all columns. The explicit
    % dimension argument matters: with a single attribute info_gain is a row
    % vector and plain max(info_gain) would reduce along the wrong dimension.
    [max_gain_col,max_idx_col] = max(info_gain,[],1);
    [max_all,max_all_idx] = max(max_gain_col);

    attribute = max_idx_col(max_all_idx);
    threshold = all_thresholds(attribute,max_all_idx);
    disp("Max gain found: "+max_all+" on attribute: "+attribute+" with threashold: "+threshold);
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement