Advertisement
david_eisner

edX ML SVM with Cross-Validation

Nov 24th, 2014
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
MatLab 4.11 KB | None | 0 0
  % edX ML HW 8 problem 2, version 2, with home-grown K-fold binning
  %
  % Uses K-fold cross validation to pick the SVM cost parameter C for the
  % digit-1 vs digit-5 classifier with a degree-Q polynomial kernel, and
  % repeats the experiment N_rep times with a fresh random partition each
  % time.
  %
  % Reads 'features.train' from the current directory. Column 1 is used as
  % the digit label and columns 2:3 as the two input features.
  % Calls svmtrain/svmpredict with LIBSVM-style option strings (presumably
  % the LIBSVM MATLAB interface -- it must be on the path ahead of any
  % toolbox function of the same name) and tabulate() to print the summary.
  %
  % Return values:
  %     minCs:        An N_rep x 1 vector of the C value with lowest Ecv for
  %                   each repetition. Ties go to the smaller C, because
  %                   Cvals is sorted ascending and min() returns the first
  %                   minimum.
  %
  %     C_Ecv_table:  N_rep x |Cvals| matrix of Ecv for each C value in
  %                   each repetition.

  function [minCs, C_Ecv_table] = p7hw8v2()

      Q = 2;       % polynomial degree
      K = 10;      % K-fold cross validation
      N_rep = 100; % Repeat experiment N_rep times

      Dtrain = importdata('features.train');

      % Comparing digit 1 vs digit 5 classification

      % Pull out subset of training data where digit is '1' or '5'.
      Dtrain_1vs5 = Dtrain((Dtrain(:,1) == 1) | (Dtrain(:,1) == 5),:);
      y_train     = label_digits(1, Dtrain_1vs5(:,1));
      X_train     = Dtrain_1vs5(:,2:3);   % the two feature columns
      Ntrain      = size(y_train,1);

      % Every fold must contain at least one point, otherwise Eval is 0/0.
      if Ntrain < K
          error('p7hw8v2:tooFewPoints', ...
              'Need at least %d points for %d-fold CV, but found %d.', ...
              K, K, Ntrain);
      end

      % The C values we'll compare, from smallest to largest
      Cvals = [0.0001 0.001 0.01 0.1 1];

      % Fold k validates on permuted positions fold_edges(k)+1 .. fold_edges(k+1).
      % These boundaries depend only on Ntrain and K, so compute them once.
      fold_edges = round(Ntrain*(0:K)/K);

      % Maintain a N_rep x |Cvals| matrix of Ecv for each C value in each rep.
      C_Ecv_table = zeros(N_rep, numel(Cvals));

      % pre-allocate return value
      minCs = zeros(N_rep,1);

      for rep=1:N_rep
          fprintf('================== RUN %d of %d ===================\n', rep, N_rep);

          % One random partition per run, shared by every C value, so that
          % the Ecv figures compared within a run differ only because of C
          % and not because of the luck of the split.
          perm = randperm(Ntrain);

          for c_idx = 1:numel(Cvals)
              C = Cvals(c_idx);
              fprintf('========\nC: %f\n', C);
              svmopts = sprintf('-s 0 -t 1 -d %d -g 1 -r 1 -c %f -h 0', Q, C);
              fprintf('*** svmopts: "%s"\n', svmopts);

              Eval_sum = 0;   % Running sum of K values for computing Ecv
              for k=1:K
                  % The interval [a,b] runs through the K bins used
                  % for the validation set.
                  a = fold_edges(k)+1;
                  b = fold_edges(k+1);
                  val_idx = perm(a:b);

                  % Validation set, bin k of K; training set is everything
                  % not in bin k (original row order preserved).
                  in_val = false(Ntrain,1);
                  in_val(val_idx) = true;

                  ycv_val   = y_train(val_idx);
                  Xcv_val   = X_train(val_idx,:);
                  ycv_train = y_train(~in_val);
                  Xcv_train = X_train(~in_val,:);

                  % Show at most the first four validation indexes; building
                  % the list separately keeps the message intact for tiny bins.
                  idx_preview = sprintf('%d ', val_idx(1:min(4, numel(val_idx))));
                  fprintf('\n**** CV %d of %d; bin window %d to %d, indexes [%s... ****\n', ...
                      k, K, a, b, idx_preview);

                  fprintf('**** Training on %d points ****\n', numel(ycv_train));
                  model = svmtrain(ycv_train, Xcv_train, svmopts);

                  fprintf('**** Testing on %d points ****\n', numel(ycv_val));
                  pred_val = svmpredict(ycv_val, Xcv_val, model);

                  % Calculate the error ourselves, as a sanity check.
                  N_mismatch_val = nnz(pred_val ~= ycv_val);
                  Eval = N_mismatch_val/numel(ycv_val);

                  Eval_sum = Eval_sum + Eval;
                  fprintf('** Eval = %f **\n', Eval);
              end

              Ecv = Eval_sum/K;
              fprintf(' ** (%d) Ecv = %f for C = %f **\n', c_idx, Ecv, C);

              C_Ecv_table(rep, c_idx) = Ecv;
          end     % for c_idx (each C in Cvals)

          % min() returns the first minimum, i.e. the smallest C on a tie.
          [~, best_idx] = min(C_Ecv_table(rep,:));
          minCs(rep) = Cvals(best_idx);
          fprintf('*** minCs(%d) = %f ***\n', rep, minCs(rep));
      end      % for rep=1:N_rep

      % Display output here, for convenience.
      fprintf('\n');
      tabulate(minCs)

      fprintf('\n');
      % Average Ecv for each C-value column. The explicit dimension keeps
      % this a per-column average even when there is a single repetition.
      disp(sum(C_Ecv_table, 1)/N_rep)
  end
  103.  
  104.  
  % Convert digit labels into binary +1/-1 class labels.
  %
  % Inputs:
  %     digit_one:  the digit that forms the positive class
  %     digits:     array of digit labels; an Mx1 column in this file, but
  %                 any shape (row vector, matrix, empty) is accepted
  %
  % Return value:
  %     y:          double array the same size as digits, where
  %                 y(i) = +1 if digits(i) == digit_one and -1 otherwise
  function y = label_digits( digit_one, digits)

      % Size the output from the full shape of the input rather than just
      % its row count, so non-column inputs are labelled correctly too.
      y = -ones(size(digits));
      y(digits == digit_one) = +1;
  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement