Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
function [w,b,out] = ALM_SVM(X,y,lam,opts)
%ALM_SVM  Augmented Lagrangian method for the soft-margin linear SVM
%
%   min_{w,b,t}  sum(t) + lam/2*norm(w)^2
%   s.t.         y(i)*(w'*X(:,i)+b) >= 1 - t(i)
%                t(i) >= 0,  i = 1,...,N
%
% input:
%   X:   training data, each column is one sample (p x N)
%   y:   label vector (N x 1)
%   lam: model (regularization) parameter
%   opts.tol:      stopping tolerance on max(primal residual, dual residual)
%   opts.maxit:    maximum number of outer iterations
%   opts.subtol:   stopping tolerance for the inner loop
%   opts.maxsubit: maximum number of iterations for the inner loop
%   opts.w0:       initial w
%   opts.b0:       initial b
%   opts.t0:       initial t
%   opts.beta:     penalty parameter of the augmented Lagrangian
%
% output:
%   w: learned w
%   b: learned b
%   out.hist_pres:  historical primal residuals
%   out.hist_dres:  historical dual residuals
%   out.hist_subit: historical inner-loop iteration counts

%% get size of problem: p is dimension; N is number of data points
[p,N] = size(X);

%% set parameters (defaults used when a field is absent)
if isfield(opts,'tol'),      tol      = opts.tol;      else, tol      = 1e-4; end
if isfield(opts,'maxit'),    maxit    = opts.maxit;    else, maxit    = 500;  end
if isfield(opts,'subtol'),   subtol   = opts.subtol;   else, subtol   = 1e-4; end
if isfield(opts,'maxsubit'), maxsubit = opts.maxsubit; else, maxsubit = 5000; end
if isfield(opts,'w0'),       w0       = opts.w0;       else, w0 = randn(p,1); end
if isfield(opts,'b0'),       b0       = opts.b0;       else, b0 = 0;          end
if isfield(opts,'t0'),       t0       = opts.t0;       else, t0 = zeros(N,1); end
if isfield(opts,'beta'),     beta     = opts.beta;     else, beta = 1;        end

alpha0 = 0.5;      % warm-started step size for the inner line search
alpha  = 0.5;
inc_ratio = 2;     % step-size increase factor at the start of each line search
dec_ratio = 0.6;   % backtracking decrease factor

w = w0; b = b0; t = max(0,t0);   % project t0 so the start point is feasible

% initialize dual variable (multiplier of the margin constraints)
u = zeros(N,1);
onevec = ones(N,1);

% conservative global Lipschitz bound for the AL gradient; its inverse
% (scaled) gives a floor on the backtracked step size
Lip = lam + beta*max(1,norm(X))*norm([eye(p), X; eye(p), X]);
alpha_min = 1.9/Lip;

%% compute the initial primal residual
% NOTE: the residual vector gets its own name `primal` so it does not
% clobber the dimension variable p (the original code overwrote p here)
primal = max(0, onevec - t - y.*(X'*w+b));
pres = norm(primal);
hist_pres = pres;   % historical primal residuals

% gradient of the ordinary Lagrangian function about (w,b,t)
grad_w = lam*w - X*(u.*y);
grad_b = -u'*y;
grad_t = onevec - u;

% dual residual: stationarity in (w,b), and for t the KKT conditions
% require grad_t = 0 where t > 0 and grad_t >= 0 where t = 0
id1 = t > 0;
id2 = t == 0;
dres = norm(grad_w) + norm(grad_b) ...
    + norm(grad_t(id1)) + norm(min(0,grad_t(id2)));
hist_dres = dres;
hist_subit = 0;

iter = 0; subit = 0;
%% start of outer loop
while max(pres,dres) > tol && iter < maxit
    iter = iter + 1;
    % update the primal variables (w,b,t) from the current point
    w0 = w;
    b0 = b;
    t0 = t;
    [w,b,t] = subsolver(w0,b0,t0,subtol,maxsubit);
    hist_subit = [hist_subit; subit];   % subit is set inside subsolver (shared workspace)
    % update multiplier u (kept nonnegative)
    primal = max(0, onevec - t - y.*(X'*w+b));
    u = max(0, u + beta*primal);
    % compute primal residual and save to hist_pres
    pres = norm(primal);
    hist_pres = [hist_pres; pres];
    % gradient of the ordinary Lagrangian function about (w,b,t)
    grad_w = lam*w - X*(u.*y);
    grad_b = -u'*y;
    grad_t = onevec - u;
    % compute the dual residual and save to hist_dres
    id1 = t > 0;
    id2 = t == 0;
    dres = norm(grad_w) + norm(grad_b) ...
        + norm(grad_t(id1)) + norm(min(0,grad_t(id2)));
    hist_dres = [hist_dres; dres];
    fprintf('out iter = %d, pres = %5.4e, dres = %5.4e, subit = %d\n',iter,pres,dres,subit);
end

out.hist_pres = hist_pres;
out.hist_dres = hist_dres;
out.hist_subit = hist_subit;

%% =====================================================
% nested subsolver for the primal subproblem; shares X, y, lam, u, beta,
% onevec, alpha0, inc_ratio, dec_ratio, alpha_min, subit with the parent
    function [w,b,t] = subsolver(w0,b0,t0,subtol,maxsubit)
        % projected gradient with backtracking line search on the
        % augmented Lagrangian, over (w, b, t >= 0)
        w = w0;
        b = b0;
        t = t0;
        % gradient of the augmented Lagrangian function at (w,b,t)
        primal = max(0, onevec - t - y.*(X'*w+b));
        grad_w = lam*w - X*(u.*y) - beta*X*(y.*primal);
        grad_b = -u'*y - beta*y'*primal;
        grad_t = onevec - u - beta*primal;
        % gradient error (projected-stationarity measure)
        id1 = t > 0;
        id2 = t == 0;
        grad_err = norm(grad_w) + norm(grad_b) ...
            + norm(grad_t(id1)) + norm(min(0,grad_t(id2)));
        subit = 0;
        % start of inner loop
        while grad_err > subtol && subit < maxsubit
            subit = subit + 1;
            % gradient of the augmented Lagrangian at the anchor (w0,b0,t0)
            primal_0 = max(0, onevec - t0 - y.*(X'*w0+b0));
            grad0_w = lam*w0 - X*(u.*y) - beta*X*(y.*primal_0);
            grad0_b = -u'*y - beta*y'*primal_0;
            grad0_t = onevec - u - beta*primal_0;
            % value of the augmented Lagrangian at (w0,b0,t0)
            augObj0 = sum(t0) + lam/2*norm(w0)^2 + u'*(onevec-t0-y.*(X'*w0+b0)) ...
                + beta/2*norm(primal_0)^2;
            % line search by checking local Lipschitz continuity; start
            % from an increased step so alpha can grow between iterations
            alpha = alpha0*inc_ratio;
            while true
                % keep alpha above alpha_min
                alpha = max(alpha_min, dec_ratio*alpha);
                % trial projected-gradient step from (w0,b0,t0)
                w = w0 - alpha*grad0_w;
                b = b0 - alpha*grad0_b;
                t = max(0, t0 - alpha*grad0_t);
                % value of the augmented Lagrangian at the trial point
                augObj = sum(t) + lam/2*norm(w)^2 + u'*(onevec-t-y.*(X'*w+b)) ...
                    + beta/2*norm(max(0, onevec-t-y.*(X'*w+b)))^2;
                if augObj <= augObj0 + (w-w0)'*grad0_w + 0.5/alpha*norm(w-w0)^2 ...
                        + (b-b0)'*grad0_b + 0.5/alpha*norm(b-b0)^2 ...
                        + (t-t0)'*grad0_t + 0.5/alpha*norm(t-t0)^2
                    break;   % sufficient decrease achieved
                end
                if alpha <= alpha_min
                    % BUGFIX: the step-size floor was reached, so alpha (and
                    % hence the trial point) can no longer change; accept the
                    % step to avoid the infinite loop the original code had
                    break;
                end
            end
            alpha0 = alpha;   % warm-start the next line search
            w0 = w; b0 = b; t0 = t;
            % gradient of the augmented Lagrangian at the new (w,b,t)
            primal = max(0, onevec - t - y.*(X'*w+b));
            grad_w = lam*w - X*(u.*y) - beta*X*(y.*primal);
            grad_b = -u'*y - beta*y'*primal;
            grad_t = onevec - u - beta*primal;
            % recompute grad_err for the stopping test
            id1 = t > 0;
            id2 = t == 0;
            grad_err = norm(grad_w) + norm(grad_b) ...
                + norm(grad_t(id1)) + norm(min(0,grad_t(id2)));
        end
    end
%=====================================================
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement