function [w,b,out] = ALM_SVM(X,y,lam,opts)
%=============================================
%
% augmented Lagrangian method for solving the soft-margin SVM
%
%   min_{w,b,t} sum(t) + lam/2*norm(w)^2
%   s.t. y(i)*(w'*X(:,i)+b) >= 1-t(i),
%        t(i) >= 0, i = 1,...,N
%
%=============================================
%
% input:
%   X: training data, each column is a sample
%   y: label vector (entries +1 or -1)
%   lam: model parameter
%   opts.tol: stopping tolerance
%   opts.maxit: maximum number of outer iterations
%   opts.subtol: stopping tolerance for the inner loop
%   opts.maxsubit: maximum number of iterations for the inner loop
%   opts.w0: initial w
%   opts.b0: initial b
%   opts.t0: initial t
%   opts.beta: penalty parameter
%
% output:
%   w: learned w
%   b: learned b
%   out.hist_pres: history of primal residuals
%   out.hist_dres: history of dual residuals
%   out.hist_subit: history of inner-loop iteration counts
%
% ======================================================
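% Example (a minimal usage sketch; the synthetic data and parameter
% values below are illustrative, not part of the solver itself):
%
%   p = 10; N = 200;
%   X = randn(p,N);
%   y = sign(X'*randn(p,1) + 0.1*randn(N,1));
%   opts = struct('tol',1e-4,'maxit',500,'beta',1);
%   [w,b,out] = ALM_SVM(X,y,0.1,opts);
%   semilogy(out.hist_pres); hold on; semilogy(out.hist_dres);
%
% ======================================================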
%% get size of problem: p is dimension; N is number of data pts
[p,N] = size(X);

%% set parameters
if isfield(opts,'tol'),      tol = opts.tol;           else, tol = 1e-4;      end
if isfield(opts,'maxit'),    maxit = opts.maxit;       else, maxit = 500;     end
if isfield(opts,'subtol'),   subtol = opts.subtol;     else, subtol = 1e-4;   end
if isfield(opts,'maxsubit'), maxsubit = opts.maxsubit; else, maxsubit = 5000; end
if isfield(opts,'w0'),       w0 = opts.w0;             else, w0 = randn(p,1); end
if isfield(opts,'b0'),       b0 = opts.b0;             else, b0 = 0;          end
if isfield(opts,'t0'),       t0 = opts.t0;             else, t0 = zeros(N,1); end
if isfield(opts,'beta'),     beta = opts.beta;         else, beta = 1;        end

% line-search parameters: step sizes grow by inc_ratio across inner
% iterations and shrink by dec_ratio during backtracking
alpha0 = 0.5;
alpha = alpha0;
inc_ratio = 2;
dec_ratio = 0.6;

w = w0; b = b0; t = max(0,t0);
% initialize dual variable (multipliers of the margin constraints)
u = zeros(N,1);
onevec = ones(N,1);
% conservative estimate of the Lipschitz constant of the augmented
% Lagrangian gradient; used as a floor for the backtracking step size
Lip = lam + beta*max(1,norm(X))*norm([eye(p), X; eye(p), X]);
alpha_min = 1.9/Lip;
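
% Stopping is based on approximate KKT conditions: the primal residual
% measures violation of the margin constraints (t >= 0 is maintained
% exactly by projection), and the dual residual measures stationarity of
% the ordinary Lagrangian in (w,b,t), with the t-part taken as a
% projected gradient (zero where t > 0, nonnegative where t == 0).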

%% compute the primal residual and save to pres
primal = max(0, onevec - t - y.*(X'*w+b));
pres = norm(primal);
% save historical primal residual
hist_pres = pres;

% compute gradient of the ordinary Lagrangian function w.r.t. (w,b,t)
grad_w = lam*w - X*(u.*y);
grad_b = -u'*y;
grad_t = onevec - u;

id1 = t > 0;   % components with t(i) > 0: gradient must vanish
id2 = t == 0;  % components with t(i) = 0: gradient must be nonnegative

% save dual residual to dres
dres = norm(grad_w) + norm(grad_b) ...
    + norm(grad_t(id1)) + norm(min(0,grad_t(id2)));
hist_dres = dres;

hist_subit = 0;

iter = 0; subit = 0;
%% start of outer loop
while max(pres,dres) > tol && iter < maxit
    iter = iter + 1;
    % call the subsolver to update the primal variables (w,b,t)
    w0 = w;
    b0 = b;
    t0 = t;

    [w,b,t] = subsolver(w0,b0,t0,subtol,maxsubit);

    % subit is shared with the nested subsolver and holds the number of
    % inner iterations just performed
    hist_subit = [hist_subit; subit];
    % update multiplier u by the classical ALM rule for inequality
    % constraints; using the unclipped constraint values lets multipliers
    % of inactive constraints decrease back toward zero
    res = onevec - t - y.*(X'*w+b);
    u = max(0, u + beta*res);

    % compute primal residual (constraint violation) and save to hist_pres
    primal = max(0, res);
    pres = norm(primal);
    hist_pres = [hist_pres; pres];

    % compute gradient of the ordinary Lagrangian function w.r.t. (w,b,t)
    grad_w = lam*w - X*(u.*y);
    grad_b = -u'*y;
    grad_t = onevec - u;

    % compute the dual residual and save to hist_dres
    id1 = t > 0;
    id2 = t == 0;
    dres = norm(grad_w) + norm(grad_b) ...
        + norm(grad_t(id1)) + norm(min(0,grad_t(id2)));
    hist_dres = [hist_dres; dres];

    fprintf('out iter = %d, pres = %5.4e, dres = %5.4e, subit = %d\n', ...
        iter,pres,dres,subit);
end

out.hist_pres = hist_pres;
out.hist_dres = hist_dres;
out.hist_subit = hist_subit;

%% =====================================================
% subsolver for the primal subproblem: projected gradient with
% backtracking line search; as a nested function it shares X, y, u,
% lam, beta, onevec and the line-search variables with the parent
function [w,b,t] = subsolver(w0,b0,t0,subtol,maxsubit)
w = w0;
b = b0;
t = t0;

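% The inner loop minimizes, over (w,b,t) with t >= 0, the augmented
% Lagrangian evaluated in augObj below; with g = onevec - t - y.*(X'*w+b),
%   L_beta(w,b,t;u) = sum(t) + lam/2*norm(w)^2 + u'*g ...
%                     + beta/2*norm(max(0,g))^2
% and the grad_* expressions are its gradients w.r.t. (w,b,t).
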
% compute gradient of the augmented Lagrangian function at (w,b,t)
primal = max(0, onevec - t - y.*(X'*w+b));
grad_w = lam*w - X*(u.*y) - beta*X*(y.*primal);
grad_b = -u'*y - beta*y'*primal;
grad_t = onevec - u - beta*primal;

% compute gradient error
id1 = t > 0;
id2 = t == 0;
grad_err = norm(grad_w) + norm(grad_b) ...
    + norm(grad_t(id1)) + norm( min(0,grad_t(id2)) );

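% Each inner iteration first enlarges the previous step size by inc_ratio,
% then backs it off by dec_ratio until the local upper-bound condition
%   L(x) <= L(x0) + grad(x0)'*(x-x0) + 1/(2*alpha)*norm(x-x0)^2
% holds at the trial point x = (w,b,t); the condition is guaranteed once
% 1/alpha reaches the local Lipschitz constant of the gradient.
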
subit = 0;
% start of inner loop
while grad_err > subtol && subit < maxsubit

    % compute gradient of the augmented Lagrangian function at (w0,b0,t0)
    primal_0 = max(0, onevec - t0 - y.*(X'*w0+b0));
    grad0_w = lam*w0 - X*(u.*y) - beta*X*(y.*primal_0);
    grad0_b = -u'*y - beta*y'*primal_0;
    grad0_t = onevec - u - beta*primal_0;

    % enlarge the previous step size before backtracking
    alpha = alpha0*inc_ratio;
    subit = subit + 1;

    % evaluate the augmented Lagrangian function at (w0,b0,t0)
    augObj0 = sum(t0) + lam/2*norm(w0)^2 + u'*(onevec-t0-y.*(X'*w0+b0)) ...
        + beta/2*norm(primal_0)^2;
    augObj = Inf; % force at least one pass through the line search

    % perform line search by checking local Lipschitz continuity
    while augObj > augObj0 + (w-w0)'*grad0_w + 0.5/alpha*norm(w-w0)^2 ...
            + (b-b0)'*grad0_b + 0.5/alpha*norm(b-b0)^2 ...
            + (t-t0)'*grad0_t + 0.5/alpha*norm(t-t0)^2

        % back off the step size, but keep alpha above alpha_min
        alpha = dec_ratio*alpha;
        alpha = max(alpha_min, alpha);

        % update (w,b,t) from (w0,b0,t0) with step size alpha;
        % t is projected back onto the nonnegative orthant
        w = w0 - alpha*grad0_w;
        b = b0 - alpha*grad0_b;
        t = max(0, t0 - alpha*grad0_t);

        % evaluate the augmented Lagrangian function at the trial point
        augObj = sum(t) + lam/2*norm(w)^2 + u'*(onevec-t-y.*(X'*w+b)) ...
            + beta/2*norm( max(0, onevec-t-y.*(X'*w+b)) )^2;
    end
    % warm-start the next iteration from the accepted step size
    alpha0 = alpha;
    w0 = w; b0 = b; t0 = t;

    % compute gradient of the augmented Lagrangian function at (w,b,t)
    primal = max(0, onevec - t - y.*(X'*w+b));
    grad_w = lam*w - X*(u.*y) - beta*X*(y.*primal);
    grad_b = -u'*y - beta*y'*primal;
    grad_t = onevec - u - beta*primal;

    % compute grad_err
    id1 = t > 0;
    id2 = t == 0;
    grad_err = norm(grad_w) + norm(grad_b) ...
        + norm(grad_t(id1)) + norm( min(0,grad_t(id2)) );

end % inner loop
end % subsolver
%=====================================================

end % ALM_SVM