octave-nkf: diff scripts/optimization/sqp.m @ 10678:35338deff753
Guarantee equivalent results if sqp called with or without bounds
(bug #29989). Simplify input option handling and add %!tests
to check validation code. Rewrite documentation string.
| author   | Rik <octave@nomad.inbox5.com>   |
|----------|---------------------------------|
| date     | Wed, 02 Jun 2010 11:56:26 -0700 |
| parents  | 95c3e38098bf                    |
| children | 693e22af08ae                    |
--- a/scripts/optimization/sqp.m
+++ b/scripts/optimization/sqp.m
@@ -17,7 +17,12 @@
 ## <http://www.gnu.org/licenses/>.
 
 ## -*- texinfo -*-
-## @deftypefn {Function File} {[@var{x}, @var{obj}, @var{info}, @var{iter}, @var{nf}, @var{lambda}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}, @var{lb}, @var{ub}, @var{maxiter}, @var{tolerance})
+## @deftypefn {Function File} {[@var{x}, @var{obj}, @var{info}, @var{iter}, @var{nf}, @var{lambda}] =} sqp (@var{x}, @var{phi})
+## @deftypefnx {Function File} {[@dots{}] =} sqp (@var{x}, @var{phi}, @var{g})
+## @deftypefnx {Function File} {[@dots{}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h})
+## @deftypefnx {Function File} {[@dots{}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}, @var{lb}, @var{ub})
+## @deftypefnx {Function File} {[@dots{}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}, @var{lb}, @var{ub}, @var{maxiter})
+## @deftypefnx {Function File} {[@dots{}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}, @var{lb}, @var{ub}, @var{maxiter}, @var{tolerance})
 ## Solve the nonlinear program
 ## @tex
 ## $$
@@ -60,7 +65,7 @@
 ## function.  The objective function must be of the form
 ##
 ## @example
-## y = phi (x)
+## @var{y} = phi (@var{x})
 ## @end example
 ##
 ## @noindent
@@ -70,24 +75,24 @@
 ## function handles.  The first element should point to the objective
 ## function, the second should point to a function that computes the
 ## gradient of the objective function, and the third should point to a
-## function to compute the hessian of the objective function.  If the
+## function that computes the Hessian of the objective function.  If the
 ## gradient function is not supplied, the gradient is computed by finite
-## differences.  If the hessian function is not supplied, a BFGS update
-## formula is used to approximate the hessian.
+## differences.  If the Hessian function is not supplied, a BFGS update
+## formula is used to approximate the Hessian.
 ##
-## If supplied, the gradient function must be of the form
+## When supplied, the gradient function must be of the form
 ##
 ## @example
-## g = gradient (x)
+## @var{g} = gradient (@var{x})
 ## @end example
 ##
 ## @noindent
 ## in which @var{x} is a vector and @var{g} is a vector.
 ##
-## If supplied, the hessian function must be of the form
+## When supplied, the Hessian function must be of the form
 ##
 ## @example
-## h = hessian (x)
+## @var{h} = hessian (@var{x})
 ## @end example
 ##
 ## @noindent
@@ -97,14 +102,14 @@
 ## functions that compute the equality constraints and the inequality
 ## constraints, respectively.
 ##
-## If your problem does not have equality (or inequality) constraints,
-## you may pass an empty matrix for @var{cef} (or @var{cif}).
+## If the problem does not have equality (or inequality) constraints,
+## then use an empty matrix ([]) for @var{cef} (or @var{cif}).
 ##
-## If supplied, the equality and inequality constraint functions must be
+## When supplied, the equality and inequality constraint functions must be
 ## of the form
 ##
 ## @example
-## r = f (x)
+## @var{r} = f (@var{x})
 ## @end example
 ##
 ## @noindent
@@ -132,18 +137,39 @@
 ## @end example
 ## @end ifnottex
 ##
-## The fifth and sixth arguments are vectors containing lower and upper bounds
-## on @var{x}.  These must be consistent with equality and inequality
-## constraints @var{g} and @var{h}.  If the bounds are not specified, or are
-## empty, they are set to -@var{realmax} and @var{realmax} by default.
+## The fifth and sixth arguments contain lower and upper bounds
+## on @var{x}.  These must be consistent with the equality and inequality
+## constraints @var{g} and @var{h}.  If the arguments are vectors then
+## @var{x}(i) is bound by @var{lb}(i) and @var{ub}(i).  A bound can also
+## be a scalar in which case all elements of @var{x} will share the same
+## bound.  If only one bound (lb, ub) is specified then the other will
+## default to (-@var{realmax}, +@var{realmax}).
+##
+## The seventh argument specifies the maximum number of iterations.
+## The default value is 100.
+##
+## The eighth argument specifies the tolerance for the stopping criteria.
+## The default value is @code{eps}.
 ##
-## The seventh argument is max. number of iterations.  If not specified,
-## the default value is 100.
+## The value returned in @var{info} may be one of the following:
+## @table @asis
+## @item 101
+## The algorithm terminated normally.
+## Either all constraints meet the requested tolerance, or the stepsize,
+## @tex
+## $\Delta x,$
+## @end tex
+## @ifnottex
+## delta @var{x},
+## @end ifnottex
+## is less than @code{tol * norm (x)}.
+## @item 102
+## The BFGS update failed.
+## @item 103
+## The maximum number of iterations was reached.
+## @end table
 ##
-## The eighth argument is tolerance for stopping criteria.  If not specified,
-## the default value is @var{eps}.
-##
-## Here is an example of calling @code{sqp}:
+## An example of calling @code{sqp}:
 ##
 ## @example
 ## function r = g (x)
@@ -179,19 +205,6 @@
 ##      -0.0052227
 ## @end example
 ##
-## The value returned in @var{info} may be one of the following:
-## @table @asis
-## @item 101
-## The algorithm terminated because the norm of the last step was less
-## than @code{tol * norm (x))} (the value of tol is currently fixed at
-## @code{sqrt (eps)}---edit @file{sqp.m} to modify this value.
-## @item 102
-## The BFGS update failed.
-## @item 103
-## The maximum number of iterations was reached (the maximum number of
-## allowed iterations is currently fixed at 100---edit @file{sqp.m} to
-## increase this value).
-## @end table
 ## @seealso{qp}
 ## @end deftypefn
 
@@ -204,326 +217,333 @@
   global __sqp_cif__;
   global __sqp_cifcn__;
 
-  if (nargin >= 2 && nargin <= 8 && nargin != 5)
+  if (nargin < 2 || nargin > 8 || nargin == 5)
+    print_usage ();
+  endif
 
-    ## Choose an initial NxN symmetric positive definite Hessan
-    ## approximation B.
-
-    n = length (x);
+  if (!isvector (x))
+    error ("sqp: X must be a vector");
+  endif
+  if (rows (x) == 1)
+    x = x';
+  endif
 
-    ## Evaluate objective function, constraints, and gradients at initial
-    ## value of x.
-    ##
-    ##   obj_fun
-    ##   obj_grad
-    ##   ce_fun  -- equality constraint functions
-    ##   ci_fun  -- inequality constraint functions
-    ##   A == [grad_{x_1} cx_fun, grad_{x_2} cx_fun, ..., grad_{x_n} cx_fun]^T
+  obj_grd = @fd_obj_grd;
+  have_hess = 0;
+  if (iscell (objf))
+    switch length (objf)
+      case {1}
+        obj_fun = objf{1};
+      case {2}
+        obj_fun = objf{1};
+        obj_grd = objf{2};
+      case {3}
+        obj_fun = objf{1};
+        obj_grd = objf{2};
+        obj_hess = objf{3};
+        have_hess = 1;
+      otherwise
+        error ("sqp: invalid objective function specification");
+    endswitch
+  else
+    obj_fun = objf;   # No cell array, only obj_fun set
+  endif
+  __sqp_obj_fun__ = obj_fun;
 
-    obj_grd = @fd_obj_grd;
-    have_hess = 0;
-    if (iscell (objf))
-      if (length (objf) > 0)
-        __sqp_obj_fun__ = obj_fun = objf{1};
-        if (length (objf) > 1)
-          obj_grd = objf{2};
-          if (length (objf) > 2)
-            obj_hess = objf{3};
-            have_hess = 1;
-          endif
-        endif
-      else
-        error ("sqp: invalid objective function");
+  ce_fun = @empty_cf;
+  ce_grd = @empty_jac;
+  if (nargin > 2)
+    ce_grd = @fd_ce_jac;
+    if (iscell (cef))
+      switch length (cef)
+        case {1}
+          ce_fun = cef{1};
+        case {2}
+          ce_fun = cef{1};
+          ce_grd = cef{2};
+        otherwise
+          error ("sqp: invalid equality constraint function specification");
+      endswitch
+    elseif (! isempty (cef))
+      ce_fun = cef;   # No cell array, only constraint equality function set
+    endif
+  endif
+  __sqp_ce_fun__ = ce_fun;
+
+  ci_fun = @empty_cf;
+  ci_grd = @empty_jac;
+  if (nargin > 3)
+    ## constraint function given by user with possible gradient
+    __sqp_cif__ = cif;
+    ## constraint function given by user without gradient
+    __sqp_cifcn__ = @empty_cf;
+    if (iscell (cif))
+      if (length (cif) > 0)
+        __sqp_cifcn__ = cif{1};
       endif
-    else
-      __sqp_obj_fun__ = obj_fun = objf;
+    elseif (! isempty (cif))
+      __sqp_cifcn__ = cif;
     endif
 
-    ce_fun = @empty_cf;
-    ce_grd = @empty_jac;
-    if (nargin > 2)
-      ce_grd = @fd_ce_jac;
-      if (iscell (cef))
-        if (length (cef) > 0)
-          __sqp_ce_fun__ = ce_fun = cef{1};
-          if (length (cef) > 1)
-            ce_grd = cef{2};
-          endif
-        else
-          error ("sqp: invalid equality constraint function");
-        endif
-      elseif (! isempty (cef))
-        ce_fun = cef;
+    if (nargin < 5 || (nargin > 5 && isempty (lb) && isempty (ub)))
+      ## constraint inequality function only without any bounds
+      ci_grd = @fd_ci_jac;
+      if (iscell (cif))
+        switch length (cif)
+          case {1}
+            ci_fun = cif{1};
+          case {2}
+            ci_fun = cif{1};
+            ci_grd = cif{2};
+          otherwise
+            error ("sqp: invalid inequality constraint function specification");
+        endswitch
+      elseif (! isempty (cif))
+        ci_fun = cif;   # No cell array, only constraint inequality function set
       endif
-    endif
-    __sqp_ce_fun__ = ce_fun;
-
-    ci_fun = @empty_cf;
-    ci_grd = @empty_jac;
-
-    if (nargin > 3)
-      ## constraint function given by user with possibly gradient
-      __sqp_cif__ = cif;
-      ## constraint function given by user without gradient
-      __sqp_cifcn__ = @empty_cf;
-      if (iscell (__sqp_cif__))
-        if (length (__sqp_cif__) > 0)
-          __sqp_cifcn__ = __sqp_cif__{1};
+    else
+      ## constraint inequality function with bounds present
+      global __sqp_lb__;
+      if (isvector (lb))
+        __sqp_lb__ = lb(:);
+      elseif (isempty (lb))
+        if (isa (x, "single"))
+          __sqp_lb__ = -realmax ("single");
+        else
+          __sqp_lb__ = -realmax;
         endif
-      elseif (! isempty (__sqp_cif__))
-        __sqp_cifcn__ = __sqp_cif__;
+      else
+        error ("sqp: invalid lower bound");
       endif
 
-      if (nargin < 5)
-        ci_grd = @fd_ci_jac;
-        if (iscell (cif))
-          if (length (cif) > 0)
-            __sqp_ci_fun__ = ci_fun = cif{1};
-            if (length (cif) > 1)
-              ci_grd = cif{2};
-            endif
-          else
-            error ("sqp: invalid equality constraint function");
-          endif
-        elseif (! isempty (cif))
-          ci_fun = cif;
+      global __sqp_ub__;
+      if (isvector (ub))
+        __sqp_ub__ = ub(:);
+      elseif (isempty (ub))
+        if (isa (x, "single"))
+          __sqp_ub__ = realmax ("single");
+        else
+          __sqp_ub__ = realmax;
         endif
       else
-        global __sqp_lb__;
-        if (isvector (lb))
-          __sqp_lb__ = lb;
-        elseif (isempty (lb))
-          if (isa (x, "single"))
-            __sqp_lb__ = -realmax ("single");
-          else
-            __sqp_lb__ = -realmax;
-          endif
-        else
-          error ("sqp: invalid lower bound");
-        endif
-
-        global __sqp_ub__;
-        if (isvector (ub))
-          __sqp_ub__ = ub;
-        elseif (isempty (lb))
-          if (isa (x, "single"))
-            __sqp_ub__ = realmax ("single");
-          else
-            __sqp_ub__ = realmax;
-          endif
-        else
-          error ("sqp: invalid upper bound");
-        endif
+        error ("sqp: invalid upper bound");
+      endif
 
-        if (lb > ub)
-          error ("sqp: upper bound smaller than lower bound");
-        endif
-        __sqp_ci_fun__ = ci_fun = @cf_ub_lb;
-        ci_grd = @cigrad_ub_lb;
+      if (__sqp_lb__ > __sqp_ub__)
+        error ("sqp: upper bound smaller than lower bound");
       endif
-      __sqp_ci_fun__ = ci_fun;
-    endif
-
-    iter_max = 100;
-    if (nargin > 6 && ! isempty (maxiter))
-      if (isscalar (maxiter) && maxiter > 0 && round (maxiter) == maxiter)
-        iter_max = maxiter;
-      else
-        error ("sqp: invalid number of maximum iterations");
-      endif
-    endif
-
-    tol = sqrt (eps);
-    if (nargin > 7 && ! isempty (tolerance))
-      if (isscalar (tolerance) && tolerance > 0)
-        tol = tolerance;
-      else
-        error ("sqp: invalid value for tolerance");
-      endif
+      ci_fun = @cf_ub_lb;
+      ci_grd = @cigrad_ub_lb;
     endif
 
-    iter = 0;
+    __sqp_ci_fun__ = ci_fun;
+  endif  # if (nargin > 3)
+
+  iter_max = 100;
+  if (nargin > 6 && ! isempty (maxiter))
+    if (isscalar (maxiter) && maxiter > 0 && fix (maxiter) == maxiter)
+      iter_max = maxiter;
+    else
+      error ("sqp: invalid number of maximum iterations");
+    endif
+  endif
+
+  tol = sqrt (eps);
+  if (nargin > 7 && ! isempty (tolerance))
+    if (isscalar (tolerance) && tolerance > 0)
+      tol = tolerance;
+    else
+      error ("sqp: invalid value for tolerance");
    endif
+  endif
 
-    obj = feval (obj_fun, x);
-    __sqp_nfun__ = 1;
+  ## Evaluate objective function, constraints, and gradients at initial
+  ## value of x.
+  ##
+  ## obj_fun  -- objective function
+  ## obj_grad -- objective gradient
+  ## ce_fun   -- equality constraint functions
+  ## ci_fun   -- inequality constraint functions
+  ## A == [grad_{x_1} cx_fun, grad_{x_2} cx_fun, ..., grad_{x_n} cx_fun]^T
+
+  obj = feval (obj_fun, x);
+  __sqp_nfun__ = 1;
+
+  c = feval (obj_grd, x);
+
+  ## Choose an initial NxN symmetric positive definite Hessian
+  ## approximation B.
+  n = length (x);
+  if (have_hess)
+    B = feval (obj_hess, x);
+  else
+    B = eye (n, n);
+  endif
 
-    c = feval (obj_grd, x);
+  ce = feval (ce_fun, x);
+  F = feval (ce_grd, x);
+
+  ci = feval (ci_fun, x);
+  C = feval (ci_grd, x);
+
+  A = [F; C];
+
+  ## Choose an initial lambda (x is provided by the caller).
+
+  lambda = 100 * ones (rows (A), 1);
+
+  qp_iter = 1;
+  alpha = 1;
+
+  # report ();
+
+  # report (iter, qp_iter, alpha, __sqp_nfun__, obj);
+
+  info = 0;
+  iter = 0;
 
-    if (have_hess)
-      B = feval (obj_hess, x);
-    else
-      B = eye (n, n);
+  while (++iter < iter_max)
+
+    ## Check convergence.  This is just a simple check on the first
+    ## order necessary conditions.
+
+    ## idx is the indices of the active inequality constraints.
+
+    nr_f = rows (F);
+
+    lambda_e = lambda((1:nr_f)');
+    lambda_i = lambda((nr_f+1:end)');
+
+    con = [ce; ci];
+
+    t0 = norm (c - A' * lambda);
+    t1 = norm (ce);
+    t2 = all (ci >= 0);
+    t3 = all (lambda_i >= 0);
+    t4 = norm (lambda .* con);
+
+    if (t2 && t3 && max ([t0; t1; t4]) < tol)
+      info = 101;
+      break;
     endif
 
-      ce = feval (ce_fun, x);
-      F = feval (ce_grd, x);
+    ## Compute search direction p by solving QP.
+
+    g = -ce;
+    d = -ci;
 
-      ci = feval (ci_fun, x);
-      C = feval (ci_grd, x);
+    ## Discard inequality constraints that have -Inf bounds since those
+    ## will never be active.
+    idx = isinf (d) & d < 0;
+    d(idx) = [];
+    C(idx,:) = [];
 
-      A = [F; C];
+    [p, obj_qp, INFO, lambda] = qp (x, B, c, F, g, [], [], d, C,
+                                    Inf (size (d)));
+
+    info = INFO.info;
 
-      ## Choose an initial lambda (x is provided by the caller).
+    ## Check QP solution and attempt to recover if it has failed.
+
+    ## Choose mu such that p is a descent direction for the chosen
+    ## merit function phi.
 
-      lambda = 100 * ones (rows (A), 1);
+    [x_new, alpha, obj_new] = linesearch_L1 (x, p, obj_fun, obj_grd,
+                                             ce_fun, ci_fun, lambda, obj);
 
-      qp_iter = 1;
-      alpha = 1;
+    ## Evaluate objective function, constraints, and gradients at
+    ## x_new.
+
+    c_new = feval (obj_grd, x_new);
 
-      ## report ();
+    ce_new = feval (ce_fun, x_new);
+    F_new = feval (ce_grd, x_new);
+
+    ci_new = feval (ci_fun, x_new);
+    C_new = feval (ci_grd, x_new);
 
-      ## report (iter, qp_iter, alpha, __sqp_nfun__, obj);
+    A_new = [F_new; C_new];
 
-      info = 0;
+    ## Set
+    ##
+    ## s = alpha * p
+    ## y = grad_x L (x_new, lambda) - grad_x L (x, lambda)
 
-    while (++iter < iter_max)
+    y = c_new - c;
+
+    if (! isempty (A))
+      t = ((A_new - A)'*lambda);
+      y -= t;
+    endif
+
+    delx = x_new - x;
 
-      ## Check convergence.  This is just a simple check on the first
-      ## order necessary conditions.
+    if (norm (delx) < tol * norm (x))
+      info = 101;
+      break;
+    endif
 
-      ## IDX is the indices of the active inequality constraints.
+    if (have_hess)
+
+      B = feval (obj_hess, x);
 
-      nr_f = rows (F);
+    else
+
+      ## Update B using a quasi-Newton formula.
 
-      lambda_e = lambda((1:nr_f)');
-      lambda_i = lambda((nr_f+1:end)');
+      delxt = delx';
+
+      ## Damped BFGS.  Or maybe we would actually want to use the Hessian
+      ## of the Lagrangian, computed directly.
 
-      con = [ce; ci];
+      d1 = delxt*B*delx;
+
+      t1 = 0.2 * d1;
+      t2 = delxt*y;
 
-      t0 = norm (c - A' * lambda);
-      t1 = norm (ce);
-      t2 = all (ci >= 0);
-      t3 = all (lambda_i >= 0);
-      t4 = norm (lambda .* con);
+      if (t2 < t1)
+        theta = 0.8*d1/(d1 - t2);
+      else
+        theta = 1;
+      endif
 
-      if (t2 && t3 && max ([t0; t1; t4]) < tol)
-        info = 101;
+      r = theta*y + (1-theta)*B*delx;
+
+      d2 = delxt*r;
+
+      if (d1 == 0 || d2 == 0)
+        info = 102;
         break;
       endif
 
-      ## Compute search direction p by solving QP.
-
-      g = -ce;
-      d = -ci;
-
-      ## Discard inequality constraints that have -Inf bounds since those
-      ## will never be active.
-      idx = isinf (d) & d < 0;
-      d(idx) = [];
-      C(idx,:) = [];
-
-      [p, obj_qp, INFO, lambda] = qp (x, B, c, F, g, [], [], d, C,
-                                      Inf (size (d)));
-
-      info = INFO.info;
-
-      ## Check QP solution and attempt to recover if it has failed.
-
-      ## Choose mu such that p is a descent direction for the chosen
-      ## merit function phi.
-
-      [x_new, alpha, obj_new] = linesearch_L1 (x, p, obj_fun, obj_grd,
-                                               ce_fun, ci_fun, lambda, obj);
-
-      ## Evaluate objective function, constraints, and gradients at
-      ## x_new.
-
-      c_new = feval (obj_grd, x_new);
-
-      ce_new = feval (ce_fun, x_new);
-      F_new = feval (ce_grd, x_new);
-
-      ci_new = feval (ci_fun, x_new);
-      C_new = feval (ci_grd, x_new);
-
-      A_new = [F_new; C_new];
-
-      ## Set
-      ##
-      ## s = alpha * p
-      ## y = grad_x L (x_new, lambda) - grad_x L (x, lambda})
-
-      y = c_new - c;
-
-      if (! isempty (A))
-        t = ((A_new - A)'*lambda);
-        y -= t;
-      endif
-
-      delx = x_new - x;
-
-      if (norm (delx) < tol * norm (x))
-        info = 101;
-        break;
-      endif
+      B = B - B*delx*delxt*B/d1 + r*r'/d2;
 
-      if (have_hess)
-
-        B = feval (obj_hess, x);
-
-      else
-
-        ## Update B using a quasi-Newton formula.
-
-        delxt = delx';
-
-        ## Damped BFGS.  Or maybe we would actually want to use the Hessian
-        ## of the Lagrangian, computed directly.
-
-        d1 = delxt*B*delx;
-
-        t1 = 0.2 * d1;
-        t2 = delxt*y;
-
-        if (t2 < t1)
-          theta = 0.8*d1/(d1 - t2);
-        else
-          theta = 1;
-        endif
-
-        r = theta*y + (1-theta)*B*delx;
-
-        d2 = delxt*r;
-
-        if (d1 == 0 || d2 == 0)
-          info = 102;
-          break;
-        endif
-
-        B = B - B*delx*delxt*B/d1 + r*r'/d2;
-
-      endif
-
-      x = x_new;
-
-      obj = obj_new;
-
-      c = c_new;
-
-      ce = ce_new;
-      F = F_new;
-
-      ci = ci_new;
-      C = C_new;
-
-      A = A_new;
-
-      ## report (iter, qp_iter, alpha, __sqp_nfun__, obj);
-
-    endwhile
-
-    if (iter >= iter_max)
-      info = 103;
-    endif
-    nf = __sqp_nfun__;
-
-  else
-
-    print_usage ();
-
-  endif
+    endif
+
+    x = x_new;
+
+    obj = obj_new;
+
+    c = c_new;
+
+    ce = ce_new;
+    F = F_new;
+
+    ci = ci_new;
+    C = C_new;
+
+    A = A_new;
+
+    # report (iter, qp_iter, alpha, __sqp_nfun__, obj);
+
+  endwhile
+
+  if (iter >= iter_max)
+    info = 103;
+  endif
+
+  nf = __sqp_nfun__;
 
 endfunction
 
@@ -595,15 +615,13 @@
     if (p1 > p2)
       ## Reset alpha = tau_alpha * alpha for some tau_alpha in the
       ## range (0, tau).
-      tau_alpha = 0.9 * tau;  ## ??
+      tau_alpha = 0.9 * tau;  # ??
       alpha = tau_alpha * alpha;
     else
       break;
     endif
   endwhile
 
-  ## Set x_new = x + alpha * p;
-
   x_new = x + alpha * p;
 
 endfunction
 
@@ -743,6 +761,7 @@
 %! obj = exp(prod(x)) - 0.5*(x(1)^3+x(2)^3+1)^2;
 %!
 %!test
+%!
 %! x0 = [-1.8; 1.7; 1.9; -0.8; -0.8];
 %!
 %! [x, obj, info, iter, nf, lambda] = sqp (x0, @phi, @g, []);
 
@@ -756,3 +775,21 @@
 %! obj_opt = 0.0539498477702739;
 %!
 %! assert (all (abs (x-x_opt) < 5*sqrt (eps)) && abs (obj-obj_opt) < sqrt (eps));
+
+%% Test input validation
+%!error sqp ()
+%!error sqp (1)
+%!error sqp (1,2,3,4,5,6,7,8,9)
+%!error sqp (1,2,3,4,5)
+%!error sqp (ones(2,2))
+%!error sqp (1,cell(4,1))
+%!error sqp (1,cell(3,1),cell(3,1))
+%!error sqp (1,cell(3,1),cell(2,1),cell(3,1))
+%!error sqp (1,cell(3,1),cell(2,1),cell(2,1),ones(2,2),[])
+%!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],ones(2,2))
+%!error sqp (1,cell(3,1),cell(2,1),cell(2,1),1,-1)
+%!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],[],ones(2,2))
+%!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],[],-1)
+%!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],[],1.5)
+%!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],[],[],ones(2,2))
+%!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],[],[],-1)
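
The %!error tests above exercise only the input validation. As a quick illustration of the behavioral fix itself (equivalent results whether bounds are given or not), here is a hypothetical session built from the example problem in the documentation string; the assert calls are ours, not part of the changeset:

```
## Objective and equality constraints from the sqp documentation example.
function r = g (x)
  r = [sumsq(x)-10; x(2)*x(3)-5*x(4)*x(5); x(1)^3+x(2)^3+1];
endfunction

function obj = phi (x)
  obj = exp (prod (x)) - 0.5*(x(1)^3+x(2)^3+1)^2;
endfunction

x0 = [-1.8; 1.7; 1.9; -0.8; -0.8];

## Four-argument call: no bounds at all.
[x1, obj1] = sqp (x0, @phi, @g, []);
## Six-argument call: bounds passed but empty.  After this changeset the
## empty-bounds case is folded into the no-bounds path, so results match.
[x2, obj2] = sqp (x0, @phi, @g, [], [], []);

assert (x1, x2);
assert (obj1, obj2);
```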
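The Hessian approximation B in the main loop is maintained with the damped BFGS formula flagged by the "Damped BFGS" comment in the diff. A minimal standalone sketch of that update, with variable names matching the loop body; the function wrapper is ours for illustration:

```
function B = damped_bfgs_update (B, delx, y)
  ## Powell-style damping: if the curvature delx'*y is too small relative
  ## to delx'*B*delx, blend y toward B*delx so B stays positive definite.
  d1 = delx' * B * delx;
  t2 = delx' * y;
  if (t2 < 0.2 * d1)
    theta = 0.8 * d1 / (d1 - t2);
  else
    theta = 1;
  endif
  r = theta*y + (1-theta)*B*delx;
  d2 = delx' * r;
  ## Standard BFGS rank-two update written with the damped vector r.
  B = B - B*delx*delx'*B/d1 + r*r'/d2;
endfunction
```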
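When bounds are supplied, the rewritten option handling routes the inequality constraints through the internal helpers cf_ub_lb and cigrad_ub_lb, whose bodies lie outside this hunk. A rough sketch of what cf_ub_lb presumably computes, assuming the usual conversion of box constraints into inequalities; treat this as a hypothetical reconstruction, not the actual helper:

```
function res = cf_ub_lb (x)
  ## Hypothetical reconstruction: express lb <= x <= ub as inequality
  ## constraints res(x) >= 0, appended to any user-supplied constraints.
  global __sqp_cifcn__ __sqp_lb__ __sqp_ub__;
  res = [x - __sqp_lb__; __sqp_ub__ - x];
  if (! isempty (__sqp_cifcn__))
    res = [feval(__sqp_cifcn__, x); res];
  endif
endfunction
```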