Mercurial > hg > octave-nkf
diff scripts/optimization/sqp.m @ 5289:b98bf1d70a0a
[project @ 2005-04-21 14:43:14 by jwe]
author | jwe |
---|---|
date | Thu, 21 Apr 2005 14:43:20 +0000 |
parents | |
children | 4c8a2e4e0717 |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/scripts/optimization/sqp.m @@ -0,0 +1,603 @@ +## Copyright (C) 2005 John W. Eaton +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, write to the Free +## Software Foundation, 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA. + +## -*- texinfo -*- +## @deftypefn {Function File} {[@var{x}, @var{obj}, @var{info}, @var{iter}, @var{nf}, @var{lambda}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}) +## Solve the nonlinear program +## @ifinfo +## +## @example +## min phi (x) +## x +## @end example +## +## @end ifinfo +## @iftex +## @tex +## @end tex +## @end iftex +## subject to +## @ifinfo +## +## @example +## g(x) = 0 +## h(x) >= 0 +## @end example +## @end ifinfo +## @iftex +## @tex +## @end tex +## @end iftex +## +## @noindent +## using a successive quadratic programming method. +## +## The first argument is the initial guess for the vector @var{x}. +## +## The second argument is a function handle pointing to the ojective +## function. The objective function must be of the form +## +## @example +## y = phi (x) +## @end example +## +## @noindent +## in which @var{x} is a vector and @var{y} is a scalar. +## +## The second argument may also be a 2- or 3-element cell array of +## function handles. The first element should point to the objective +## function, the second should point to a function that computes the +## gradient of the objective function, and the third should point to a +## function to compute the hessian of the objective function. If the +## gradient function is not supplied, the gradient is computed by finite +## differences. If the hessian function is not supplied, a BFGS update +## formula is used to approximate the hessian. +## +## If supplied, the gradient function must be of the form +## +## @example +## g = gradient (x) +## @end example +## +## @noindent +## in which @var{x} is a vector and @var{g} is a vector. +## +## If supplied, the hessian function must be of the form +## +## @example +## h = hessian (x) +## @end example +## +## @noindent +## in which @var{x} is a vector and @var{h} is a matrix. +## +## The third and fourth arguments are function handles pointing to +## functions that compute the equality constraints and the inequality +## constraints, respectively. +## +## If your problem does not have equality (or inequality) constraints, +## you may pass an empty matrix for @var{cef} (or @var{cif}). +## +## If supplied, the equality and inequality constraint functions must be +## of the form +## +## @example +## r = f (x) +## @end example +## +## @noindent +## in which @var{x} is a vector and @var{r} is a vector. +## +## The third and fourth arguments may also be 2-element cell arrays of +## function handles. The first element should point to the constraint +## function and the second should point to a function that computes the +## gradient of the constraint function: +## +## @example +## [ d f(x) d f(x) d f(x) ] +## transpose ( [ ------ ----- ... ------ ] ) +## [ dx_1 dx_2 dx_N ] +## @end example +## +## Here is an example of calling @code{sqp}: +## +## @example +## function r = g (x) +## r = [ sumsq(x)-10; x(2)*x(3)-5*x(4)*x(5); x(1)^3+x(2)^3+1]; +## endfunction +## +## function obj = phi (x) +## obj = exp(prod(x)) - 0.5*(x(1)^3+x(2)^3+1)^2; +## endfunction +## +## x0 = [-1.8; 1.7; 1.9; -0.8; -0.8]; +## +## [x, obj, info, iter, nf, lambda] = sqp (x0, @@phi, @@g, []) +## +## x = +## +## -1.71714 +## 1.59571 +## 1.82725 +## -0.76364 +## -0.76364 +## +## obj = 0.053950 +## info = 101 +## iter = 8 +## nf = 10 +## lambda = +## +## -0.0401627 +## 0.0379578 +## -0.0052227 +## @end example +## +## The value returned in @var{info} may be one of the following: +## @table @asis +## @item 101 +## The algorithm terminated because the norm of the last step was less +## than @code{tol * norm (x))} (the value of tol is currently fixed at +## @code{sqrt (eps)}---edit @file{sqp.m} to modify this value. +## @item 102 +## The BFGS update failed. +## @item 103 +## The maximum number of iterations was reached (the maximum number of +## allowed iterations is currently fixed at 100---edit @file{sqp.m} to +## increase this value). +## @end table +## @end deftypefn +## @seealso{qp} + +function [x, obj, info, iter, nf, lambda] = sqp (x, objf, cef, cif) + + global nfun; + global __sqp_obj_fun__; + global __sqp_ce_fun__; + global __sqp_ci_fun__; + + if (nargin >= 2 && nargin <= 4) + + ## Choose an initial NxN symmetric positive definite Hessan + ## approximation B. + + n = length (x); + + B = eye (n, n); + + ## Evaluate objective function, constraints, and gradients at initial + ## value of x. + ## + ## obj_fun + ## obj_grad + ## ce_fun -- equality constraint functions + ## ci_fun -- inequality constraint functions + ## A == [grad_{x_1} cx_fun, grad_{x_2} cx_fun, ..., grad_{x_n} cx_fun]^T + + obj_grd = @fd_obj_grd; + have_hess = 0; + if (iscell (objf)) + if (length (objf) > 0) + __sqp_obj_fun__ = obj_fun = objf{1}; + if (length (objf) > 1) + obj_grd = objf{2}; + if (length (objf) > 2) + obj_hess = objf{3}; + have_hess = 1; + B = feval (obj_hess, x); + endif + endif + else + error ("sqp: invalid objective function"); + endif + else + __sqp_obj_fun__ = obj_fun = objf; + endif + + ce_fun = @empty_cf; + ce_grd = @empty_jac; + if (nargin > 2) + ce_grd = @fd_ce_jac; + if (iscell (cef)) + if (length (cef) > 0) + __sqp_ce_fun__ = ce_fun = cef{1}; + if (length (cef) > 1) + ce_grd = cef{2}; + endif + else + error ("sqp: invalid equality constraint function"); + endif + elseif (! isempty (cef)) + ce_fun = cef; + endif + endif + __sqp_ce_fun__ = ce_fun; + + ci_fun = @empty_cf; + ci_grd = @empty_jac; + if (nargin > 3) + ci_grd = @fd_ci_jac; + if (iscell (cif)) + if (length (cif) > 0) + __sqp_ci_fun__ = ci_fun = cif{1}; + if (length (cif) > 1) + ci_grd = cif{2}; + endif + else + error ("sqp: invalid equality constraint function"); + endif + elseif (! isempty (cif)) + ci_fun = cif; + endif + endif + __sqp_ci_fun__ = ci_fun; + + iter_max = 100; + + iter = 0; + + obj = feval (obj_fun, x); + nfun = 1; + + c = feval (obj_grd, x); + + ce = feval (ce_fun, x); + F = feval (ce_grd, x); + + ci = feval (ci_fun, x); + C = feval (ci_grd, x); + + A = [F; C]; + + ## Choose an initial lambda (x is provided by the caller). + + lambda = 100 * ones (rows (A), 1); + + qp_iter = 1; + alpha = 1; + + ## report (); + + ## report (iter, qp_iter, alpha, nfun, obj); + + while (++iter < iter_max) + + ## Check convergence. This is just a simple check on the first + ## order necessary conditions. + + ## IDX is the indices of the active inequality constraints. + + nr_f = rows (F); + + lambda_e = lambda((1:nr_f)'); + lambda_i = lambda((nr_f+1:end)'); + + con = [ce; ci]; + + t0 = norm (c - A' * lambda); + t1 = norm (ce); + t2 = all (ci >= 0); + t3 = all (lambda_i >= 0); + t4 = norm (lambda .* con); + + tol = sqrt (eps); + + if (t2 && t3 && max ([t0; t1; t4]) < tol) + break; + endif + + ## Compute search direction p by solving QP. + + g = -ce; + d = -ci; + + ## Discard inequality constraints that have -Inf bounds since those + ## will never be active. + idx = isinf (d) & d < 0; + d(idx) = []; + C(idx,:) = []; + + [p, obj_qp, INFO, lambda] = qp (x, B, c, F, g, [], [], d, C, + Inf * ones (size (d))); + + info = INFO.info; + + ## Check QP solution and attempt to recover if it has failed. + + ## Choose mu such that p is a descent direction for the chosen + ## merit function phi. + + [x_new, alpha, obj_new] = linesearch_L1 (x, p, obj_fun, obj_grd, + ce_fun, ci_fun, lambda, obj); + + ## Evaluate objective function, constraints, and gradients at + ## x_new. + + c_new = feval (obj_grd, x_new); + + ce_new = feval (ce_fun, x_new); + F_new = feval (ce_grd, x_new); + + ci_new = feval (ci_fun, x_new); + C_new = feval (ci_grd, x_new); + + A_new = [F_new; C_new]; + + ## Set + ## + ## s = alpha * p + ## y = grad_x L (x_new, lambda) - grad_x L (x, lambda}) + + y = c_new - c; + + if (! isempty (A)) + t = ((A_new - A)'*lambda); + y -= t; + endif + + delx = x_new - x; + + if (norm (delx) < tol * norm (x)) + info = 101; + break; + endif + + if (have_hess) + + B = feval (obj_hess, x); + + else + + ## Update B using a quasi-Newton formula. + + delxt = delx'; + + ## Damped BFGS. Or maybe we would actually want to use the Hessian + ## of the Lagrangian, computed directly. + + d1 = delxt*B*delx; + + t1 = 0.2 * d1; + t2 = delxt*y; + + if (t2 < t1) + theta = 0.8*d1/(d1 - t2); + else + theta = 1; + endif + + r = theta*y + (1-theta)*B*delx; + + d2 = delxt*r; + + if (d1 == 0 || d2 == 0) + info = 102; + break; + endif + + B = B - B*delx*delxt*B/d1 + r*r'/d2; + + endif + + x = x_new; + + obj = obj_new; + + c = c_new; + + ce = ce_new; + F = F_new; + + ci = ci_new; + C = C_new; + + A = A_new; + + ## report (iter, qp_iter, alpha, nfun, obj); + + endwhile + + if (iter >= iter_max) + info = 103; + endif + + nf = nfun; + + else + + usage ("[x, obj, info, iter, nf, lambda] = sqp (x, objf, cef, cif)"); + + endif + +### endfunction + + +function [merit, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu) + + global nfun; + + ce = feval (ce_fun, x); + ci = feval (ci_fun, x); + + idx = ci < 0; + + con = [ce; ci(idx)]; + + if (isempty (obj)) + obj = feval (obj_fun, x); + nfun++; + endif + + merit = obj; + t = norm (con, 1) / mu; + + if (! isempty (t)) + merit += t; + endif + +### endfunction + + +function [x_new, alpha, obj] = linesearch_L1 (x, p, obj_fun, obj_grd, + ce_fun, ci_fun, lambda, obj) + + ## Choose parameters + ## + ## eta in the range (0, 0.5) + ## tau in the range (0, 1) + + eta = 0.25; + tau = 0.5; + + delta_bar = sqrt (eps); + + if (isempty (lambda)) + mu = 1 / delta_bar; + else + mu = 1 / (norm (lambda, Inf) + delta_bar); + endif + + alpha = 1; + + c = feval (obj_grd, x); + ce = feval (ce_fun, x); + + [phi_x_mu, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu); + + D_phi_x_mu = c' * p; + d = feval (ci_fun, x); + ## only those elements of d corresponding + ## to violated constraints should be included. + idx = d < 0; + t = - norm ([ce; d(idx)], 1) / mu; + if (! isempty (t)) + D_phi_x_mu += t; + endif + + while (1) + [p1, obj] = phi_L1 ([], obj_fun, ce_fun, ci_fun, x+alpha*p, mu); + p2 = phi_x_mu+eta*alpha*D_phi_x_mu; + if (p1 > p2) + ## Reset alpha = tau_alpha * alpha for some tau_alpha in the + ## range (0, tau). + tau_alpha = 0.9 * tau; ## ?? + alpha = tau_alpha * alpha; + else + break; + endif + endwhile + + ## Set x_new = x + alpha * p; + + x_new = x + alpha * p; + +### endfunction + + +function report (iter, qp_iter, alpha, nfun, obj) + + if (nargin == 0) + printf (" Itn ItQP Step Nfun Objective\n"); + else + printf ("%5d %4d %8.1g %5d %13.6e\n", iter, qp_iter, alpha, nfun, obj); + endif + +### endfunction + + +function grd = fdgrd (f, x) + + if (! isempty (f)) + y0 = feval (f, x); + nx = length (x); + grd = zeros (nx, 1); + deltax = sqrt (eps); + for i = 1:nx + t = x(i); + x(i) += deltax; + grd(i) = (feval (f, x) - y0) / deltax; + x(i) = t; + endfor + else + grd = zeros (0, 1); + endif + +### endfunction + + +function jac = fdjac (f, x) + + if (! isempty (f)) + y0 = feval (f, x); + nf = length (y0); + nx = length (x); + jac = zeros (nf, nx); + deltax = sqrt (eps); + for i = 1:nx + t = x(i); + x(i) += deltax; + jac(:,i) = (feval (f, x) - y0) / deltax; + x(i) = t; + endfor + else + jac = zeros (0, nx); + endif + +### endfunction + + +function grd = fd_obj_grd (x) + + global __sqp_obj_fun__; + + grd = fdgrd (__sqp_obj_fun__, x); + +### endfunction + + +function res = empty_cf (x) + + res = zeros (0, 1); + +### endfunction + + +function res = empty_jac (x) + + res = zeros (0, length (x)); + +### endfunction + + +function jac = fd_ce_jac (x) + + global __sqp_ce_fun__; + + jac = fdjac (__sqp_ce_fun__, x); + +### endfunction + + +function jac = fd_ci_jac (x) + + global __sqp_ci_fun__; + + jac = fdjac (__sqp_ci_fun__, x); + +### endfunction