diff scripts/optimization/sqp.m @ 5289:b98bf1d70a0a

[project @ 2005-04-21 14:43:14 by jwe]
author jwe
date Thu, 21 Apr 2005 14:43:20 +0000
parents
children 4c8a2e4e0717
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/scripts/optimization/sqp.m
@@ -0,0 +1,603 @@
+## Copyright (C) 2005 John W. Eaton
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, write to the Free
+## Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+## 02111-1307, USA.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {[@var{x}, @var{obj}, @var{info}, @var{iter}, @var{nf}, @var{lambda}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h})
+## Solve the nonlinear program
+## @ifinfo
+##
+## @example
+##      min phi (x)
+##       x
+## @end example
+##
+## @end ifinfo
+## @iftex
+## @tex
+## @end tex
+## @end iftex
+## subject to
+## @ifinfo
+##
+## @example
+##      g(x)  = 0
+##      h(x) >= 0
+## @end example
+## @end ifinfo
+## @iftex
+## @tex
+## @end tex
+## @end iftex
+##
+## @noindent
+## using a successive quadratic programming method.
+##
+## The first argument is the initial guess for the vector @var{x}.
+##
+## The second argument is a function handle pointing to the ojective
+## function.  The objective function must be of the form
+##
+## @example
+##      y = phi (x)
+## @end example
+##
+## @noindent
+## in which @var{x} is a vector and @var{y} is a scalar.
+##
+## The second argument may also be a 2- or 3-element cell array of
+## function handles.  The first element should point to the objective
+## function, the second should point to a function that computes the
+## gradient of the objective function, and the third should point to a
+## function to compute the hessian of the objective function.  If the
+## gradient function is not supplied, the gradient is computed by finite
+## differences.  If the hessian function is not supplied, a BFGS update
+## formula is used to approximate the hessian.
+##
+## If supplied, the gradient function must be of the form
+##
+## @example
+##      g = gradient (x)
+## @end example
+##
+## @noindent
+## in which @var{x} is a vector and @var{g} is a vector.
+##
+## If supplied, the hessian function must be of the form
+##
+## @example
+##      h = hessian (x)
+## @end example
+##
+## @noindent
+## in which @var{x} is a vector and @var{h} is a matrix.
+##
+## The third and fourth arguments are function handles pointing to
+## functions that compute the equality constraints and the inequality
+## constraints, respectively.
+##
+## If your problem does not have equality (or inequality) constraints,
+## you may pass an empty matrix for @var{cef} (or @var{cif}).
+##
+## If supplied, the equality and inequality constraint functions must be
+## of the form
+##
+## @example
+##      r = f (x)
+## @end example
+##
+## @noindent
+## in which @var{x} is a vector and @var{r} is a vector.
+## 
+## The third and fourth arguments may also be 2-element cell arrays of
+## function handles.  The first element should point to the constraint
+## function and the second should point to a function that computes the
+## gradient of the constraint function:
+##
+## @example
+##                 [ d f(x)   d f(x)        d f(x) ]
+##     transpose ( [ ------   -----   ...   ------ ] )
+##                 [  dx_1     dx_2          dx_N  ]
+## @end example
+##
+## Here is an example of calling @code{sqp}:
+##
+## @example
+##      function r = g (x)
+##        r = [ sumsq(x)-10; x(2)*x(3)-5*x(4)*x(5); x(1)^3+x(2)^3+1];
+##      endfunction
+##
+##      function obj = phi (x)
+##        obj = exp(prod(x)) - 0.5*(x(1)^3+x(2)^3+1)^2;
+##      endfunction
+##
+##      x0 = [-1.8; 1.7; 1.9; -0.8; -0.8];
+##
+##      [x, obj, info, iter, nf, lambda] = sqp (x0, @@phi, @@g, [])
+##
+##      x =
+##      
+##        -1.71714
+##         1.59571
+##         1.82725
+##        -0.76364
+##        -0.76364
+##      
+##      obj = 0.053950
+##      info = 101
+##      iter = 8
+##      nf = 10
+##      lambda =
+##      
+##        -0.0401627
+##         0.0379578
+##        -0.0052227
+## @end example
+##
+## The value returned in @var{info} may be one of the following:
+## @table @asis
+## @item 101
+## The algorithm terminated because the norm of the last step was less
+## than @code{tol * norm (x))} (the value of tol is currently fixed at
+## @code{sqrt (eps)}---edit @file{sqp.m} to modify this value.
+## @item 102
+## The BFGS update failed.
+## @item 103
+## The maximum number of iterations was reached (the maximum number of
+## allowed iterations is currently fixed at 100---edit @file{sqp.m} to
+## increase this value).
+## @end table
+## @end deftypefn
+## @seealso{qp}
+
+function [x, obj, info, iter, nf, lambda] = sqp (x, objf, cef, cif)
+
+  global nfun;
+  global __sqp_obj_fun__;
+  global __sqp_ce_fun__;
+  global __sqp_ci_fun__;
+
+  if (nargin >= 2 && nargin <= 4)
+
+    ## Choose an initial NxN symmetric positive definite Hessan
+    ## approximation B.
+
+    n = length (x);
+
+    B = eye (n, n);
+
+    ## Evaluate objective function, constraints, and gradients at initial
+    ## value of x.
+    ##
+    ## obj_fun
+    ## obj_grad
+    ## ce_fun  -- equality constraint functions
+    ## ci_fun  -- inequality constraint functions
+    ## A == [grad_{x_1} cx_fun, grad_{x_2} cx_fun, ..., grad_{x_n} cx_fun]^T
+
+    obj_grd = @fd_obj_grd;
+    have_hess = 0;
+    if (iscell (objf))
+      if (length (objf) > 0)
+	__sqp_obj_fun__ = obj_fun = objf{1};
+	if (length (objf) > 1)
+	  obj_grd = objf{2};
+	  if (length (objf) > 2)
+	    obj_hess = objf{3};
+	    have_hess = 1;
+	    B = feval (obj_hess, x);
+	  endif
+	endif
+      else
+	error ("sqp: invalid objective function");
+      endif
+    else
+      __sqp_obj_fun__ = obj_fun = objf;
+    endif
+
+    ce_fun = @empty_cf;
+    ce_grd = @empty_jac;
+    if (nargin > 2)
+      ce_grd = @fd_ce_jac;
+      if (iscell (cef))
+	if (length (cef) > 0)
+	  __sqp_ce_fun__ = ce_fun = cef{1};
+	  if (length (cef) > 1)
+	    ce_grd = cef{2};
+	  endif
+	else
+	  error ("sqp: invalid equality constraint function");
+	endif
+      elseif (! isempty (cef))
+	ce_fun = cef;
+      endif
+    endif
+    __sqp_ce_fun__ = ce_fun;
+
+    ci_fun = @empty_cf;
+    ci_grd = @empty_jac;
+    if (nargin > 3)
+      ci_grd = @fd_ci_jac;
+      if (iscell (cif))
+	if (length (cif) > 0)
+	  __sqp_ci_fun__ = ci_fun = cif{1};
+	  if (length (cif) > 1)
+	    ci_grd = cif{2};
+	  endif
+	else
+	  error ("sqp: invalid equality constraint function");
+	endif
+      elseif (! isempty (cif))
+	ci_fun = cif;
+      endif
+    endif
+    __sqp_ci_fun__ = ci_fun;
+
+    iter_max = 100;
+
+    iter = 0;
+
+    obj = feval (obj_fun, x);
+    nfun = 1;
+
+    c = feval (obj_grd, x);
+
+    ce = feval (ce_fun, x);
+    F = feval (ce_grd, x);
+
+    ci = feval (ci_fun, x);
+    C = feval (ci_grd, x);
+
+    A = [F; C];
+
+    ## Choose an initial lambda (x is provided by the caller).
+
+    lambda = 100 * ones (rows (A), 1);
+
+    qp_iter = 1;
+    alpha = 1;
+
+    ## report ();
+
+    ## report (iter, qp_iter, alpha, nfun, obj);
+
+    while (++iter < iter_max)
+
+      ## Check convergence.  This is just a simple check on the first
+      ## order necessary conditions.
+
+      ## IDX is the indices of the active inequality constraints.
+
+      nr_f = rows (F);
+
+      lambda_e = lambda((1:nr_f)');
+      lambda_i = lambda((nr_f+1:end)');
+
+      con = [ce; ci];
+
+      t0 = norm (c - A' * lambda);
+      t1 = norm (ce);
+      t2 = all (ci >= 0);
+      t3 = all (lambda_i >= 0);
+      t4 = norm (lambda .* con);
+
+      tol = sqrt (eps);
+
+      if (t2 && t3 && max ([t0; t1; t4]) < tol)
+	break;
+      endif
+
+      ## Compute search direction p by solving QP.
+
+      g = -ce;
+      d = -ci;
+
+      ## Discard inequality constraints that have -Inf bounds since those
+      ## will never be active.
+      idx = isinf (d) & d < 0;
+      d(idx) = [];
+      C(idx,:) = [];
+
+      [p, obj_qp, INFO, lambda] = qp (x, B, c, F, g, [], [], d, C,
+				      Inf * ones (size (d)));
+
+      info = INFO.info;
+
+      ## Check QP solution and attempt to recover if it has failed.
+
+      ## Choose mu such that p is a descent direction for the chosen
+      ## merit function phi.
+
+      [x_new, alpha, obj_new] = linesearch_L1 (x, p, obj_fun, obj_grd,
+					       ce_fun, ci_fun, lambda, obj);
+
+      ## Evaluate objective function, constraints, and gradients at
+      ## x_new.
+
+      c_new = feval (obj_grd, x_new);
+
+      ce_new = feval (ce_fun, x_new);
+      F_new = feval (ce_grd, x_new);
+
+      ci_new = feval (ci_fun, x_new);
+      C_new = feval (ci_grd, x_new);
+
+      A_new = [F_new; C_new];
+
+      ## Set
+      ##
+      ## s = alpha * p
+      ## y = grad_x L (x_new, lambda) - grad_x L (x, lambda})
+
+      y = c_new - c;
+
+      if (! isempty (A))
+	t = ((A_new - A)'*lambda);
+	y -= t;
+      endif
+
+      delx = x_new - x;
+
+      if (norm (delx) < tol * norm (x))
+	info = 101;
+	break;
+      endif
+
+      if (have_hess)
+
+	B = feval (obj_hess, x);
+
+      else
+
+	## Update B using a quasi-Newton formula.
+
+	delxt = delx';
+
+	## Damped BFGS.  Or maybe we would actually want to use the Hessian
+	## of the Lagrangian, computed directly.
+
+	d1 = delxt*B*delx;
+
+	t1 = 0.2 * d1;
+	t2 = delxt*y;
+
+	if (t2 < t1)
+	  theta = 0.8*d1/(d1 - t2);
+	else
+	  theta = 1;
+	endif
+
+	r = theta*y + (1-theta)*B*delx;
+
+	d2 = delxt*r;
+
+	if (d1 == 0 || d2 == 0)
+	  info = 102;
+	  break;
+	endif
+
+	B = B - B*delx*delxt*B/d1 + r*r'/d2;
+
+      endif
+
+      x = x_new;
+
+      obj = obj_new;
+
+      c = c_new;
+
+      ce = ce_new;
+      F = F_new;
+
+      ci = ci_new;
+      C = C_new;
+
+      A = A_new;
+
+      ## report (iter, qp_iter, alpha, nfun, obj);
+
+    endwhile
+
+    if (iter >= iter_max)
+      info = 103;
+    endif
+
+    nf = nfun;
+
+  else
+
+    usage ("[x, obj, info, iter, nf, lambda] = sqp (x, objf, cef, cif)");
+
+  endif
+
+### endfunction
+
+
+function [merit, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu)
+
+  global nfun;
+
+  ce = feval (ce_fun, x);
+  ci = feval (ci_fun, x);
+
+  idx = ci < 0;
+
+  con = [ce; ci(idx)];
+
+  if (isempty (obj))
+    obj = feval (obj_fun, x);
+    nfun++;
+  endif
+
+  merit = obj;
+  t = norm (con, 1) / mu;
+
+  if (! isempty (t))
+    merit += t;
+  endif
+
+### endfunction
+
+
+function [x_new, alpha, obj] = linesearch_L1 (x, p, obj_fun, obj_grd,
+					      ce_fun, ci_fun, lambda, obj)
+
+  ## Choose parameters
+  ##
+  ## eta in the range (0, 0.5)
+  ## tau in the range (0, 1)
+
+  eta = 0.25;
+  tau = 0.5;
+
+  delta_bar = sqrt (eps);
+
+  if (isempty (lambda))
+    mu = 1 / delta_bar;
+  else
+    mu = 1 / (norm (lambda, Inf) + delta_bar);
+  endif
+
+  alpha = 1;
+
+  c = feval (obj_grd, x);
+  ce = feval (ce_fun, x);
+
+  [phi_x_mu, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu);
+
+  D_phi_x_mu = c' * p;
+  d = feval (ci_fun, x);
+  ## only those elements of d corresponding
+  ## to violated constraints should be included.
+  idx = d < 0;
+  t = - norm ([ce; d(idx)], 1) / mu;
+  if (! isempty (t))
+    D_phi_x_mu += t;
+  endif
+
+  while (1)
+    [p1, obj] = phi_L1 ([], obj_fun, ce_fun, ci_fun, x+alpha*p, mu);
+    p2 = phi_x_mu+eta*alpha*D_phi_x_mu;
+    if (p1 > p2)
+      ## Reset alpha = tau_alpha * alpha for some tau_alpha in the
+      ## range (0, tau).
+      tau_alpha = 0.9 * tau;  ## ??
+      alpha = tau_alpha * alpha;
+    else
+      break;
+    endif
+  endwhile
+
+  ## Set x_new = x + alpha * p;
+
+  x_new = x + alpha * p;
+
+### endfunction
+
+
+function report (iter, qp_iter, alpha, nfun, obj)
+
+  if (nargin == 0)
+    printf ("  Itn ItQP     Step  Nfun     Objective\n");
+  else
+    printf ("%5d %4d %8.1g %5d %13.6e\n", iter, qp_iter, alpha, nfun, obj);
+  endif
+
+### endfunction
+
+
+function grd = fdgrd (f, x)
+
+  if (! isempty (f))
+    y0 = feval (f, x);
+    nx = length (x);
+    grd = zeros (nx, 1);
+    deltax = sqrt (eps);
+    for i = 1:nx
+      t = x(i);
+      x(i) += deltax;
+      grd(i) = (feval (f, x) - y0) / deltax;
+      x(i) = t;
+    endfor
+  else
+    grd = zeros (0, 1);
+  endif
+
+### endfunction
+
+
+function jac = fdjac (f, x)
+
+  if (! isempty (f))
+    y0 = feval (f, x);
+    nf = length (y0);
+    nx = length (x);
+    jac = zeros (nf, nx);
+    deltax = sqrt (eps);
+    for i = 1:nx
+      t = x(i);
+      x(i) += deltax;
+      jac(:,i) = (feval (f, x) - y0) / deltax;
+      x(i) = t;
+    endfor
+  else
+    jac = zeros  (0, nx);
+  endif
+
+### endfunction
+
+
+function grd = fd_obj_grd (x)
+
+  global __sqp_obj_fun__;
+
+  grd = fdgrd (__sqp_obj_fun__, x);
+
+### endfunction
+
+
+function res = empty_cf (x)
+
+  res = zeros (0, 1);
+
+### endfunction
+
+
+function res = empty_jac (x)
+
+  res = zeros (0, length (x));
+
+### endfunction
+
+
+function jac = fd_ce_jac (x)
+
+  global __sqp_ce_fun__;
+
+  jac = fdjac (__sqp_ce_fun__, x);
+
+### endfunction
+
+
+function jac = fd_ci_jac (x)
+
+  global __sqp_ci_fun__;
+
+  jac = fdjac (__sqp_ci_fun__, x);
+
+### endfunction