Mercurial > hg > octave-lyh
changeset 8590:c136d313206a
improvements to fsolve
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Mon, 26 Jan 2009 11:29:16 +0100 |
parents | 0131fa223dbc |
children | ffc9e9737507 |
files | scripts/ChangeLog scripts/optimization/__fdjac__.m scripts/optimization/fsolve.m |
diffstat | 3 files changed, 111 insertions(+), 46 deletions(-) [+] |
line wrap: on
line diff
--- a/scripts/ChangeLog +++ b/scripts/ChangeLog @@ -1,3 +1,11 @@ +2009-01-17 Jaroslav Hajek <highegg@gmail.com> + + * optimization/__fdjac__.m: Fix setting up h. + * optimization/fsolve.m: Allow underdetermined systems. Use QR for + large enough square and overdetermined systems, with pivoting in the + first step. Simplify options. Adjust defaults - make TR radius + tolerance less stringent. Support DisplayFcn. + 2008-12-24 Ben Abbott <bpabbott@mac.com> * path/savepath.m: Respect cmd-line and env paths.
--- a/scripts/optimization/__fdjac__.m +++ b/scripts/optimization/__fdjac__.m @@ -24,7 +24,7 @@ err = sqrt (max (eps, err)); fvec = fcn (x); fv = fvec(:); - h = max (abs (x(:))*err, (norm (fv, Inf)*eps)/realmax); + h = abs (x(:))*err; h(h == 0) = err; fjac = zeros (length (fv), numel (x)); for i = 1:numel (x)
--- a/scripts/optimization/fsolve.m +++ b/scripts/optimization/fsolve.m @@ -1,4 +1,4 @@ -## Copyright (C) 2008 VZLU Prague, a.s. +## Copyright (C) 2008, 2009 VZLU Prague, a.s. ## ## This file is part of Octave. ## @@ -32,8 +32,8 @@ ## @var{options} is a structure specifying additional options. ## Currently, @code{fsolve} recognizes these options: ## @code{"FunValCheck"}, @code{"OutputFcn"}, @code{"TolX"}, -## @code{"TolFun"}, @code{"MaxIter"}, @code{"MaxFunEvals"}, and -## @code{"Jacobian"}. +## @code{"TolFun"}, @code{"MaxIter"}, @code{"MaxFunEvals"}, +## @code{"Jacobian"} and @code{"Updating"}. ## ## If @code{"Jacobian"} is @code{"on"}, it specifies that @var{fcn}, ## called with 2 output arguments, also returns the Jacobian matrix @@ -41,6 +41,13 @@ ## the termination tolerance in the unknown variables, while ## @code{"TolFun"} is a tolerance for equations. Default is @code{1e1*eps} ## for @code{"TolX"} and @code{1e2*eps} for @code{"TolFun"}. +## If @code{"Updating"} is true, the function will attempt to use Broyden +## updates to update the Jacobian, in order to reduce the amount of jacobian +## calculations. +## If your user function always calculates the Jacobian (regardless of number +## of output arguments), this option provides no advantage and should be set to +## false. +## ## For description of the other options, see @code{optimset}. ## ## On return, @var{fval} contains the value of the function @var{fcn} @@ -78,7 +85,8 @@ maxiter = optimget (options, "MaxIter", Inf); maxfev = optimget (options, "MaxFunEvals", Inf); outfcn = optimget (options, "OutputFcn"); - pivoting = optimget (options, "Pivoting", false); + updating = optimget (options, "Updating", true); + funvalchk = strcmpi (optimget (options, "FunValCheck", "off"), "on"); if (funvalchk) @@ -91,8 +99,8 @@ macheps = eps (class (x0)); - tolx = optimget (options, "TolX", 1e1*macheps); - tolf = optimget (options, "TolFun", 1e2*macheps); + tolx = optimget (options, "TolX", sqrt (macheps)); + tolf = optimget (options, "TolFun", sqrt (macheps)); factor = 100; ## FIXME: TypicalX corresponds to user scaling (???) @@ -121,22 +129,30 @@ fn = norm (fvec); m = length (fvec); n = length (x); - if (m < n) - error ("fsolve:under", "cannot solve underdetermined systems"); - elseif (m > n && niter == 1) - if (isempty (optimget (options, "TolFun"))) - warning ("an overdetermined system cannot usually be solved exactly; consider specifying the TolFun option"); + + ## For square and overdetermined systems, we update a (pivoted) QR + ## factorization of the jacobian to avoid solving a full system in each + ## step. In this case, we pass a triangular matrix to __dogleg__. + ## Pivoted QR is used for slightly better robustness and invariance + ## w.r.t. permutations of variables. + useqr = updating && m >= n && n > 10; + + if (useqr) + ## Get QR factorization of the jacobian, optionally with column pivoting. + ## TODO: pivoting is only done in the first step, to get invariance + ## w.r.t. permutations of variables. Maybe it could be beneficial to + ## repivot occassionally? + if (niter == 1) + [q, r, p] = qr (fjac, 0); + ## p is a column vector. Blame Matlab for the inconsistency. + ## Octave can handle permutation matrices gracefully. + p = eye (n)(:, p); + else + [q, r] = qr (fjac*p, 0); endif endif - - ## Get QR factorization of the jacobian. - if (pivoting) - [q, r, p] = qr (fjac); - else - [q, r] = qr (fjac); - endif - ## Get column norms, use them as scaling factor. + ## Get column norms, use them as scaling factors. jcn = norm (fjac, 'columns').'; if (niter == 1) if (autodg) @@ -144,7 +160,8 @@ dg(dg == 0) = 1; endif xn = norm (dg .* x); - delta = factor * xn; + ## FIXME: something better? + delta = max (factor * xn, 1); endif ## Rescale if necessary. @@ -157,22 +174,33 @@ ## Inner loop. while (niter <= maxiter && nfev < maxfev && ! info) - qtf = q'*fvec; + if (useqr) + tr_mat = r; + tr_vec = q'*fvec; + else + tr_mat = fjac; + tr_vec = fvec; + endif - ## Get TR model (dogleg) minimizer - ## in case of an overdetermined system, get lsq solution. - s = - __dogleg__ (r(1:n,:), qtf(1:n), dg, delta); - if (pivoting) - s = p * s; + ## Get trust-region model (dogleg) minimizer. + if (useqr) + qtf = q'*fvec; + s = - __dogleg__ (r, qtf, dg, delta); + w = qtf + r * s; + s = p * s; + else + s = - __dogleg__ (fjac, fvec, dg, delta); + w = fvec + fjac * s; endif + sn = norm (dg .* s); - if (niter == 1) delta = min (delta, sn); endif fvec1 = fcn (reshape (x + s, xsiz)) (:); fn1 = norm (fvec1); + nfev ++; if (fn1 < fn) ## Scaled actual reduction. @@ -182,11 +210,6 @@ endif ## Scaled predicted reduction, and ratio. - if (pivoting) - w = qtf + r*(p'*s); - else - w = qtf + r*s; - endif t = norm (w); if (t < fn) prered = 1 - (t/fn)^2; @@ -201,7 +224,7 @@ nsuc = 0; nfail ++; delta *= 0.5; - if (delta <= sqrt (macheps)*xn) + if (delta <= 1e1*macheps*xn) ## Trust region became uselessly small. info = -3; break; @@ -225,6 +248,20 @@ niter ++; endif + ## FIXME: should outputfcn be only called after a successful iteration? + if (outfcn) + optimvalues.iter = niter; + optimvalues.funccount = nfev; + optimvalues.fval = fn; + optimvalues.searchdirection = s; + state = 'iter'; + stop = outfcn (x, optimvalues, state); + if (stop) + info = -1; + break; + endif + endif + ## Tests for termination conditions. A mysterious place, anything ## can happen if you change something here... @@ -254,20 +291,25 @@ endif ## Criterion for recalculating jacobian. - if (nfail == 2) + if (! updating || nfail == 2) break; endif ## Compute the scaled Broyden update. - u = (fvec1 - q*w) / sn; - v = dg .* ((dg .* s) / sn); - if (pivoting) - v = p'*v; + if (useqr) + u = (fvec1 - q*w) / sn; + v = dg .* ((dg .* s) / sn); + v = p' * v; + + ## Update the QR factorization. + [q, r] = qrupdate (q, r, u, v); + else + u = (fvec1 - w); + v = dg .* ((dg .* s) / sn); + + ## update the jacobian + fjac += u * v'; endif - - ## Update the QR factorization. - [q, r] = qrupdate (q, r, u, v); - endwhile endwhile @@ -276,7 +318,7 @@ fvec = reshape (fvec, fsiz); output.iterations = niter; - output.funcCount = niter + 2; + output.funcCount = nfev; endfunction @@ -341,7 +383,7 @@ %! 2.395931; %! 2.005014 ]; %! tol = 1.0e-5; -%! [x, fval, info] = fsolve (@f, [ 0.5; 2.0; 2.5 ], optimset ("TolFun", 1e-6)); +%! [x, fval, info] = fsolve (@f, [ 0.5; 2.0; 2.5 ]); %! assert (info > 0); %! assert (norm (x - x_opt, Inf) < tol); %! assert (norm (fval) < tol); @@ -359,8 +401,23 @@ %! 2.395931; %! 2.005014 ]; %! tol = 1.0e-5; -%! opt = optimset ("Pivoting", true); +%! opt = optimset ("Updating", "qrp"); %! [x, fval, info] = fsolve (@f, [ 0.5; 2.0; 2.5 ], opt); %! assert (info > 0); %! assert (norm (x - x_opt, Inf) < tol); %! assert (norm (fval) < tol); + +%!test +%! b0 = 3; +%! a0 = 0.2; +%! x = 0:.5:5; +%! noise = 1e-5 * sin (100*x); +%! y = exp (-a0*x) + b0 + noise; +%! c_opt = [a0, b0]; +%! tol = 1e-5; +%! +%! [c, fval, info, output] = fsolve (@(c) (exp(-c(1)*x) + c(2) - y), [0, 0]); +%! assert (info > 0); +%! assert (norm (c - c_opt, Inf) < tol); +%! assert (norm (fval) < norm (noise)); +