Mercurial > hg > octave-lyh
comparison scripts/optimization/fsolve.m @ 8306:43795cf108d0
initial implementation of fsolve
remove old fsolve code
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Sun, 28 Sep 2008 21:09:35 +0200 |
parents | |
children | a762d9daa700 |
comparison
equal
deleted
inserted
replaced
8305:368b504777a8 | 8306:43795cf108d0 |
---|---|
1 ## Copyright (C) 2008 VZLU Prague, a.s. | |
2 ## | |
3 ## This file is part of Octave. | |
4 ## | |
5 ## Octave is free software; you can redistribute it and/or modify it | |
6 ## under the terms of the GNU General Public License as published by | |
7 ## the Free Software Foundation; either version 3 of the License, or (at | |
8 ## your option) any later version. | |
9 ## | |
10 ## Octave is distributed in the hope that it will be useful, but | |
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 ## General Public License for more details. | |
14 ## | |
15 ## You should have received a copy of the GNU General Public License | |
16 ## along with Octave; see the file COPYING. If not, see | |
17 ## <http://www.gnu.org/licenses/>. | |
18 ## | |
19 ## Author: Jaroslav Hajek <highegg@gmail.com> | |
20 | |
21 # -*- texinfo -*- | |
22 # @deftypefn{Function File} {} fsolve(@var{fcn}, @var{x0}, @var{options}) | |
23 # @deftypefnx{Function File} {[@var{x}, @var{fvec}, @var{info}, @var{output}, @var{fjac}]} = fsolve (@var{fcn}, @dots{}) | |
24 # Solves a system of nonlinear equations defined by the function @var{fcn}. | |
25 # @var{fcn} should accepts a vector (array) defining the unknown variables, | |
26 # and return a vector of left-hand sides of the equations. Right-hand sides | |
27 # are defined to be zeros. | |
28 # In other words, this function attempts to determine a vector @var{X} such | |
29 # that @code{@var{fcn}(@var{X})} gives (approximately) all zeros. | |
30 # @var{x0} determines a starting guess. The shape of @var{x0} is preserved | |
31 # in all calls to @var{fcn}, but otherwise it is treated as a column vector. | |
32 # @var{options} is a structure specifying additional options. Currently, fsolve | |
33 # recognizes these options: FunValCheck, OutputFcn, TolX, TolFun, MaxIter, | |
34 # MaxFunEvals and Jacobian. | |
35 # | |
36 # If Jacobian is 'on', it specifies that @var{fcn}, called with 2 output arguments, | |
37 # also returns the Jacobian matrix of right-hand sides at the requested point. | |
38 # TolX specifies the termination tolerance in the unknown variables, while | |
39 # TolFun is a tolerance for equations. Default is @code{1e1*eps} | |
40 # for TolX and @code{1e2*eps} for TolFun. | |
41 # For description of the other options, see @code{optimset}. | |
42 # | |
43 # On return, @var{fval} contains the value of the function @var{fcn} | |
44 # evaluated at @var{x}, and @var{info} may be one of the following values: | |
45 # | |
46 # @table @asis | |
47 # @item 1 | |
48 # Converged to a solution point. Relative residual error is less than specified | |
49 # by TolFun. | |
50 # @item 2 | |
51 # Last relative step size was less that TolX. | |
52 # @item 3 | |
53 # Last relative decrease in residual was less than TolF. | |
54 # @item 0 | |
55 # Iteration limit exceeded. | |
56 # @item -3 | |
57 # The trust region radius became excessively small. | |
58 # @end table | |
59 # | |
60 # Note: If you only have a single nonlinear equation of one variable, using | |
61 # @code{fzero} is usually a much better idea. | |
62 # @seealso{fzero,optimset} | |
63 # @end deftypefn | |
64 | |
65 function [x, fvec, info, output, fjac] = fsolve (fcn, x0, options) | |
66 | |
67 if (nargin < 3) | |
68 options = struct (); | |
69 endif | |
70 | |
71 xsiz = size (x0); | |
72 n = numel (x0); | |
73 | |
74 has_jac = strcmp (optimget (options, "Jacobian", "off"), "on"); | |
75 maxiter = optimget (options, "MaxIter", Inf); | |
76 maxfev = optimget (options, "MaxFunEvals", Inf); | |
77 outfcn = optimget (options, "OutputFcn"); | |
78 funvalchk = strcmp (optimget (options, "FunValCheck", "off"), "on"); | |
79 | |
80 if (funvalchk) | |
81 # replace fun with a guarded version | |
82 fun = @(x) guarded_eval (fun, x); | |
83 endif | |
84 | |
85 # These defaults are rather stringent. I think that normally, user prefers | |
86 # accuracy to performance. | |
87 | |
88 macheps = eps (class (x0)); | |
89 | |
90 tolx = optimget (options, "TolX", 1e1*macheps); | |
91 tolf = optimget (options, "TolFun",1e2*macheps); | |
92 | |
93 factor = 100; | |
94 # FIXME: TypicalX corresponds to user scaling (???) | |
95 autodg = true; | |
96 | |
97 niter = 1; nfev = 0; | |
98 | |
99 x = x0(:); | |
100 info = 0; | |
101 | |
102 # outer loop | |
103 while (niter < maxiter && nfev < maxfev && ! info) | |
104 | |
105 # calc func value and jacobian (possibly via FD) | |
106 # handle arbitrary shapes of x and f and remember them | |
107 if (has_jac) | |
108 [fvec, fjac] = fcn (reshape (x, xsiz)); | |
109 nfev ++; | |
110 else | |
111 [fvec, fjac] = __fdjac__ (fcn, reshape (x, xsiz)); | |
112 nfev += 1 + length (x); | |
113 endif | |
114 fsiz = size (fvec); | |
115 fvec = fvec(:); | |
116 fn = norm (fvec); | |
117 | |
118 # get QR factorization of the jacobian | |
119 [q, r] = qr (fjac); | |
120 | |
121 # get column norms, use them as scaling factor | |
122 jcn = norm (fjac, 'columns').'; | |
123 if (niter == 1) | |
124 if (autodg) | |
125 dg = jcn; | |
126 dg(dg == 0) = 1; | |
127 endif | |
128 xn = norm (dg .* x); | |
129 delta = factor * xn; | |
130 endif | |
131 | |
132 # rescale if necessary | |
133 if (autodg) | |
134 dg = max (dg, jcn); | |
135 endif | |
136 | |
137 nfail = 0; | |
138 nsuc = 0; | |
139 # inner loop | |
140 while (niter <= maxiter && nfev < maxfev && ! info) | |
141 | |
142 qtf = q'*fvec; | |
143 | |
144 # get TR model (dogleg) minimizer | |
145 p = - __dogleg__ (r, qtf, dg, delta); | |
146 pn = norm (dg .* p); | |
147 | |
148 if (niter == 1) | |
149 delta = min (delta, pn); | |
150 endif | |
151 | |
152 fvec1 = fcn (reshape (x + p, xsiz)) (:); | |
153 fn1 = norm (fvec1); | |
154 | |
155 if (fn1 < fn) | |
156 # scaled actual reduction | |
157 actred = 1 - (fn1/fn)^2; | |
158 else | |
159 actred = -1; | |
160 endif | |
161 | |
162 # scaled predicted reduction, and ratio | |
163 w = qtf + r*p; | |
164 t = norm (w); | |
165 if (t < fn) | |
166 prered = 1 - (t/fn)^2; | |
167 ratio = actred / prered; | |
168 else | |
169 prered = 0; | |
170 ratio = 0; | |
171 endif | |
172 | |
173 # update delta | |
174 if (ratio < 0.1) | |
175 nsuc = 0; | |
176 nfail ++; | |
177 delta *= 0.5; | |
178 if (delta <= sqrt (macheps)*xn) | |
179 # trust region became uselessly small | |
180 info = -3; | |
181 break; | |
182 endif | |
183 else | |
184 nfail = 0; | |
185 nsuc ++; | |
186 if (abs (1-ratio) <= 0.1) | |
187 delta = 2*pn; | |
188 elseif (ratio >= 0.5 || nsuc > 1) | |
189 delta = max (delta, 2*pn); | |
190 endif | |
191 endif | |
192 | |
193 if (ratio >= 1e-4) | |
194 # successful iteration | |
195 x += p; | |
196 xn = norm (dg .* x); | |
197 fvec = fvec1; | |
198 fn = fn1; | |
199 niter ++; | |
200 endif | |
201 | |
202 # Tests for termination conditions. A mysterious place, anything can | |
203 # happen if you change something here... | |
204 | |
205 # The rule of thumb (which I'm not sure M*b is quite following) is that | |
206 # for a tolerance that depends on scaling, only 0 makes sense as a | |
207 # default value. But 0 usually means uselessly long iterations, | |
208 # so we need scaling-independent tolerances wherever possible. | |
209 | |
210 # XXX: why tolf*n*xn? If abs (e) ~ abs(x) * eps is a vector of | |
211 # perturbations of x, then norm (fjac*e) <= eps*n*xn, i.e. by tolf ~ | |
212 # eps we demand as much accuracy as we can expect. | |
213 if (fn <= tolf*n*xn) | |
214 info = 1; | |
215 # The following tests done only after successful step. | |
216 elseif (actred > 0) | |
217 # This one is classic. Note that we use scaled variables again, but | |
218 # compare to scaled step, so nothing bad. | |
219 if (pn <= tolx*xn) | |
220 info = 2; | |
221 # Again a classic one. It seems weird to use the same tolf for two | |
222 # different tests, but that's what M*b manual appears to say. | |
223 elseif (actred < tolf) | |
224 info = 3 | |
225 endif | |
226 endif | |
227 | |
228 # criterion for recalculating jacobian | |
229 if (nfail == 2) | |
230 break; | |
231 endif | |
232 | |
233 # compute the scaled Broyden update | |
234 u = (fvec1 - q*w) / pn; | |
235 v = dg .* ((dg .* p) / pn); | |
236 | |
237 # update the QR factorization | |
238 [q, r] = qrupdate (q, r, u, v); | |
239 | |
240 endwhile | |
241 endwhile | |
242 | |
243 # restore original shapes | |
244 x = reshape (x, xsiz); | |
245 fvec = reshape (fvec, fsiz); | |
246 | |
247 output.iterations = niter; | |
248 output.funcCount = niter + 2; | |
249 | |
250 endfunction | |
251 | |
252 # an assistant function that evaluates a function handle and checks for bad | |
253 # results. | |
254 function fx = guarded_eval (fun, x) | |
255 fx = fun (x); | |
256 if (! all (isreal (fx))) | |
257 error ("fsolve:notreal", "fsolve: non-real value encountered"); | |
258 elseif (any (isnan (fx))) | |
259 error ("fsolve:isnan", "fsolve: NaN value encountered"); | |
260 endif | |
261 endfunction | |
262 | |
263 %!function retval = f (p) | |
264 %! x = p(1); | |
265 %! y = p(2); | |
266 %! z = p(3); | |
267 %! retval = zeros (3, 1); | |
268 %! retval(1) = sin(x) + y**2 + log(z) - 7; | |
269 %! retval(2) = 3*x + 2**y -z**3 + 1; | |
270 %! retval(3) = x + y + z - 5; | |
271 %!test | |
272 %! x_opt = [ 0.599054; | |
273 %! 2.395931; | |
274 %! 2.005014 ]; | |
275 %! tol = 1.0e-5; | |
276 %! [x, fval, info] = fsolve (@f, [ 0.5; 2.0; 2.5 ]); | |
277 %! assert (info > 0); | |
278 %! assert (norm (x - x_opt, 1) < tol); | |
279 %! assert (norm (fval) < tol); | |
280 | |
281 %!function retval = f (p) | |
282 %! x = p(1); | |
283 %! y = p(2); | |
284 %! z = p(3); | |
285 %! w = p(4); | |
286 %! retval = zeros (4, 1); | |
287 %! retval(1) = 3*x + 4*y + exp (z + w) - 1.007; | |
288 %! retval(2) = 6*x - 4*y + exp (3*z + w) - 11; | |
289 %! retval(3) = x^4 - 4*y^2 + 6*z - 8*w - 20; | |
290 %! retval(4) = x^2 + 2*y^3 + z - w - 4; | |
291 %!test | |
292 %! x_opt = [ -0.767297326653401, 0.590671081117440, 1.47190018629642, -1.52719341133957 ]; | |
293 %! tol = 1.0e-5; | |
294 %! [x, fval, info] = fsolve (@f, [-1, 1, 2, -1]); | |
295 %! assert (info > 0); | |
296 %! assert (norm (x - x_opt, 1) < tol); | |
297 %! assert (norm (fval) < tol); |