5289
|
1 ## Copyright (C) 2005 John W. Eaton |
|
2 ## |
|
3 ## This file is part of Octave. |
|
4 ## |
|
5 ## Octave is free software; you can redistribute it and/or modify it |
|
6 ## under the terms of the GNU General Public License as published by |
|
7 ## the Free Software Foundation; either version 2, or (at your option) |
|
8 ## any later version. |
|
9 ## |
|
10 ## Octave is distributed in the hope that it will be useful, but |
|
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 ## General Public License for more details. |
|
14 ## |
|
15 ## You should have received a copy of the GNU General Public License |
|
16 ## along with Octave; see the file COPYING. If not, write to the Free |
|
17 ## Software Foundation, 59 Temple Place - Suite 330, Boston, MA |
|
18 ## 02111-1307, USA. |
|
19 |
|
20 ## -*- texinfo -*- |
|
21 ## @deftypefn {Function File} {[@var{x}, @var{obj}, @var{info}, @var{iter}, @var{nf}, @var{lambda}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}) |
|
22 ## Solve the nonlinear program |
|
23 ## @ifinfo |
|
24 ## |
|
25 ## @example |
|
26 ## min phi (x) |
|
27 ## x |
|
28 ## @end example |
|
29 ## |
|
30 ## @end ifinfo |
|
31 ## @iftex |
|
32 ## @tex |
|
33 ## @end tex |
|
34 ## @end iftex |
|
35 ## subject to |
|
36 ## @ifinfo |
|
37 ## |
|
38 ## @example |
|
39 ## g(x) = 0 |
|
40 ## h(x) >= 0 |
|
41 ## @end example |
|
42 ## @end ifinfo |
|
43 ## @iftex |
|
44 ## @tex |
|
45 ## @end tex |
|
46 ## @end iftex |
|
47 ## |
|
48 ## @noindent |
|
49 ## using a successive quadratic programming method. |
|
50 ## |
|
51 ## The first argument is the initial guess for the vector @var{x}. |
|
52 ## |
|
53 ## The second argument is a function handle pointing to the objective |
|
54 ## function. The objective function must be of the form |
|
55 ## |
|
56 ## @example |
|
57 ## y = phi (x) |
|
58 ## @end example |
|
59 ## |
|
60 ## @noindent |
|
61 ## in which @var{x} is a vector and @var{y} is a scalar. |
|
62 ## |
|
63 ## The second argument may also be a 2- or 3-element cell array of |
|
64 ## function handles. The first element should point to the objective |
|
65 ## function, the second should point to a function that computes the |
|
66 ## gradient of the objective function, and the third should point to a |
|
67 ## function to compute the hessian of the objective function. If the |
|
68 ## gradient function is not supplied, the gradient is computed by finite |
|
69 ## differences. If the hessian function is not supplied, a BFGS update |
|
70 ## formula is used to approximate the hessian. |
|
71 ## |
|
72 ## If supplied, the gradient function must be of the form |
|
73 ## |
|
74 ## @example |
|
75 ## g = gradient (x) |
|
76 ## @end example |
|
77 ## |
|
78 ## @noindent |
|
79 ## in which @var{x} is a vector and @var{g} is a vector. |
|
80 ## |
|
81 ## If supplied, the hessian function must be of the form |
|
82 ## |
|
83 ## @example |
|
84 ## h = hessian (x) |
|
85 ## @end example |
|
86 ## |
|
87 ## @noindent |
|
88 ## in which @var{x} is a vector and @var{h} is a matrix. |
|
89 ## |
|
90 ## The third and fourth arguments are function handles pointing to |
|
91 ## functions that compute the equality constraints and the inequality |
|
92 ## constraints, respectively. |
|
93 ## |
|
94 ## If your problem does not have equality (or inequality) constraints, |
|
95 ## you may pass an empty matrix for @var{g} (or @var{h}). |
|
96 ## |
|
97 ## If supplied, the equality and inequality constraint functions must be |
|
98 ## of the form |
|
99 ## |
|
100 ## @example |
|
101 ## r = f (x) |
|
102 ## @end example |
|
103 ## |
|
104 ## @noindent |
|
105 ## in which @var{x} is a vector and @var{r} is a vector. |
|
106 ## |
|
107 ## The third and fourth arguments may also be 2-element cell arrays of |
|
108 ## function handles. The first element should point to the constraint |
|
109 ## function and the second should point to a function that computes the |
|
110 ## gradient of the constraint function: |
|
111 ## |
|
112 ## @example |
|
113 ## [ d f(x) d f(x) d f(x) ] |
|
114 ## transpose ( [ ------ ----- ... ------ ] ) |
|
115 ## [ dx_1 dx_2 dx_N ] |
|
116 ## @end example |
|
117 ## |
|
118 ## Here is an example of calling @code{sqp}: |
|
119 ## |
|
120 ## @example |
|
121 ## function r = g (x) |
|
122 ## r = [ sumsq(x)-10; x(2)*x(3)-5*x(4)*x(5); x(1)^3+x(2)^3+1]; |
|
123 ## endfunction |
|
124 ## |
|
125 ## function obj = phi (x) |
|
126 ## obj = exp(prod(x)) - 0.5*(x(1)^3+x(2)^3+1)^2; |
|
127 ## endfunction |
|
128 ## |
|
129 ## x0 = [-1.8; 1.7; 1.9; -0.8; -0.8]; |
|
130 ## |
|
131 ## [x, obj, info, iter, nf, lambda] = sqp (x0, @@phi, @@g, []) |
|
132 ## |
|
133 ## x = |
|
134 ## |
|
135 ## -1.71714 |
|
136 ## 1.59571 |
|
137 ## 1.82725 |
|
138 ## -0.76364 |
|
139 ## -0.76364 |
|
140 ## |
|
141 ## obj = 0.053950 |
|
142 ## info = 101 |
|
143 ## iter = 8 |
|
144 ## nf = 10 |
|
145 ## lambda = |
|
146 ## |
|
147 ## -0.0401627 |
|
148 ## 0.0379578 |
|
149 ## -0.0052227 |
|
150 ## @end example |
|
151 ## |
|
152 ## The value returned in @var{info} may be one of the following: |
|
153 ## @table @asis |
|
154 ## @item 101 |
|
155 ## The algorithm terminated because the norm of the last step was less |
|
156 ## than @code{tol * norm (x)} (the value of tol is currently fixed at |
|
157 ## @code{sqrt (eps)}---edit @file{sqp.m} to modify this value). |
|
158 ## @item 102 |
|
159 ## The BFGS update failed. |
|
160 ## @item 103 |
|
161 ## The maximum number of iterations was reached (the maximum number of |
|
162 ## allowed iterations is currently fixed at 100---edit @file{sqp.m} to |
|
163 ## increase this value). |
|
164 ## @end table |
|
165 ## @end deftypefn |
|
166 ## @seealso{qp} |
|
167 |
|
function [x, obj, info, iter, nf, lambda] = sqp (x, objf, cef, cif)

  ## Globals shared with the subfunctions in this file.  NFUN is set
  ## here and incremented by phi_L1 each time it evaluates the
  ## objective; the __sqp_*_fun__ handles are read by the
  ## finite-difference wrappers fd_obj_grd, fd_ce_jac and fd_ci_jac.
  global nfun;
  global __sqp_obj_fun__;
  global __sqp_ce_fun__;
  global __sqp_ci_fun__;

  if (nargin >= 2 && nargin <= 4)

    ## Choose an initial NxN symmetric positive definite Hessian
    ## approximation B.

    n = length (x);

    B = eye (n, n);

    ## Evaluate objective function, constraints, and gradients at initial
    ## value of x.
    ##
    ## obj_fun
    ## obj_grad
    ## ce_fun  -- equality constraint functions
    ## ci_fun  -- inequality constraint functions
    ## A == [grad_{x_1} cx_fun, grad_{x_2} cx_fun, ..., grad_{x_n} cx_fun]^T

    ## Objective: a bare handle, or a cell array {fun, grad, hess} in
    ## which grad and hess are optional.  Without grad, finite
    ## differences are used; without hess, B is maintained by BFGS.
    obj_grd = @fd_obj_grd;
    have_hess = 0;
    if (iscell (objf))
      if (length (objf) > 0)
        __sqp_obj_fun__ = obj_fun = objf{1};
        if (length (objf) > 1)
          obj_grd = objf{2};
          if (length (objf) > 2)
            obj_hess = objf{3};
            have_hess = 1;
            B = feval (obj_hess, x);
          endif
        endif
      else
        error ("sqp: invalid objective function");
      endif
    else
      __sqp_obj_fun__ = obj_fun = objf;
    endif

    ## Equality constraints: omitted or empty means "none"; a cell
    ## array {fun, jac} may supply an analytic Jacobian.
    ce_fun = @empty_cf;
    ce_grd = @empty_jac;
    if (nargin > 2)
      ce_grd = @fd_ce_jac;
      if (iscell (cef))
        if (length (cef) > 0)
          ce_fun = cef{1};
          if (length (cef) > 1)
            ce_grd = cef{2};
          endif
        else
          error ("sqp: invalid equality constraint function");
        endif
      elseif (! isempty (cef))
        ce_fun = cef;
      endif
    endif
    __sqp_ce_fun__ = ce_fun;

    ## Inequality constraints: handled exactly like the equality ones.
    ci_fun = @empty_cf;
    ci_grd = @empty_jac;
    if (nargin > 3)
      ci_grd = @fd_ci_jac;
      if (iscell (cif))
        if (length (cif) > 0)
          ci_fun = cif{1};
          if (length (cif) > 1)
            ci_grd = cif{2};
          endif
        else
          error ("sqp: invalid inequality constraint function");
        endif
      elseif (! isempty (cif))
        ci_fun = cif;
      endif
    endif
    __sqp_ci_fun__ = ci_fun;

    iter_max = 100;

    iter = 0;

    ## Make sure INFO is defined even if the convergence test below
    ## succeeds on the very first pass, before any QP has been solved.
    info = 0;

    obj = feval (obj_fun, x);
    nfun = 1;

    c = feval (obj_grd, x);

    ce = feval (ce_fun, x);
    F = feval (ce_grd, x);

    ci = feval (ci_fun, x);
    C = feval (ci_grd, x);

    A = [F; C];

    ## Choose an initial lambda (x is provided by the caller).

    lambda = 100 * ones (rows (A), 1);

    qp_iter = 1;
    alpha = 1;

    ## report ();

    ## report (iter, qp_iter, alpha, nfun, obj);

    while (++iter < iter_max)

      ## Check convergence.  This is just a simple check on the first
      ## order necessary conditions.

      nr_f = rows (F);

      lambda_e = lambda((1:nr_f)');
      lambda_i = lambda((nr_f+1:end)');

      con = [ce; ci];

      t0 = norm (c - A' * lambda);
      t1 = norm (ce);
      t2 = all (ci >= 0);
      t3 = all (lambda_i >= 0);
      t4 = norm (lambda .* con);

      tol = sqrt (eps);

      if (t2 && t3 && max ([t0; t1; t4]) < tol)
        break;
      endif

      ## Compute search direction p by solving QP.

      g = -ce;
      d = -ci;

      ## Discard inequality constraints that have -Inf bounds since those
      ## will never be active.
      idx = isinf (d) & d < 0;
      d(idx) = [];
      C(idx,:) = [];

      [p, obj_qp, INFO, lambda] = qp (x, B, c, F, g, [], [], d, C,
                                      Inf * ones (size (d)));

      info = INFO.info;

      ## Check QP solution and attempt to recover if it has failed.

      ## Choose mu such that p is a descent direction for the chosen
      ## merit function phi.

      [x_new, alpha, obj_new] = linesearch_L1 (x, p, obj_fun, obj_grd,
                                               ce_fun, ci_fun, lambda, obj);

      ## Evaluate objective function, constraints, and gradients at
      ## x_new.

      c_new = feval (obj_grd, x_new);

      ce_new = feval (ce_fun, x_new);
      F_new = feval (ce_grd, x_new);

      ci_new = feval (ci_fun, x_new);
      C_new = feval (ci_grd, x_new);

      A_new = [F_new; C_new];

      ## Set
      ##
      ## s = alpha * p
      ## y = grad_x L (x_new, lambda) - grad_x L (x, lambda)

      y = c_new - c;

      if (! isempty (A))
        t = ((A_new - A)'*lambda);
        y -= t;
      endif

      delx = x_new - x;

      if (norm (delx) < tol * norm (x))
        info = 101;
        break;
      endif

      if (have_hess)

        ## B is used by the QP solved at the *next* iterate, so the
        ## user-supplied Hessian must be evaluated at x_new, not at
        ## the point we are leaving.
        B = feval (obj_hess, x_new);

      else

        ## Update B using a quasi-Newton formula.

        delxt = delx';

        ## Damped BFGS.  Or maybe we would actually want to use the Hessian
        ## of the Lagrangian, computed directly.

        d1 = delxt*B*delx;

        t1 = 0.2 * d1;
        t2 = delxt*y;

        ## Damping: blend y with B*delx when the curvature along the
        ## step (t2) is too small relative to d1.
        if (t2 < t1)
          theta = 0.8*d1/(d1 - t2);
        else
          theta = 1;
        endif

        r = theta*y + (1-theta)*B*delx;

        d2 = delxt*r;

        ## Either denominator being zero makes the update undefined.
        if (d1 == 0 || d2 == 0)
          info = 102;
          break;
        endif

        B = B - B*delx*delxt*B/d1 + r*r'/d2;

      endif

      x = x_new;

      obj = obj_new;

      c = c_new;

      ce = ce_new;
      F = F_new;

      ci = ci_new;
      C = C_new;

      A = A_new;

      ## report (iter, qp_iter, alpha, nfun, obj);

    endwhile

    if (iter >= iter_max)
      info = 103;
    endif

    nf = nfun;

  else

    usage ("[x, obj, info, iter, nf, lambda] = sqp (x, objf, cef, cif)");

  endif

### endfunction
|
429 |
|
430 |
|
function [merit, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu)

  ## L1 merit function: the objective value plus 1/MU times the 1-norm
  ## of the constraint violation at X.  Every equality residual counts
  ## as a violation; an inequality value counts only when negative.
  ##
  ## If OBJ is passed in empty, the objective is evaluated at X (and
  ## the global evaluation counter NFUN is bumped); otherwise the
  ## supplied value is used as is.  OBJ is also returned.

  global nfun;

  eq_val = feval (ce_fun, x);
  ineq_val = feval (ci_fun, x);

  ## Keep only the violated inequality constraints.
  violated = ineq_val(ineq_val < 0);

  if (isempty (obj))
    obj = feval (obj_fun, x);
    nfun++;
  endif

  penalty = norm ([eq_val; violated], 1) / mu;

  if (isempty (penalty))
    merit = obj;
  else
    merit = obj + penalty;
  endif

### endfunction
|
455 |
|
456 |
|
function [x_new, alpha, obj] = linesearch_L1 (x, p, obj_fun, obj_grd,
                                              ce_fun, ci_fun, lambda, obj)

  ## Backtracking line search along P on the L1 merit function
  ## phi_L1.  Starting from ALPHA = 1, the step is shrunk until the
  ## merit value at X + ALPHA*P no longer exceeds the value at X plus
  ## ETA*ALPHA times the slope estimate computed below.  Returns the
  ## accepted point, the step length, and the objective value reported
  ## by the last phi_L1 call.

  ## Parameters
  ##
  ## eta in the range (0, 0.5)
  ## tau in the range (0, 1)

  eta = 0.25;
  tau = 0.5;

  ## Step reduction factor; must lie in the range (0, tau).
  shrink = 0.9 * tau;  ## ??

  delta_bar = sqrt (eps);

  ## Penalty parameter derived from the largest multiplier magnitude.
  if (! isempty (lambda))
    mu = 1 / (norm (lambda, Inf) + delta_bar);
  else
    mu = 1 / delta_bar;
  endif

  grad0 = feval (obj_grd, x);
  eq0 = feval (ce_fun, x);

  [merit0, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu);

  ## Slope estimate: objective slope along p minus the weighted
  ## constraint violation at x.  Only the violated inequality
  ## constraints contribute.
  slope = grad0' * p;
  ineq0 = feval (ci_fun, x);
  penalty = - norm ([eq0; ineq0(ineq0 < 0)], 1) / mu;
  if (! isempty (penalty))
    slope += penalty;
  endif

  alpha = 1;

  while (1)
    [merit_trial, obj] = phi_L1 ([], obj_fun, ce_fun, ci_fun, x+alpha*p, mu);
    merit_bound = merit0+eta*alpha*slope;
    ## Written as a negated ">" so that a NaN merit value stops the
    ## search instead of shrinking alpha.
    if (! (merit_trial > merit_bound))
      break;
    endif
    alpha = shrink * alpha;
  endwhile

  x_new = x + alpha * p;

### endfunction
|
511 |
|
512 |
|
function report (iter, qp_iter, alpha, nfun, obj)

  ## Progress printer.  Called with no arguments it prints the column
  ## header; otherwise it prints one formatted row with the iteration
  ## number, QP iteration count, step length, function-evaluation
  ## count and objective value.

  if (nargin != 0)
    printf ("%5d %4d %8.1g %5d %13.6e\n", iter, qp_iter, alpha, nfun, obj);
  else
    printf (" Itn ItQP Step Nfun Objective\n");
  endif

### endfunction
|
522 |
|
523 |
|
function grd = fdgrd (f, x)

  ## Forward-difference gradient of the scalar function F at X, using
  ## a step of sqrt (eps) in each coordinate.  The result is a column
  ## vector with one entry per element of X.  An empty F yields a
  ## 0x1 result.

  if (isempty (f))
    grd = zeros (0, 1);
  else
    base = feval (f, x);
    step = sqrt (eps);
    count = length (x);
    grd = zeros (count, 1);
    for k = 1:count
      ## Perturb one coordinate on a scratch copy of x.
      xp = x;
      xp(k) += step;
      grd(k) = (feval (f, xp) - base) / step;
    endfor
  endif

### endfunction
|
542 |
|
543 |
|
function jac = fdjac (f, x)

  ## Forward-difference Jacobian of the vector function F at X, using
  ## a step of sqrt (eps) in each coordinate.  Row i, column j holds
  ## the approximate derivative of the i-th component of F with
  ## respect to X(j).  An empty F yields a 0-by-length(X) result.

  ## Needed by both branches: the empty case still has to return a
  ## matrix with the right number of columns.
  nx = length (x);

  if (! isempty (f))
    y0 = feval (f, x);
    nf = length (y0);
    jac = zeros (nf, nx);
    deltax = sqrt (eps);
    for i = 1:nx
      ## Perturb one coordinate, difference, then restore it.
      t = x(i);
      x(i) += deltax;
      jac(:,i) = (feval (f, x) - y0) / deltax;
      x(i) = t;
    endfor
  else
    jac = zeros (0, nx);
  endif

### endfunction
|
563 |
|
564 |
|
function grd = fd_obj_grd (x)

  ## Finite-difference gradient, at X, of the function handle stored
  ## in the global __sqp_obj_fun__.

  global __sqp_obj_fun__;

  objective = __sqp_obj_fun__;
  grd = fdgrd (objective, x);

### endfunction
|
572 |
|
573 |
|
function res = empty_cf (x)

  ## Placeholder constraint function: ignores X and returns a 0x1
  ## matrix, i.e. no constraint values at all.

  n_constraints = 0;
  res = zeros (n_constraints, 1);

### endfunction
|
579 |
|
580 |
|
function res = empty_jac (x)

  ## Placeholder constraint Jacobian: zero rows and one column per
  ## element of X.

  n_vars = length (x);
  res = zeros (0, n_vars);

### endfunction
|
586 |
|
587 |
|
function jac = fd_ce_jac (x)

  ## Finite-difference Jacobian, at X, of the function handle stored
  ## in the global __sqp_ce_fun__.

  global __sqp_ce_fun__;

  eq_constraints = __sqp_ce_fun__;
  jac = fdjac (eq_constraints, x);

### endfunction
|
595 |
|
596 |
|
function jac = fd_ci_jac (x)

  ## Finite-difference Jacobian, at X, of the function handle stored
  ## in the global __sqp_ci_fun__.

  global __sqp_ci_fun__;

  ineq_constraints = __sqp_ci_fun__;
  jac = fdjac (ineq_constraints, x);

### endfunction