Mercurial > hg > octave-lyh
comparison libcruft/lapack/slasd2.f @ 7789:82be108cc558
First attempt at single precision tyeps
* * *
corrections to qrupdate single precision routines
* * *
prefer demotion to single over promotion to double
* * *
Add single precision support to log2 function
* * *
Trivial PROJECT file update
* * *
Cache optimized hermitian/transpose methods
* * *
Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author | David Bateman <dbateman@free.fr> |
---|---|
date | Sun, 27 Apr 2008 22:34:17 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
7788:45f5faba05a2 | 7789:82be108cc558 |
---|---|
1 SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT, | |
2 $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX, | |
3 $ IDXC, IDXQ, COLTYP, INFO ) | |
4 * | |
5 * -- LAPACK auxiliary routine (version 3.1) -- | |
6 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. | |
7 * November 2006 | |
8 * | |
9 * .. Scalar Arguments .. | |
10 INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE | |
11 REAL ALPHA, BETA | |
12 * .. | |
13 * .. Array Arguments .. | |
14 INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ), | |
15 $ IDXQ( * ) | |
16 REAL D( * ), DSIGMA( * ), U( LDU, * ), | |
17 $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), | |
18 $ Z( * ) | |
19 * .. | |
20 * | |
21 * Purpose | |
22 * ======= | |
23 * | |
24 * SLASD2 merges the two sets of singular values together into a single | |
25 * sorted set. Then it tries to deflate the size of the problem. | |
26 * There are two ways in which deflation can occur: when two or more | |
27 * singular values are close together or if there is a tiny entry in the | |
28 * Z vector. For each such occurrence the order of the related secular | |
29 * equation problem is reduced by one. | |
30 * | |
31 * SLASD2 is called from SLASD1. | |
32 * | |
33 * Arguments | |
34 * ========= | |
35 * | |
36 * NL (input) INTEGER | |
37 * The row dimension of the upper block. NL >= 1. | |
38 * | |
39 * NR (input) INTEGER | |
40 * The row dimension of the lower block. NR >= 1. | |
41 * | |
42 * SQRE (input) INTEGER | |
43 * = 0: the lower block is an NR-by-NR square matrix. | |
44 * = 1: the lower block is an NR-by-(NR+1) rectangular matrix. | |
45 * | |
46 * The bidiagonal matrix has N = NL + NR + 1 rows and | |
47 * M = N + SQRE >= N columns. | |
48 * | |
49 * K (output) INTEGER | |
50 * Contains the dimension of the non-deflated matrix, | |
51 * This is the order of the related secular equation. 1 <= K <=N. | |
52 * | |
53 * D (input/output) REAL array, dimension (N) | |
54 * On entry D contains the singular values of the two submatrices | |
55 * to be combined. On exit D contains the trailing (N-K) updated | |
56 * singular values (those which were deflated) sorted into | |
57 * increasing order. | |
58 * | |
59 * Z (output) REAL array, dimension (N) | |
60 * On exit Z contains the updating row vector in the secular | |
61 * equation. | |
62 * | |
63 * ALPHA (input) REAL | |
64 * Contains the diagonal element associated with the added row. | |
65 * | |
66 * BETA (input) REAL | |
67 * Contains the off-diagonal element associated with the added | |
68 * row. | |
69 * | |
70 * U (input/output) REAL array, dimension (LDU,N) | |
71 * On entry U contains the left singular vectors of two | |
72 * submatrices in the two square blocks with corners at (1,1), | |
73 * (NL, NL), and (NL+2, NL+2), (N,N). | |
74 * On exit U contains the trailing (N-K) updated left singular | |
75 * vectors (those which were deflated) in its last N-K columns. | |
76 * | |
77 * LDU (input) INTEGER | |
78 * The leading dimension of the array U. LDU >= N. | |
79 * | |
80 * VT (input/output) REAL array, dimension (LDVT,M) | |
81 * On entry VT' contains the right singular vectors of two | |
82 * submatrices in the two square blocks with corners at (1,1), | |
83 * (NL+1, NL+1), and (NL+2, NL+2), (M,M). | |
84 * On exit VT' contains the trailing (N-K) updated right singular | |
85 * vectors (those which were deflated) in its last N-K columns. | |
86 * In case SQRE =1, the last row of VT spans the right null | |
87 * space. | |
88 * | |
89 * LDVT (input) INTEGER | |
90 * The leading dimension of the array VT. LDVT >= M. | |
91 * | |
92 * DSIGMA (output) REAL array, dimension (N) | |
93 * Contains a copy of the diagonal elements (K-1 singular values | |
94 * and one zero) in the secular equation. | |
95 * | |
96 * U2 (output) REAL array, dimension (LDU2,N) | |
97 * Contains a copy of the first K-1 left singular vectors which | |
98 * will be used by SLASD3 in a matrix multiply (SGEMM) to solve | |
99 * for the new left singular vectors. U2 is arranged into four | |
100 * blocks. The first block contains a column with 1 at NL+1 and | |
101 * zero everywhere else; the second block contains non-zero | |
102 * entries only at and above NL; the third contains non-zero | |
103 * entries only below NL+1; and the fourth is dense. | |
104 * | |
105 * LDU2 (input) INTEGER | |
106 * The leading dimension of the array U2. LDU2 >= N. | |
107 * | |
108 * VT2 (output) REAL array, dimension (LDVT2,N) | |
109 * VT2' contains a copy of the first K right singular vectors | |
110 * which will be used by SLASD3 in a matrix multiply (SGEMM) to | |
111 * solve for the new right singular vectors. VT2 is arranged into | |
112 * three blocks. The first block contains a row that corresponds | |
113 * to the special 0 diagonal element in SIGMA; the second block | |
114 * contains non-zeros only at and before NL +1; the third block | |
115 * contains non-zeros only at and after NL +2. | |
116 * | |
117 * LDVT2 (input) INTEGER | |
118 * The leading dimension of the array VT2. LDVT2 >= M. | |
119 * | |
120 * IDXP (workspace) INTEGER array, dimension (N) | |
121 * This will contain the permutation used to place deflated | |
122 * values of D at the end of the array. On output IDXP(2:K) | |
123 * points to the nondeflated D-values and IDXP(K+1:N) | |
124 * points to the deflated singular values. | |
125 * | |
126 * IDX (workspace) INTEGER array, dimension (N) | |
127 * This will contain the permutation used to sort the contents of | |
128 * D into ascending order. | |
129 * | |
130 * IDXC (output) INTEGER array, dimension (N) | |
131 * This will contain the permutation used to arrange the columns | |
132 * of the deflated U matrix into three groups: the first group | |
133 * contains non-zero entries only at and above NL, the second | |
134 * contains non-zero entries only below NL+2, and the third is | |
135 * dense. | |
136 * | |
137 * IDXQ (input/output) INTEGER array, dimension (N) | |
138 * This contains the permutation which separately sorts the two | |
139 * sub-problems in D into ascending order. Note that entries in | |
140 * the first hlaf of this permutation must first be moved one | |
141 * position backward; and entries in the second half | |
142 * must first have NL+1 added to their values. | |
143 * | |
144 * COLTYP (workspace/output) INTEGER array, dimension (N) | |
145 * As workspace, this will contain a label which will indicate | |
146 * which of the following types a column in the U2 matrix or a | |
147 * row in the VT2 matrix is: | |
148 * 1 : non-zero in the upper half only | |
149 * 2 : non-zero in the lower half only | |
150 * 3 : dense | |
151 * 4 : deflated | |
152 * | |
153 * On exit, it is an array of dimension 4, with COLTYP(I) being | |
154 * the dimension of the I-th type columns. | |
155 * | |
156 * INFO (output) INTEGER | |
157 * = 0: successful exit. | |
158 * < 0: if INFO = -i, the i-th argument had an illegal value. | |
159 * | |
160 * Further Details | |
161 * =============== | |
162 * | |
163 * Based on contributions by | |
164 * Ming Gu and Huan Ren, Computer Science Division, University of | |
165 * California at Berkeley, USA | |
166 * | |
167 * ===================================================================== | |
168 * | |
169 * .. Parameters .. | |
170 REAL ZERO, ONE, TWO, EIGHT | |
171 PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, | |
172 $ EIGHT = 8.0E+0 ) | |
173 * .. | |
174 * .. Local Arrays .. | |
175 INTEGER CTOT( 4 ), PSM( 4 ) | |
176 * .. | |
177 * .. Local Scalars .. | |
178 INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, | |
179 $ N, NLP1, NLP2 | |
180 REAL C, EPS, HLFTOL, S, TAU, TOL, Z1 | |
181 * .. | |
182 * .. External Functions .. | |
183 REAL SLAMCH, SLAPY2 | |
184 EXTERNAL SLAMCH, SLAPY2 | |
185 * .. | |
186 * .. External Subroutines .. | |
187 EXTERNAL SCOPY, SLACPY, SLAMRG, SLASET, SROT, XERBLA | |
188 * .. | |
189 * .. Intrinsic Functions .. | |
190 INTRINSIC ABS, MAX | |
191 * .. | |
192 * .. Executable Statements .. | |
193 * | |
194 * Test the input parameters. | |
195 * | |
196 INFO = 0 | |
197 * | |
198 IF( NL.LT.1 ) THEN | |
199 INFO = -1 | |
200 ELSE IF( NR.LT.1 ) THEN | |
201 INFO = -2 | |
202 ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN | |
203 INFO = -3 | |
204 END IF | |
205 * | |
206 N = NL + NR + 1 | |
207 M = N + SQRE | |
208 * | |
209 IF( LDU.LT.N ) THEN | |
210 INFO = -10 | |
211 ELSE IF( LDVT.LT.M ) THEN | |
212 INFO = -12 | |
213 ELSE IF( LDU2.LT.N ) THEN | |
214 INFO = -15 | |
215 ELSE IF( LDVT2.LT.M ) THEN | |
216 INFO = -17 | |
217 END IF | |
218 IF( INFO.NE.0 ) THEN | |
219 CALL XERBLA( 'SLASD2', -INFO ) | |
220 RETURN | |
221 END IF | |
222 * | |
223 NLP1 = NL + 1 | |
224 NLP2 = NL + 2 | |
225 * | |
226 * Generate the first part of the vector Z; and move the singular | |
227 * values in the first part of D one position backward. | |
228 * | |
229 Z1 = ALPHA*VT( NLP1, NLP1 ) | |
230 Z( 1 ) = Z1 | |
231 DO 10 I = NL, 1, -1 | |
232 Z( I+1 ) = ALPHA*VT( I, NLP1 ) | |
233 D( I+1 ) = D( I ) | |
234 IDXQ( I+1 ) = IDXQ( I ) + 1 | |
235 10 CONTINUE | |
236 * | |
237 * Generate the second part of the vector Z. | |
238 * | |
239 DO 20 I = NLP2, M | |
240 Z( I ) = BETA*VT( I, NLP2 ) | |
241 20 CONTINUE | |
242 * | |
243 * Initialize some reference arrays. | |
244 * | |
245 DO 30 I = 2, NLP1 | |
246 COLTYP( I ) = 1 | |
247 30 CONTINUE | |
248 DO 40 I = NLP2, N | |
249 COLTYP( I ) = 2 | |
250 40 CONTINUE | |
251 * | |
252 * Sort the singular values into increasing order | |
253 * | |
254 DO 50 I = NLP2, N | |
255 IDXQ( I ) = IDXQ( I ) + NLP1 | |
256 50 CONTINUE | |
257 * | |
258 * DSIGMA, IDXC, IDXC, and the first column of U2 | |
259 * are used as storage space. | |
260 * | |
261 DO 60 I = 2, N | |
262 DSIGMA( I ) = D( IDXQ( I ) ) | |
263 U2( I, 1 ) = Z( IDXQ( I ) ) | |
264 IDXC( I ) = COLTYP( IDXQ( I ) ) | |
265 60 CONTINUE | |
266 * | |
267 CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) | |
268 * | |
269 DO 70 I = 2, N | |
270 IDXI = 1 + IDX( I ) | |
271 D( I ) = DSIGMA( IDXI ) | |
272 Z( I ) = U2( IDXI, 1 ) | |
273 COLTYP( I ) = IDXC( IDXI ) | |
274 70 CONTINUE | |
275 * | |
276 * Calculate the allowable deflation tolerance | |
277 * | |
278 EPS = SLAMCH( 'Epsilon' ) | |
279 TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) | |
280 TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) | |
281 * | |
282 * There are 2 kinds of deflation -- first a value in the z-vector | |
283 * is small, second two (or more) singular values are very close | |
284 * together (their difference is small). | |
285 * | |
286 * If the value in the z-vector is small, we simply permute the | |
287 * array so that the corresponding singular value is moved to the | |
288 * end. | |
289 * | |
290 * If two values in the D-vector are close, we perform a two-sided | |
291 * rotation designed to make one of the corresponding z-vector | |
292 * entries zero, and then permute the array so that the deflated | |
293 * singular value is moved to the end. | |
294 * | |
295 * If there are multiple singular values then the problem deflates. | |
296 * Here the number of equal singular values are found. As each equal | |
297 * singular value is found, an elementary reflector is computed to | |
298 * rotate the corresponding singular subspace so that the | |
299 * corresponding components of Z are zero in this new basis. | |
300 * | |
301 K = 1 | |
302 K2 = N + 1 | |
303 DO 80 J = 2, N | |
304 IF( ABS( Z( J ) ).LE.TOL ) THEN | |
305 * | |
306 * Deflate due to small z component. | |
307 * | |
308 K2 = K2 - 1 | |
309 IDXP( K2 ) = J | |
310 COLTYP( J ) = 4 | |
311 IF( J.EQ.N ) | |
312 $ GO TO 120 | |
313 ELSE | |
314 JPREV = J | |
315 GO TO 90 | |
316 END IF | |
317 80 CONTINUE | |
318 90 CONTINUE | |
319 J = JPREV | |
320 100 CONTINUE | |
321 J = J + 1 | |
322 IF( J.GT.N ) | |
323 $ GO TO 110 | |
324 IF( ABS( Z( J ) ).LE.TOL ) THEN | |
325 * | |
326 * Deflate due to small z component. | |
327 * | |
328 K2 = K2 - 1 | |
329 IDXP( K2 ) = J | |
330 COLTYP( J ) = 4 | |
331 ELSE | |
332 * | |
333 * Check if singular values are close enough to allow deflation. | |
334 * | |
335 IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN | |
336 * | |
337 * Deflation is possible. | |
338 * | |
339 S = Z( JPREV ) | |
340 C = Z( J ) | |
341 * | |
342 * Find sqrt(a**2+b**2) without overflow or | |
343 * destructive underflow. | |
344 * | |
345 TAU = SLAPY2( C, S ) | |
346 C = C / TAU | |
347 S = -S / TAU | |
348 Z( J ) = TAU | |
349 Z( JPREV ) = ZERO | |
350 * | |
351 * Apply back the Givens rotation to the left and right | |
352 * singular vector matrices. | |
353 * | |
354 IDXJP = IDXQ( IDX( JPREV )+1 ) | |
355 IDXJ = IDXQ( IDX( J )+1 ) | |
356 IF( IDXJP.LE.NLP1 ) THEN | |
357 IDXJP = IDXJP - 1 | |
358 END IF | |
359 IF( IDXJ.LE.NLP1 ) THEN | |
360 IDXJ = IDXJ - 1 | |
361 END IF | |
362 CALL SROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S ) | |
363 CALL SROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C, | |
364 $ S ) | |
365 IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN | |
366 COLTYP( J ) = 3 | |
367 END IF | |
368 COLTYP( JPREV ) = 4 | |
369 K2 = K2 - 1 | |
370 IDXP( K2 ) = JPREV | |
371 JPREV = J | |
372 ELSE | |
373 K = K + 1 | |
374 U2( K, 1 ) = Z( JPREV ) | |
375 DSIGMA( K ) = D( JPREV ) | |
376 IDXP( K ) = JPREV | |
377 JPREV = J | |
378 END IF | |
379 END IF | |
380 GO TO 100 | |
381 110 CONTINUE | |
382 * | |
383 * Record the last singular value. | |
384 * | |
385 K = K + 1 | |
386 U2( K, 1 ) = Z( JPREV ) | |
387 DSIGMA( K ) = D( JPREV ) | |
388 IDXP( K ) = JPREV | |
389 * | |
390 120 CONTINUE | |
391 * | |
392 * Count up the total number of the various types of columns, then | |
393 * form a permutation which positions the four column types into | |
394 * four groups of uniform structure (although one or more of these | |
395 * groups may be empty). | |
396 * | |
397 DO 130 J = 1, 4 | |
398 CTOT( J ) = 0 | |
399 130 CONTINUE | |
400 DO 140 J = 2, N | |
401 CT = COLTYP( J ) | |
402 CTOT( CT ) = CTOT( CT ) + 1 | |
403 140 CONTINUE | |
404 * | |
405 * PSM(*) = Position in SubMatrix (of types 1 through 4) | |
406 * | |
407 PSM( 1 ) = 2 | |
408 PSM( 2 ) = 2 + CTOT( 1 ) | |
409 PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) | |
410 PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) | |
411 * | |
412 * Fill out the IDXC array so that the permutation which it induces | |
413 * will place all type-1 columns first, all type-2 columns next, | |
414 * then all type-3's, and finally all type-4's, starting from the | |
415 * second column. This applies similarly to the rows of VT. | |
416 * | |
417 DO 150 J = 2, N | |
418 JP = IDXP( J ) | |
419 CT = COLTYP( JP ) | |
420 IDXC( PSM( CT ) ) = J | |
421 PSM( CT ) = PSM( CT ) + 1 | |
422 150 CONTINUE | |
423 * | |
424 * Sort the singular values and corresponding singular vectors into | |
425 * DSIGMA, U2, and VT2 respectively. The singular values/vectors | |
426 * which were not deflated go into the first K slots of DSIGMA, U2, | |
427 * and VT2 respectively, while those which were deflated go into the | |
428 * last N - K slots, except that the first column/row will be treated | |
429 * separately. | |
430 * | |
431 DO 160 J = 2, N | |
432 JP = IDXP( J ) | |
433 DSIGMA( J ) = D( JP ) | |
434 IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 ) | |
435 IF( IDXJ.LE.NLP1 ) THEN | |
436 IDXJ = IDXJ - 1 | |
437 END IF | |
438 CALL SCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 ) | |
439 CALL SCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 ) | |
440 160 CONTINUE | |
441 * | |
442 * Determine DSIGMA(1), DSIGMA(2) and Z(1) | |
443 * | |
444 DSIGMA( 1 ) = ZERO | |
445 HLFTOL = TOL / TWO | |
446 IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) | |
447 $ DSIGMA( 2 ) = HLFTOL | |
448 IF( M.GT.N ) THEN | |
449 Z( 1 ) = SLAPY2( Z1, Z( M ) ) | |
450 IF( Z( 1 ).LE.TOL ) THEN | |
451 C = ONE | |
452 S = ZERO | |
453 Z( 1 ) = TOL | |
454 ELSE | |
455 C = Z1 / Z( 1 ) | |
456 S = Z( M ) / Z( 1 ) | |
457 END IF | |
458 ELSE | |
459 IF( ABS( Z1 ).LE.TOL ) THEN | |
460 Z( 1 ) = TOL | |
461 ELSE | |
462 Z( 1 ) = Z1 | |
463 END IF | |
464 END IF | |
465 * | |
466 * Move the rest of the updating row to Z. | |
467 * | |
468 CALL SCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 ) | |
469 * | |
470 * Determine the first column of U2, the first row of VT2 and the | |
471 * last row of VT. | |
472 * | |
473 CALL SLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 ) | |
474 U2( NLP1, 1 ) = ONE | |
475 IF( M.GT.N ) THEN | |
476 DO 170 I = 1, NLP1 | |
477 VT( M, I ) = -S*VT( NLP1, I ) | |
478 VT2( 1, I ) = C*VT( NLP1, I ) | |
479 170 CONTINUE | |
480 DO 180 I = NLP2, M | |
481 VT2( 1, I ) = S*VT( M, I ) | |
482 VT( M, I ) = C*VT( M, I ) | |
483 180 CONTINUE | |
484 ELSE | |
485 CALL SCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 ) | |
486 END IF | |
487 IF( M.GT.N ) THEN | |
488 CALL SCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 ) | |
489 END IF | |
490 * | |
491 * The deflated singular values and their corresponding vectors go | |
492 * into the back of D, U, and V respectively. | |
493 * | |
494 IF( N.GT.K ) THEN | |
495 CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) | |
496 CALL SLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ), | |
497 $ LDU ) | |
498 CALL SLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ), | |
499 $ LDVT ) | |
500 END IF | |
501 * | |
502 * Copy CTOT into COLTYP for referencing in SLASD3. | |
503 * | |
504 DO 190 J = 1, 4 | |
505 COLTYP( J ) = CTOT( J ) | |
506 190 CONTINUE | |
507 * | |
508 RETURN | |
509 * | |
510 * End of SLASD2 | |
511 * | |
512 END |