comparison libcruft/lapack/slasd2.f @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents
children
comparison
equal deleted inserted replaced
7788:45f5faba05a2 7789:82be108cc558
1 SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
2 $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
3 $ IDXC, IDXQ, COLTYP, INFO )
4 *
5 * -- LAPACK auxiliary routine (version 3.1) --
6 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
7 * November 2006
8 *
9 * .. Scalar Arguments ..
10 INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
11 REAL ALPHA, BETA
12 * ..
13 * .. Array Arguments ..
14 INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
15 $ IDXQ( * )
16 REAL D( * ), DSIGMA( * ), U( LDU, * ),
17 $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
18 $ Z( * )
19 * ..
20 *
21 * Purpose
22 * =======
23 *
24 * SLASD2 merges the two sets of singular values together into a single
25 * sorted set. Then it tries to deflate the size of the problem.
26 * There are two ways in which deflation can occur: when two or more
27 * singular values are close together or if there is a tiny entry in the
28 * Z vector. For each such occurrence the order of the related secular
29 * equation problem is reduced by one.
30 *
31 * SLASD2 is called from SLASD1.
32 *
33 * Arguments
34 * =========
35 *
36 * NL (input) INTEGER
37 * The row dimension of the upper block. NL >= 1.
38 *
39 * NR (input) INTEGER
40 * The row dimension of the lower block. NR >= 1.
41 *
42 * SQRE (input) INTEGER
43 * = 0: the lower block is an NR-by-NR square matrix.
44 * = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
45 *
46 * The bidiagonal matrix has N = NL + NR + 1 rows and
47 * M = N + SQRE >= N columns.
48 *
49 * K (output) INTEGER
50 * Contains the dimension of the non-deflated matrix,
51 * This is the order of the related secular equation. 1 <= K <=N.
52 *
53 * D (input/output) REAL array, dimension (N)
54 * On entry D contains the singular values of the two submatrices
55 * to be combined. On exit D contains the trailing (N-K) updated
56 * singular values (those which were deflated) sorted into
57 * increasing order.
58 *
59 * Z (output) REAL array, dimension (N)
60 * On exit Z contains the updating row vector in the secular
61 * equation.
62 *
63 * ALPHA (input) REAL
64 * Contains the diagonal element associated with the added row.
65 *
66 * BETA (input) REAL
67 * Contains the off-diagonal element associated with the added
68 * row.
69 *
70 * U (input/output) REAL array, dimension (LDU,N)
71 * On entry U contains the left singular vectors of two
72 * submatrices in the two square blocks with corners at (1,1),
73 * (NL, NL), and (NL+2, NL+2), (N,N).
74 * On exit U contains the trailing (N-K) updated left singular
75 * vectors (those which were deflated) in its last N-K columns.
76 *
77 * LDU (input) INTEGER
78 * The leading dimension of the array U. LDU >= N.
79 *
80 * VT (input/output) REAL array, dimension (LDVT,M)
81 * On entry VT' contains the right singular vectors of two
82 * submatrices in the two square blocks with corners at (1,1),
83 * (NL+1, NL+1), and (NL+2, NL+2), (M,M).
84 * On exit VT' contains the trailing (N-K) updated right singular
85 * vectors (those which were deflated) in its last N-K columns.
86 * In case SQRE =1, the last row of VT spans the right null
87 * space.
88 *
89 * LDVT (input) INTEGER
90 * The leading dimension of the array VT. LDVT >= M.
91 *
92 * DSIGMA (output) REAL array, dimension (N)
93 * Contains a copy of the diagonal elements (K-1 singular values
94 * and one zero) in the secular equation.
95 *
96 * U2 (output) REAL array, dimension (LDU2,N)
97 * Contains a copy of the first K-1 left singular vectors which
98 * will be used by SLASD3 in a matrix multiply (SGEMM) to solve
99 * for the new left singular vectors. U2 is arranged into four
100 * blocks. The first block contains a column with 1 at NL+1 and
101 * zero everywhere else; the second block contains non-zero
102 * entries only at and above NL; the third contains non-zero
103 * entries only below NL+1; and the fourth is dense.
104 *
105 * LDU2 (input) INTEGER
106 * The leading dimension of the array U2. LDU2 >= N.
107 *
108 * VT2 (output) REAL array, dimension (LDVT2,N)
109 * VT2' contains a copy of the first K right singular vectors
110 * which will be used by SLASD3 in a matrix multiply (SGEMM) to
111 * solve for the new right singular vectors. VT2 is arranged into
112 * three blocks. The first block contains a row that corresponds
113 * to the special 0 diagonal element in SIGMA; the second block
114 * contains non-zeros only at and before NL +1; the third block
115 * contains non-zeros only at and after NL +2.
116 *
117 * LDVT2 (input) INTEGER
118 * The leading dimension of the array VT2. LDVT2 >= M.
119 *
120 * IDXP (workspace) INTEGER array, dimension (N)
121 * This will contain the permutation used to place deflated
122 * values of D at the end of the array. On output IDXP(2:K)
123 * points to the nondeflated D-values and IDXP(K+1:N)
124 * points to the deflated singular values.
125 *
126 * IDX (workspace) INTEGER array, dimension (N)
127 * This will contain the permutation used to sort the contents of
128 * D into ascending order.
129 *
130 * IDXC (output) INTEGER array, dimension (N)
131 * This will contain the permutation used to arrange the columns
132 * of the deflated U matrix into three groups: the first group
133 * contains non-zero entries only at and above NL, the second
134 * contains non-zero entries only below NL+2, and the third is
135 * dense.
136 *
137 * IDXQ (input/output) INTEGER array, dimension (N)
138 * This contains the permutation which separately sorts the two
139 * sub-problems in D into ascending order. Note that entries in
140 * the first hlaf of this permutation must first be moved one
141 * position backward; and entries in the second half
142 * must first have NL+1 added to their values.
143 *
144 * COLTYP (workspace/output) INTEGER array, dimension (N)
145 * As workspace, this will contain a label which will indicate
146 * which of the following types a column in the U2 matrix or a
147 * row in the VT2 matrix is:
148 * 1 : non-zero in the upper half only
149 * 2 : non-zero in the lower half only
150 * 3 : dense
151 * 4 : deflated
152 *
153 * On exit, it is an array of dimension 4, with COLTYP(I) being
154 * the dimension of the I-th type columns.
155 *
156 * INFO (output) INTEGER
157 * = 0: successful exit.
158 * < 0: if INFO = -i, the i-th argument had an illegal value.
159 *
160 * Further Details
161 * ===============
162 *
163 * Based on contributions by
164 * Ming Gu and Huan Ren, Computer Science Division, University of
165 * California at Berkeley, USA
166 *
167 * =====================================================================
168 *
169 * .. Parameters ..
170 REAL ZERO, ONE, TWO, EIGHT
171 PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0,
172 $ EIGHT = 8.0E+0 )
173 * ..
174 * .. Local Arrays ..
175 INTEGER CTOT( 4 ), PSM( 4 )
176 * ..
177 * .. Local Scalars ..
178 INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
179 $ N, NLP1, NLP2
180 REAL C, EPS, HLFTOL, S, TAU, TOL, Z1
181 * ..
182 * .. External Functions ..
183 REAL SLAMCH, SLAPY2
184 EXTERNAL SLAMCH, SLAPY2
185 * ..
186 * .. External Subroutines ..
187 EXTERNAL SCOPY, SLACPY, SLAMRG, SLASET, SROT, XERBLA
188 * ..
189 * .. Intrinsic Functions ..
190 INTRINSIC ABS, MAX
191 * ..
192 * .. Executable Statements ..
193 *
194 * Test the input parameters.
195 *
196 INFO = 0
197 *
198 IF( NL.LT.1 ) THEN
199 INFO = -1
200 ELSE IF( NR.LT.1 ) THEN
201 INFO = -2
202 ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
203 INFO = -3
204 END IF
205 *
206 N = NL + NR + 1
207 M = N + SQRE
208 *
209 IF( LDU.LT.N ) THEN
210 INFO = -10
211 ELSE IF( LDVT.LT.M ) THEN
212 INFO = -12
213 ELSE IF( LDU2.LT.N ) THEN
214 INFO = -15
215 ELSE IF( LDVT2.LT.M ) THEN
216 INFO = -17
217 END IF
218 IF( INFO.NE.0 ) THEN
219 CALL XERBLA( 'SLASD2', -INFO )
220 RETURN
221 END IF
222 *
223 NLP1 = NL + 1
224 NLP2 = NL + 2
225 *
226 * Generate the first part of the vector Z; and move the singular
227 * values in the first part of D one position backward.
228 *
229 Z1 = ALPHA*VT( NLP1, NLP1 )
230 Z( 1 ) = Z1
231 DO 10 I = NL, 1, -1
232 Z( I+1 ) = ALPHA*VT( I, NLP1 )
233 D( I+1 ) = D( I )
234 IDXQ( I+1 ) = IDXQ( I ) + 1
235 10 CONTINUE
236 *
237 * Generate the second part of the vector Z.
238 *
239 DO 20 I = NLP2, M
240 Z( I ) = BETA*VT( I, NLP2 )
241 20 CONTINUE
242 *
243 * Initialize some reference arrays.
244 *
245 DO 30 I = 2, NLP1
246 COLTYP( I ) = 1
247 30 CONTINUE
248 DO 40 I = NLP2, N
249 COLTYP( I ) = 2
250 40 CONTINUE
251 *
252 * Sort the singular values into increasing order
253 *
254 DO 50 I = NLP2, N
255 IDXQ( I ) = IDXQ( I ) + NLP1
256 50 CONTINUE
257 *
258 * DSIGMA, IDXC, IDXC, and the first column of U2
259 * are used as storage space.
260 *
261 DO 60 I = 2, N
262 DSIGMA( I ) = D( IDXQ( I ) )
263 U2( I, 1 ) = Z( IDXQ( I ) )
264 IDXC( I ) = COLTYP( IDXQ( I ) )
265 60 CONTINUE
266 *
267 CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
268 *
269 DO 70 I = 2, N
270 IDXI = 1 + IDX( I )
271 D( I ) = DSIGMA( IDXI )
272 Z( I ) = U2( IDXI, 1 )
273 COLTYP( I ) = IDXC( IDXI )
274 70 CONTINUE
275 *
276 * Calculate the allowable deflation tolerance
277 *
278 EPS = SLAMCH( 'Epsilon' )
279 TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
280 TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
281 *
282 * There are 2 kinds of deflation -- first a value in the z-vector
283 * is small, second two (or more) singular values are very close
284 * together (their difference is small).
285 *
286 * If the value in the z-vector is small, we simply permute the
287 * array so that the corresponding singular value is moved to the
288 * end.
289 *
290 * If two values in the D-vector are close, we perform a two-sided
291 * rotation designed to make one of the corresponding z-vector
292 * entries zero, and then permute the array so that the deflated
293 * singular value is moved to the end.
294 *
295 * If there are multiple singular values then the problem deflates.
296 * Here the number of equal singular values are found. As each equal
297 * singular value is found, an elementary reflector is computed to
298 * rotate the corresponding singular subspace so that the
299 * corresponding components of Z are zero in this new basis.
300 *
301 K = 1
302 K2 = N + 1
303 DO 80 J = 2, N
304 IF( ABS( Z( J ) ).LE.TOL ) THEN
305 *
306 * Deflate due to small z component.
307 *
308 K2 = K2 - 1
309 IDXP( K2 ) = J
310 COLTYP( J ) = 4
311 IF( J.EQ.N )
312 $ GO TO 120
313 ELSE
314 JPREV = J
315 GO TO 90
316 END IF
317 80 CONTINUE
318 90 CONTINUE
319 J = JPREV
320 100 CONTINUE
321 J = J + 1
322 IF( J.GT.N )
323 $ GO TO 110
324 IF( ABS( Z( J ) ).LE.TOL ) THEN
325 *
326 * Deflate due to small z component.
327 *
328 K2 = K2 - 1
329 IDXP( K2 ) = J
330 COLTYP( J ) = 4
331 ELSE
332 *
333 * Check if singular values are close enough to allow deflation.
334 *
335 IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
336 *
337 * Deflation is possible.
338 *
339 S = Z( JPREV )
340 C = Z( J )
341 *
342 * Find sqrt(a**2+b**2) without overflow or
343 * destructive underflow.
344 *
345 TAU = SLAPY2( C, S )
346 C = C / TAU
347 S = -S / TAU
348 Z( J ) = TAU
349 Z( JPREV ) = ZERO
350 *
351 * Apply back the Givens rotation to the left and right
352 * singular vector matrices.
353 *
354 IDXJP = IDXQ( IDX( JPREV )+1 )
355 IDXJ = IDXQ( IDX( J )+1 )
356 IF( IDXJP.LE.NLP1 ) THEN
357 IDXJP = IDXJP - 1
358 END IF
359 IF( IDXJ.LE.NLP1 ) THEN
360 IDXJ = IDXJ - 1
361 END IF
362 CALL SROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
363 CALL SROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
364 $ S )
365 IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
366 COLTYP( J ) = 3
367 END IF
368 COLTYP( JPREV ) = 4
369 K2 = K2 - 1
370 IDXP( K2 ) = JPREV
371 JPREV = J
372 ELSE
373 K = K + 1
374 U2( K, 1 ) = Z( JPREV )
375 DSIGMA( K ) = D( JPREV )
376 IDXP( K ) = JPREV
377 JPREV = J
378 END IF
379 END IF
380 GO TO 100
381 110 CONTINUE
382 *
383 * Record the last singular value.
384 *
385 K = K + 1
386 U2( K, 1 ) = Z( JPREV )
387 DSIGMA( K ) = D( JPREV )
388 IDXP( K ) = JPREV
389 *
390 120 CONTINUE
391 *
392 * Count up the total number of the various types of columns, then
393 * form a permutation which positions the four column types into
394 * four groups of uniform structure (although one or more of these
395 * groups may be empty).
396 *
397 DO 130 J = 1, 4
398 CTOT( J ) = 0
399 130 CONTINUE
400 DO 140 J = 2, N
401 CT = COLTYP( J )
402 CTOT( CT ) = CTOT( CT ) + 1
403 140 CONTINUE
404 *
405 * PSM(*) = Position in SubMatrix (of types 1 through 4)
406 *
407 PSM( 1 ) = 2
408 PSM( 2 ) = 2 + CTOT( 1 )
409 PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
410 PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
411 *
412 * Fill out the IDXC array so that the permutation which it induces
413 * will place all type-1 columns first, all type-2 columns next,
414 * then all type-3's, and finally all type-4's, starting from the
415 * second column. This applies similarly to the rows of VT.
416 *
417 DO 150 J = 2, N
418 JP = IDXP( J )
419 CT = COLTYP( JP )
420 IDXC( PSM( CT ) ) = J
421 PSM( CT ) = PSM( CT ) + 1
422 150 CONTINUE
423 *
424 * Sort the singular values and corresponding singular vectors into
425 * DSIGMA, U2, and VT2 respectively. The singular values/vectors
426 * which were not deflated go into the first K slots of DSIGMA, U2,
427 * and VT2 respectively, while those which were deflated go into the
428 * last N - K slots, except that the first column/row will be treated
429 * separately.
430 *
431 DO 160 J = 2, N
432 JP = IDXP( J )
433 DSIGMA( J ) = D( JP )
434 IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
435 IF( IDXJ.LE.NLP1 ) THEN
436 IDXJ = IDXJ - 1
437 END IF
438 CALL SCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
439 CALL SCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
440 160 CONTINUE
441 *
442 * Determine DSIGMA(1), DSIGMA(2) and Z(1)
443 *
444 DSIGMA( 1 ) = ZERO
445 HLFTOL = TOL / TWO
446 IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
447 $ DSIGMA( 2 ) = HLFTOL
448 IF( M.GT.N ) THEN
449 Z( 1 ) = SLAPY2( Z1, Z( M ) )
450 IF( Z( 1 ).LE.TOL ) THEN
451 C = ONE
452 S = ZERO
453 Z( 1 ) = TOL
454 ELSE
455 C = Z1 / Z( 1 )
456 S = Z( M ) / Z( 1 )
457 END IF
458 ELSE
459 IF( ABS( Z1 ).LE.TOL ) THEN
460 Z( 1 ) = TOL
461 ELSE
462 Z( 1 ) = Z1
463 END IF
464 END IF
465 *
466 * Move the rest of the updating row to Z.
467 *
468 CALL SCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
469 *
470 * Determine the first column of U2, the first row of VT2 and the
471 * last row of VT.
472 *
473 CALL SLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
474 U2( NLP1, 1 ) = ONE
475 IF( M.GT.N ) THEN
476 DO 170 I = 1, NLP1
477 VT( M, I ) = -S*VT( NLP1, I )
478 VT2( 1, I ) = C*VT( NLP1, I )
479 170 CONTINUE
480 DO 180 I = NLP2, M
481 VT2( 1, I ) = S*VT( M, I )
482 VT( M, I ) = C*VT( M, I )
483 180 CONTINUE
484 ELSE
485 CALL SCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
486 END IF
487 IF( M.GT.N ) THEN
488 CALL SCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
489 END IF
490 *
491 * The deflated singular values and their corresponding vectors go
492 * into the back of D, U, and V respectively.
493 *
494 IF( N.GT.K ) THEN
495 CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
496 CALL SLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
497 $ LDU )
498 CALL SLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
499 $ LDVT )
500 END IF
501 *
502 * Copy CTOT into COLTYP for referencing in SLASD3.
503 *
504 DO 190 J = 1, 4
505 COLTYP( J ) = CTOT( J )
506 190 CONTINUE
507 *
508 RETURN
509 *
510 * End of SLASD2
511 *
512 END