Mercurial > hg > octave-lyh
comparison libcruft/lapack/slasd3.f @ 7789:82be108cc558
First attempt at single precision tyeps
* * *
corrections to qrupdate single precision routines
* * *
prefer demotion to single over promotion to double
* * *
Add single precision support to log2 function
* * *
Trivial PROJECT file update
* * *
Cache optimized hermitian/transpose methods
* * *
Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author | David Bateman <dbateman@free.fr> |
---|---|
date | Sun, 27 Apr 2008 22:34:17 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
7788:45f5faba05a2 | 7789:82be108cc558 |
---|---|
1 SUBROUTINE SLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2, | |
2 $ LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, | |
3 $ INFO ) | |
4 * | |
5 * -- LAPACK auxiliary routine (version 3.1) -- | |
6 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. | |
7 * November 2006 | |
8 * | |
9 * .. Scalar Arguments .. | |
10 INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR, | |
11 $ SQRE | |
12 * .. | |
13 * .. Array Arguments .. | |
14 INTEGER CTOT( * ), IDXC( * ) | |
15 REAL D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ), | |
16 $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), | |
17 $ Z( * ) | |
18 * .. | |
19 * | |
20 * Purpose | |
21 * ======= | |
22 * | |
23 * SLASD3 finds all the square roots of the roots of the secular | |
24 * equation, as defined by the values in D and Z. It makes the | |
25 * appropriate calls to SLASD4 and then updates the singular | |
26 * vectors by matrix multiplication. | |
27 * | |
28 * This code makes very mild assumptions about floating point | |
29 * arithmetic. It will work on machines with a guard digit in | |
30 * add/subtract, or on those binary machines without guard digits | |
31 * which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. | |
32 * It could conceivably fail on hexadecimal or decimal machines | |
33 * without guard digits, but we know of none. | |
34 * | |
35 * SLASD3 is called from SLASD1. | |
36 * | |
37 * Arguments | |
38 * ========= | |
39 * | |
40 * NL (input) INTEGER | |
41 * The row dimension of the upper block. NL >= 1. | |
42 * | |
43 * NR (input) INTEGER | |
44 * The row dimension of the lower block. NR >= 1. | |
45 * | |
46 * SQRE (input) INTEGER | |
47 * = 0: the lower block is an NR-by-NR square matrix. | |
48 * = 1: the lower block is an NR-by-(NR+1) rectangular matrix. | |
49 * | |
50 * The bidiagonal matrix has N = NL + NR + 1 rows and | |
51 * M = N + SQRE >= N columns. | |
52 * | |
53 * K (input) INTEGER | |
54 * The size of the secular equation, 1 =< K = < N. | |
55 * | |
56 * D (output) REAL array, dimension(K) | |
57 * On exit the square roots of the roots of the secular equation, | |
58 * in ascending order. | |
59 * | |
60 * Q (workspace) REAL array, | |
61 * dimension at least (LDQ,K). | |
62 * | |
63 * LDQ (input) INTEGER | |
64 * The leading dimension of the array Q. LDQ >= K. | |
65 * | |
66 * DSIGMA (input/output) REAL array, dimension(K) | |
67 * The first K elements of this array contain the old roots | |
68 * of the deflated updating problem. These are the poles | |
69 * of the secular equation. | |
70 * | |
71 * U (output) REAL array, dimension (LDU, N) | |
72 * The last N - K columns of this matrix contain the deflated | |
73 * left singular vectors. | |
74 * | |
75 * LDU (input) INTEGER | |
76 * The leading dimension of the array U. LDU >= N. | |
77 * | |
78 * U2 (input) REAL array, dimension (LDU2, N) | |
79 * The first K columns of this matrix contain the non-deflated | |
80 * left singular vectors for the split problem. | |
81 * | |
82 * LDU2 (input) INTEGER | |
83 * The leading dimension of the array U2. LDU2 >= N. | |
84 * | |
85 * VT (output) REAL array, dimension (LDVT, M) | |
86 * The last M - K columns of VT' contain the deflated | |
87 * right singular vectors. | |
88 * | |
89 * LDVT (input) INTEGER | |
90 * The leading dimension of the array VT. LDVT >= N. | |
91 * | |
92 * VT2 (input/output) REAL array, dimension (LDVT2, N) | |
93 * The first K columns of VT2' contain the non-deflated | |
94 * right singular vectors for the split problem. | |
95 * | |
96 * LDVT2 (input) INTEGER | |
97 * The leading dimension of the array VT2. LDVT2 >= N. | |
98 * | |
99 * IDXC (input) INTEGER array, dimension (N) | |
100 * The permutation used to arrange the columns of U (and rows of | |
101 * VT) into three groups: the first group contains non-zero | |
102 * entries only at and above (or before) NL +1; the second | |
103 * contains non-zero entries only at and below (or after) NL+2; | |
104 * and the third is dense. The first column of U and the row of | |
105 * VT are treated separately, however. | |
106 * | |
107 * The rows of the singular vectors found by SLASD4 | |
108 * must be likewise permuted before the matrix multiplies can | |
109 * take place. | |
110 * | |
111 * CTOT (input) INTEGER array, dimension (4) | |
112 * A count of the total number of the various types of columns | |
113 * in U (or rows in VT), as described in IDXC. The fourth column | |
114 * type is any column which has been deflated. | |
115 * | |
116 * Z (input/output) REAL array, dimension (K) | |
117 * The first K elements of this array contain the components | |
118 * of the deflation-adjusted updating row vector. | |
119 * | |
120 * INFO (output) INTEGER | |
121 * = 0: successful exit. | |
122 * < 0: if INFO = -i, the i-th argument had an illegal value. | |
123 * > 0: if INFO = 1, an singular value did not converge | |
124 * | |
125 * Further Details | |
126 * =============== | |
127 * | |
128 * Based on contributions by | |
129 * Ming Gu and Huan Ren, Computer Science Division, University of | |
130 * California at Berkeley, USA | |
131 * | |
132 * ===================================================================== | |
133 * | |
134 * .. Parameters .. | |
135 REAL ONE, ZERO, NEGONE | |
136 PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0, | |
137 $ NEGONE = -1.0E+0 ) | |
138 * .. | |
139 * .. Local Scalars .. | |
140 INTEGER CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1 | |
141 REAL RHO, TEMP | |
142 * .. | |
143 * .. External Functions .. | |
144 REAL SLAMC3, SNRM2 | |
145 EXTERNAL SLAMC3, SNRM2 | |
146 * .. | |
147 * .. External Subroutines .. | |
148 EXTERNAL SCOPY, SGEMM, SLACPY, SLASCL, SLASD4, XERBLA | |
149 * .. | |
150 * .. Intrinsic Functions .. | |
151 INTRINSIC ABS, SIGN, SQRT | |
152 * .. | |
153 * .. Executable Statements .. | |
154 * | |
155 * Test the input parameters. | |
156 * | |
157 INFO = 0 | |
158 * | |
159 IF( NL.LT.1 ) THEN | |
160 INFO = -1 | |
161 ELSE IF( NR.LT.1 ) THEN | |
162 INFO = -2 | |
163 ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN | |
164 INFO = -3 | |
165 END IF | |
166 * | |
167 N = NL + NR + 1 | |
168 M = N + SQRE | |
169 NLP1 = NL + 1 | |
170 NLP2 = NL + 2 | |
171 * | |
172 IF( ( K.LT.1 ) .OR. ( K.GT.N ) ) THEN | |
173 INFO = -4 | |
174 ELSE IF( LDQ.LT.K ) THEN | |
175 INFO = -7 | |
176 ELSE IF( LDU.LT.N ) THEN | |
177 INFO = -10 | |
178 ELSE IF( LDU2.LT.N ) THEN | |
179 INFO = -12 | |
180 ELSE IF( LDVT.LT.M ) THEN | |
181 INFO = -14 | |
182 ELSE IF( LDVT2.LT.M ) THEN | |
183 INFO = -16 | |
184 END IF | |
185 IF( INFO.NE.0 ) THEN | |
186 CALL XERBLA( 'SLASD3', -INFO ) | |
187 RETURN | |
188 END IF | |
189 * | |
190 * Quick return if possible | |
191 * | |
192 IF( K.EQ.1 ) THEN | |
193 D( 1 ) = ABS( Z( 1 ) ) | |
194 CALL SCOPY( M, VT2( 1, 1 ), LDVT2, VT( 1, 1 ), LDVT ) | |
195 IF( Z( 1 ).GT.ZERO ) THEN | |
196 CALL SCOPY( N, U2( 1, 1 ), 1, U( 1, 1 ), 1 ) | |
197 ELSE | |
198 DO 10 I = 1, N | |
199 U( I, 1 ) = -U2( I, 1 ) | |
200 10 CONTINUE | |
201 END IF | |
202 RETURN | |
203 END IF | |
204 * | |
205 * Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can | |
206 * be computed with high relative accuracy (barring over/underflow). | |
207 * This is a problem on machines without a guard digit in | |
208 * add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). | |
209 * The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), | |
210 * which on any of these machines zeros out the bottommost | |
211 * bit of DSIGMA(I) if it is 1; this makes the subsequent | |
212 * subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation | |
213 * occurs. On binary machines with a guard digit (almost all | |
214 * machines) it does not change DSIGMA(I) at all. On hexadecimal | |
215 * and decimal machines with a guard digit, it slightly | |
216 * changes the bottommost bits of DSIGMA(I). It does not account | |
217 * for hexadecimal or decimal machines without guard digits | |
218 * (we know of none). We use a subroutine call to compute | |
219 * 2*DSIGMA(I) to prevent optimizing compilers from eliminating | |
220 * this code. | |
221 * | |
222 DO 20 I = 1, K | |
223 DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) | |
224 20 CONTINUE | |
225 * | |
226 * Keep a copy of Z. | |
227 * | |
228 CALL SCOPY( K, Z, 1, Q, 1 ) | |
229 * | |
230 * Normalize Z. | |
231 * | |
232 RHO = SNRM2( K, Z, 1 ) | |
233 CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) | |
234 RHO = RHO*RHO | |
235 * | |
236 * Find the new singular values. | |
237 * | |
238 DO 30 J = 1, K | |
239 CALL SLASD4( K, J, DSIGMA, Z, U( 1, J ), RHO, D( J ), | |
240 $ VT( 1, J ), INFO ) | |
241 * | |
242 * If the zero finder fails, the computation is terminated. | |
243 * | |
244 IF( INFO.NE.0 ) THEN | |
245 RETURN | |
246 END IF | |
247 30 CONTINUE | |
248 * | |
249 * Compute updated Z. | |
250 * | |
251 DO 60 I = 1, K | |
252 Z( I ) = U( I, K )*VT( I, K ) | |
253 DO 40 J = 1, I - 1 | |
254 Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / | |
255 $ ( DSIGMA( I )-DSIGMA( J ) ) / | |
256 $ ( DSIGMA( I )+DSIGMA( J ) ) ) | |
257 40 CONTINUE | |
258 DO 50 J = I, K - 1 | |
259 Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / | |
260 $ ( DSIGMA( I )-DSIGMA( J+1 ) ) / | |
261 $ ( DSIGMA( I )+DSIGMA( J+1 ) ) ) | |
262 50 CONTINUE | |
263 Z( I ) = SIGN( SQRT( ABS( Z( I ) ) ), Q( I, 1 ) ) | |
264 60 CONTINUE | |
265 * | |
266 * Compute left singular vectors of the modified diagonal matrix, | |
267 * and store related information for the right singular vectors. | |
268 * | |
269 DO 90 I = 1, K | |
270 VT( 1, I ) = Z( 1 ) / U( 1, I ) / VT( 1, I ) | |
271 U( 1, I ) = NEGONE | |
272 DO 70 J = 2, K | |
273 VT( J, I ) = Z( J ) / U( J, I ) / VT( J, I ) | |
274 U( J, I ) = DSIGMA( J )*VT( J, I ) | |
275 70 CONTINUE | |
276 TEMP = SNRM2( K, U( 1, I ), 1 ) | |
277 Q( 1, I ) = U( 1, I ) / TEMP | |
278 DO 80 J = 2, K | |
279 JC = IDXC( J ) | |
280 Q( J, I ) = U( JC, I ) / TEMP | |
281 80 CONTINUE | |
282 90 CONTINUE | |
283 * | |
284 * Update the left singular vector matrix. | |
285 * | |
286 IF( K.EQ.2 ) THEN | |
287 CALL SGEMM( 'N', 'N', N, K, K, ONE, U2, LDU2, Q, LDQ, ZERO, U, | |
288 $ LDU ) | |
289 GO TO 100 | |
290 END IF | |
291 IF( CTOT( 1 ).GT.0 ) THEN | |
292 CALL SGEMM( 'N', 'N', NL, K, CTOT( 1 ), ONE, U2( 1, 2 ), LDU2, | |
293 $ Q( 2, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) | |
294 IF( CTOT( 3 ).GT.0 ) THEN | |
295 KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) | |
296 CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), | |
297 $ LDU2, Q( KTEMP, 1 ), LDQ, ONE, U( 1, 1 ), LDU ) | |
298 END IF | |
299 ELSE IF( CTOT( 3 ).GT.0 ) THEN | |
300 KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) | |
301 CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), | |
302 $ LDU2, Q( KTEMP, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) | |
303 ELSE | |
304 CALL SLACPY( 'F', NL, K, U2, LDU2, U, LDU ) | |
305 END IF | |
306 CALL SCOPY( K, Q( 1, 1 ), LDQ, U( NLP1, 1 ), LDU ) | |
307 KTEMP = 2 + CTOT( 1 ) | |
308 CTEMP = CTOT( 2 ) + CTOT( 3 ) | |
309 CALL SGEMM( 'N', 'N', NR, K, CTEMP, ONE, U2( NLP2, KTEMP ), LDU2, | |
310 $ Q( KTEMP, 1 ), LDQ, ZERO, U( NLP2, 1 ), LDU ) | |
311 * | |
312 * Generate the right singular vectors. | |
313 * | |
314 100 CONTINUE | |
315 DO 120 I = 1, K | |
316 TEMP = SNRM2( K, VT( 1, I ), 1 ) | |
317 Q( I, 1 ) = VT( 1, I ) / TEMP | |
318 DO 110 J = 2, K | |
319 JC = IDXC( J ) | |
320 Q( I, J ) = VT( JC, I ) / TEMP | |
321 110 CONTINUE | |
322 120 CONTINUE | |
323 * | |
324 * Update the right singular vector matrix. | |
325 * | |
326 IF( K.EQ.2 ) THEN | |
327 CALL SGEMM( 'N', 'N', K, M, K, ONE, Q, LDQ, VT2, LDVT2, ZERO, | |
328 $ VT, LDVT ) | |
329 RETURN | |
330 END IF | |
331 KTEMP = 1 + CTOT( 1 ) | |
332 CALL SGEMM( 'N', 'N', K, NLP1, KTEMP, ONE, Q( 1, 1 ), LDQ, | |
333 $ VT2( 1, 1 ), LDVT2, ZERO, VT( 1, 1 ), LDVT ) | |
334 KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) | |
335 IF( KTEMP.LE.LDVT2 ) | |
336 $ CALL SGEMM( 'N', 'N', K, NLP1, CTOT( 3 ), ONE, Q( 1, KTEMP ), | |
337 $ LDQ, VT2( KTEMP, 1 ), LDVT2, ONE, VT( 1, 1 ), | |
338 $ LDVT ) | |
339 * | |
340 KTEMP = CTOT( 1 ) + 1 | |
341 NRP1 = NR + SQRE | |
342 IF( KTEMP.GT.1 ) THEN | |
343 DO 130 I = 1, K | |
344 Q( I, KTEMP ) = Q( I, 1 ) | |
345 130 CONTINUE | |
346 DO 140 I = NLP2, M | |
347 VT2( KTEMP, I ) = VT2( 1, I ) | |
348 140 CONTINUE | |
349 END IF | |
350 CTEMP = 1 + CTOT( 2 ) + CTOT( 3 ) | |
351 CALL SGEMM( 'N', 'N', K, NRP1, CTEMP, ONE, Q( 1, KTEMP ), LDQ, | |
352 $ VT2( KTEMP, NLP2 ), LDVT2, ZERO, VT( 1, NLP2 ), LDVT ) | |
353 * | |
354 RETURN | |
355 * | |
356 * End of SLASD3 | |
357 * | |
358 END |