octave-lyh: libcruft/lapack/slasd2.f comparison

comparison libcruft/lapack/slasd2.f @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code

author	David Bateman <dbateman@free.fr>
date	Sun, 27 Apr 2008 22:34:17 +0200
parents
children

comparison

equal deleted inserted replaced

-:45f5faba05a2
+:82be108cc558
+SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
+$                   LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
+$                   IDXC, IDXQ, COLTYP, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.1) --
+*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
+*     November 2006
+*
+*     .. Scalar Arguments ..
+INTEGER            INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
+REAL               ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+INTEGER            COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
+$                   IDXQ( * )
+REAL               D( * ), DSIGMA( * ), U( LDU, * ),
+$                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
+$                   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASD2 merges the two sets of singular values together into a single
+*  sorted set.  Then it tries to deflate the size of the problem.
+*  There are two ways in which deflation can occur:  when two or more
+*  singular values are close together or if there is a tiny entry in the
+*  Z vector.  For each such occurrence the order of the related secular
+*  equation problem is reduced by one.
+*
+*  SLASD2 is called from SLASD1.
+*
+*  Arguments
+*  =========
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block.  NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block.  NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has N = NL + NR + 1 rows and
+*         M = N + SQRE >= N columns.
+*
+*  K      (output) INTEGER
+*         Contains the dimension of the non-deflated matrix,
+*         This is the order of the related secular equation. 1 <= K <=N.
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry D contains the singular values of the two submatrices
+*         to be combined.  On exit D contains the trailing (N-K) updated
+*         singular values (those which were deflated) sorted into
+*         increasing order.
+*
+*  Z      (output) REAL array, dimension (N)
+*         On exit Z contains the updating row vector in the secular
+*         equation.
+*
+*  ALPHA  (input) REAL
+*         Contains the diagonal element associated with the added row.
+*
+*  BETA   (input) REAL
+*         Contains the off-diagonal element associated with the added
+*         row.
+*
+*  U      (input/output) REAL array, dimension (LDU,N)
+*         On entry U contains the left singular vectors of two
+*         submatrices in the two square blocks with corners at (1,1),
+*         (NL, NL), and (NL+2, NL+2), (N,N).
+*         On exit U contains the trailing (N-K) updated left singular
+*         vectors (those which were deflated) in its last N-K columns.
+*
+*  LDU    (input) INTEGER
+*         The leading dimension of the array U.  LDU >= N.
+*
+*  VT     (input/output) REAL array, dimension (LDVT,M)
+*         On entry VT' contains the right singular vectors of two
+*         submatrices in the two square blocks with corners at (1,1),
+*         (NL+1, NL+1), and (NL+2, NL+2), (M,M).
+*         On exit VT' contains the trailing (N-K) updated right singular
+*         vectors (those which were deflated) in its last N-K columns.
+*         In case SQRE =1, the last row of VT spans the right null
+*         space.
+*
+*  LDVT   (input) INTEGER
+*         The leading dimension of the array VT.  LDVT >= M.
+*
+*  DSIGMA (output) REAL array, dimension (N)
+*         Contains a copy of the diagonal elements (K-1 singular values
+*         and one zero) in the secular equation.
+*
+*  U2     (output) REAL array, dimension (LDU2,N)
+*         Contains a copy of the first K-1 left singular vectors which
+*         will be used by SLASD3 in a matrix multiply (SGEMM) to solve
+*         for the new left singular vectors. U2 is arranged into four
+*         blocks. The first block contains a column with 1 at NL+1 and
+*         zero everywhere else; the second block contains non-zero
+*         entries only at and above NL; the third contains non-zero
+*         entries only below NL+1; and the fourth is dense.
+*
+*  LDU2   (input) INTEGER
+*         The leading dimension of the array U2.  LDU2 >= N.
+*
+*  VT2    (output) REAL array, dimension (LDVT2,N)
+*         VT2' contains a copy of the first K right singular vectors
+*         which will be used by SLASD3 in a matrix multiply (SGEMM) to
+*         solve for the new right singular vectors. VT2 is arranged into
+*         three blocks. The first block contains a row that corresponds
+*         to the special 0 diagonal element in SIGMA; the second block
+*         contains non-zeros only at and before NL +1; the third block
+*         contains non-zeros only at and after  NL +2.
+*
+*  LDVT2  (input) INTEGER
+*         The leading dimension of the array VT2.  LDVT2 >= M.
+*
+*  IDXP   (workspace) INTEGER array, dimension (N)
+*         This will contain the permutation used to place deflated
+*         values of D at the end of the array. On output IDXP(2:K)
+*         points to the nondeflated D-values and IDXP(K+1:N)
+*         points to the deflated singular values.
+*
+*  IDX    (workspace) INTEGER array, dimension (N)
+*         This will contain the permutation used to sort the contents of
+*         D into ascending order.
+*
+*  IDXC   (output) INTEGER array, dimension (N)
+*         This will contain the permutation used to arrange the columns
+*         of the deflated U matrix into three groups:  the first group
+*         contains non-zero entries only at and above NL, the second
+*         contains non-zero entries only below NL+2, and the third is
+*         dense.
+*
+*  IDXQ   (input/output) INTEGER array, dimension (N)
+*         This contains the permutation which separately sorts the two
+*         sub-problems in D into ascending order.  Note that entries in
+*         the first hlaf of this permutation must first be moved one
+*         position backward; and entries in the second half
+*         must first have NL+1 added to their values.
+*
+*  COLTYP (workspace/output) INTEGER array, dimension (N)
+*         As workspace, this will contain a label which will indicate
+*         which of the following types a column in the U2 matrix or a
+*         row in the VT2 matrix is:
+*         1 : non-zero in the upper half only
+*         2 : non-zero in the lower half only
+*         3 : dense
+*         4 : deflated
+*
+*         On exit, it is an array of dimension 4, with COLTYP(I) being
+*         the dimension of the I-th type columns.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+REAL               ZERO, ONE, TWO, EIGHT
+PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0,
+$                   EIGHT = 8.0E+0 )
+*     ..
+*     .. Local Arrays ..
+INTEGER            CTOT( 4 ), PSM( 4 )
+*     ..
+*     .. Local Scalars ..
+INTEGER            CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
+$                   N, NLP1, NLP2
+REAL               C, EPS, HLFTOL, S, TAU, TOL, Z1
+*     ..
+*     .. External Functions ..
+REAL               SLAMCH, SLAPY2
+EXTERNAL           SLAMCH, SLAPY2
+*     ..
+*     .. External Subroutines ..
+EXTERNAL           SCOPY, SLACPY, SLAMRG, SLASET, SROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+INFO = 0
+*
+IF( NL.LT.1 ) THEN
+INFO = -1
+ELSE IF( NR.LT.1 ) THEN
+INFO = -2
+ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
+INFO = -3
+END IF
+*
+N = NL + NR + 1
+M = N + SQRE
+*
+IF( LDU.LT.N ) THEN
+INFO = -10
+ELSE IF( LDVT.LT.M ) THEN
+INFO = -12
+ELSE IF( LDU2.LT.N ) THEN
+INFO = -15
+ELSE IF( LDVT2.LT.M ) THEN
+INFO = -17
+END IF
+IF( INFO.NE.0 ) THEN
+CALL XERBLA( 'SLASD2', -INFO )
+RETURN
+END IF
+*
+NLP1 = NL + 1
+NLP2 = NL + 2
+*
+*     Generate the first part of the vector Z; and move the singular
+*     values in the first part of D one position backward.
+*
+Z1 = ALPHA*VT( NLP1, NLP1 )
+Z( 1 ) = Z1
+DO 10 I = NL, 1, -1
+Z( I+1 ) = ALPHA*VT( I, NLP1 )
+D( I+1 ) = D( I )
+IDXQ( I+1 ) = IDXQ( I ) + 1
+10 CONTINUE
+*
+*     Generate the second part of the vector Z.
+*
+DO 20 I = NLP2, M
+Z( I ) = BETA*VT( I, NLP2 )
+20 CONTINUE
+*
+*     Initialize some reference arrays.
+*
+DO 30 I = 2, NLP1
+COLTYP( I ) = 1
+30 CONTINUE
+DO 40 I = NLP2, N
+COLTYP( I ) = 2
+40 CONTINUE
+*
+*     Sort the singular values into increasing order
+*
+DO 50 I = NLP2, N
+IDXQ( I ) = IDXQ( I ) + NLP1
+50 CONTINUE
+*
+*     DSIGMA, IDXC, IDXC, and the first column of U2
+*     are used as storage space.
+*
+DO 60 I = 2, N
+DSIGMA( I ) = D( IDXQ( I ) )
+U2( I, 1 ) = Z( IDXQ( I ) )
+IDXC( I ) = COLTYP( IDXQ( I ) )
+60 CONTINUE
+*
+CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
+*
+DO 70 I = 2, N
+IDXI = 1 + IDX( I )
+D( I ) = DSIGMA( IDXI )
+Z( I ) = U2( IDXI, 1 )
+COLTYP( I ) = IDXC( IDXI )
+70 CONTINUE
+*
+*     Calculate the allowable deflation tolerance
+*
+EPS = SLAMCH( 'Epsilon' )
+TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
+TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
+*
+*     There are 2 kinds of deflation -- first a value in the z-vector
+*     is small, second two (or more) singular values are very close
+*     together (their difference is small).
+*
+*     If the value in the z-vector is small, we simply permute the
+*     array so that the corresponding singular value is moved to the
+*     end.
+*
+*     If two values in the D-vector are close, we perform a two-sided
+*     rotation designed to make one of the corresponding z-vector
+*     entries zero, and then permute the array so that the deflated
+*     singular value is moved to the end.
+*
+*     If there are multiple singular values then the problem deflates.
+*     Here the number of equal singular values are found.  As each equal
+*     singular value is found, an elementary reflector is computed to
+*     rotate the corresponding singular subspace so that the
+*     corresponding components of Z are zero in this new basis.
+*
+K = 1
+K2 = N + 1
+DO 80 J = 2, N
+IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+K2 = K2 - 1
+IDXP( K2 ) = J
+COLTYP( J ) = 4
+IF( J.EQ.N )
+$         GO TO 120
+ELSE
+JPREV = J
+GO TO 90
+END IF
+80 CONTINUE
+90 CONTINUE
+J = JPREV
+100 CONTINUE
+J = J + 1
+IF( J.GT.N )
+$   GO TO 110
+IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+K2 = K2 - 1
+IDXP( K2 ) = J
+COLTYP( J ) = 4
+ELSE
+*
+*        Check if singular values are close enough to allow deflation.
+*
+IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+S = Z( JPREV )
+C = Z( J )
+*
+*           Find sqrt(a**2+b**2) without overflow or
+*           destructive underflow.
+*
+TAU = SLAPY2( C, S )
+C = C / TAU
+S = -S / TAU
+Z( J ) = TAU
+Z( JPREV ) = ZERO
+*
+*           Apply back the Givens rotation to the left and right
+*           singular vector matrices.
+*
+IDXJP = IDXQ( IDX( JPREV )+1 )
+IDXJ = IDXQ( IDX( J )+1 )
+IF( IDXJP.LE.NLP1 ) THEN
+IDXJP = IDXJP - 1
+END IF
+IF( IDXJ.LE.NLP1 ) THEN
+IDXJ = IDXJ - 1
+END IF
+CALL SROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
+CALL SROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
+$                 S )
+IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
+COLTYP( J ) = 3
+END IF
+COLTYP( JPREV ) = 4
+K2 = K2 - 1
+IDXP( K2 ) = JPREV
+JPREV = J
+ELSE
+K = K + 1
+U2( K, 1 ) = Z( JPREV )
+DSIGMA( K ) = D( JPREV )
+IDXP( K ) = JPREV
+JPREV = J
+END IF
+END IF
+GO TO 100
+110 CONTINUE
+*
+*     Record the last singular value.
+*
+K = K + 1
+U2( K, 1 ) = Z( JPREV )
+DSIGMA( K ) = D( JPREV )
+IDXP( K ) = JPREV
+*
+120 CONTINUE
+*
+*     Count up the total number of the various types of columns, then
+*     form a permutation which positions the four column types into
+*     four groups of uniform structure (although one or more of these
+*     groups may be empty).
+*
+DO 130 J = 1, 4
+CTOT( J ) = 0
+130 CONTINUE
+DO 140 J = 2, N
+CT = COLTYP( J )
+CTOT( CT ) = CTOT( CT ) + 1
+140 CONTINUE
+*
+*     PSM(*) = Position in SubMatrix (of types 1 through 4)
+*
+PSM( 1 ) = 2
+PSM( 2 ) = 2 + CTOT( 1 )
+PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
+PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
+*
+*     Fill out the IDXC array so that the permutation which it induces
+*     will place all type-1 columns first, all type-2 columns next,
+*     then all type-3's, and finally all type-4's, starting from the
+*     second column. This applies similarly to the rows of VT.
+*
+DO 150 J = 2, N
+JP = IDXP( J )
+CT = COLTYP( JP )
+IDXC( PSM( CT ) ) = J
+PSM( CT ) = PSM( CT ) + 1
+150 CONTINUE
+*
+*     Sort the singular values and corresponding singular vectors into
+*     DSIGMA, U2, and VT2 respectively.  The singular values/vectors
+*     which were not deflated go into the first K slots of DSIGMA, U2,
+*     and VT2 respectively, while those which were deflated go into the
+*     last N - K slots, except that the first column/row will be treated
+*     separately.
+*
+DO 160 J = 2, N
+JP = IDXP( J )
+DSIGMA( J ) = D( JP )
+IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
+IF( IDXJ.LE.NLP1 ) THEN
+IDXJ = IDXJ - 1
+END IF
+CALL SCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
+CALL SCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
+160 CONTINUE
+*
+*     Determine DSIGMA(1), DSIGMA(2) and Z(1)
+*
+DSIGMA( 1 ) = ZERO
+HLFTOL = TOL / TWO
+IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
+$   DSIGMA( 2 ) = HLFTOL
+IF( M.GT.N ) THEN
+Z( 1 ) = SLAPY2( Z1, Z( M ) )
+IF( Z( 1 ).LE.TOL ) THEN
+C = ONE
+S = ZERO
+Z( 1 ) = TOL
+ELSE
+C = Z1 / Z( 1 )
+S = Z( M ) / Z( 1 )
+END IF
+ELSE
+IF( ABS( Z1 ).LE.TOL ) THEN
+Z( 1 ) = TOL
+ELSE
+Z( 1 ) = Z1
+END IF
+END IF
+*
+*     Move the rest of the updating row to Z.
+*
+CALL SCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
+*
+*     Determine the first column of U2, the first row of VT2 and the
+*     last row of VT.
+*
+CALL SLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
+U2( NLP1, 1 ) = ONE
+IF( M.GT.N ) THEN
+DO 170 I = 1, NLP1
+VT( M, I ) = -S*VT( NLP1, I )
+VT2( 1, I ) = C*VT( NLP1, I )
+170    CONTINUE
+DO 180 I = NLP2, M
+VT2( 1, I ) = S*VT( M, I )
+VT( M, I ) = C*VT( M, I )
+180    CONTINUE
+ELSE
+CALL SCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
+END IF
+IF( M.GT.N ) THEN
+CALL SCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
+END IF
+*
+*     The deflated singular values and their corresponding vectors go
+*     into the back of D, U, and V respectively.
+*
+IF( N.GT.K ) THEN
+CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
+CALL SLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
+$                LDU )
+CALL SLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
+$                LDVT )
+END IF
+*
+*     Copy CTOT into COLTYP for referencing in SLASD3.
+*
+DO 190 J = 1, 4
+COLTYP( J ) = CTOT( J )
+190 CONTINUE
+*
+RETURN
+*
+*     End of SLASD2
+*
+END

Mercurial > hg > octave-lyh

comparison libcruft/lapack/slasd2.f @ 7789:82be108cc558