# HG changeset patch # User Jason Riedy # Date 1202868267 18000 # Node ID e9f10b4c05cf3e7ae35764e5186294079ee76ffd # Parent aa5208636bea0299901e0073211f7ee1215809b8 fix workspace size calculation for xGELSD diff --git a/liboctave/CMatrix.cc b/liboctave/CMatrix.cc --- a/liboctave/CMatrix.cc +++ b/liboctave/CMatrix.cc @@ -63,6 +63,13 @@ extern "C" { + octave_idx_type + F77_FUNC (ilaenv, ILAENV) (const octave_idx_type&, F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, + const octave_idx_type&, const octave_idx_type& + F77_CHAR_ARG_LEN_DECL F77_CHAR_ARG_LEN_DECL); + F77_RET_T F77_FUNC (zgebal, ZGEBAL) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, Complex*, const octave_idx_type&, octave_idx_type&, @@ -2492,8 +2499,12 @@ Array work (1); - // FIXME: Can SMLSIZ be other than 25? - octave_idx_type smlsiz = 25; + const octave_idx_type smlsiz + = F77_FUNC (ilaenv, ILAENV) (9, F77_CONST_CHAR_ARG2 ("ZGELSD", 6), + F77_CONST_CHAR_ARG2 (" ", 1), + 0, 0, 0, 0 + F77_CHAR_ARG_LEN (6) + F77_CHAR_ARG_LEN (1)); // We compute the size of rwork and iwork because ZGELSD in // older versions of LAPACK does not return them on a query @@ -2526,6 +2537,29 @@ ps, rcond, rank, work.fortran_vec (), lwork, prwork, piwork, info)); + // The workspace query is broken in at least LAPACK 3.0.0 + // through 3.1.1 when n > m. The obtuse formula below + // should provide sufficient workspace for DGELSD to operate + // efficiently. + if (n > m) + { + octave_idx_type addend = m; + + if (2*m-4 > addend) + addend = 2*m-4; + + if (nrhs > addend) + addend = nrhs; + + if (n-3*m > addend) + addend = n-3*m; + + const octave_idx_type lworkaround = 4*m + m*m + addend; + + if (std::real (work(0)) < lworkaround) + work(0) = lworkaround; + } + if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in zgelsd"); diff --git a/liboctave/ChangeLog b/liboctave/ChangeLog --- a/liboctave/ChangeLog +++ b/liboctave/ChangeLog @@ -1,3 +1,14 @@ +2008-02-12 Jason Riedy + + * dMatrix.cc (ILAENV): Declare LAPACK Fortran function. + (Matrix::lssolve): Use ILAENV to query smlsiz. And add an ugly + workaround for DGELSD's broken lwork query. The formula is from + LAPACK's dgelsd.f source and allocates enough workspace to use an + efficient algorithm in the short-and-fat case (n > m). + * CMatrix.cc (ILAENV): Declare LAPACK Fortran function. + (ComplexMatrix::lssolve): Use ILAENV to query smlsiz. And add an + ugly workaround for DGELSD's broken lwork query, as with double. + 2008-02-12 John W. Eaton * sparse-sort.cc: Don't explicitly instantiate diff --git a/liboctave/dMatrix.cc b/liboctave/dMatrix.cc --- a/liboctave/dMatrix.cc +++ b/liboctave/dMatrix.cc @@ -59,6 +59,13 @@ extern "C" { + octave_idx_type + F77_FUNC (ilaenv, ILAENV) (const octave_idx_type&, F77_CONST_CHAR_ARG_DECL, + F77_CONST_CHAR_ARG_DECL, + const octave_idx_type&, const octave_idx_type&, + const octave_idx_type&, const octave_idx_type& + F77_CHAR_ARG_LEN_DECL F77_CHAR_ARG_LEN_DECL); + F77_RET_T F77_FUNC (dgebal, DGEBAL) (F77_CONST_CHAR_ARG_DECL, const octave_idx_type&, double*, const octave_idx_type&, octave_idx_type&, @@ -2103,8 +2110,12 @@ Array work (1); - // FIXME: Can SMLSIZ be other than 25? - octave_idx_type smlsiz = 25; + const octave_idx_type smlsiz + = F77_FUNC (ilaenv, ILAENV) (9, F77_CONST_CHAR_ARG2 ("DGELSD", 6), + F77_CONST_CHAR_ARG2 (" ", 1), + 0, 0, 0, 0 + F77_CHAR_ARG_LEN (6) + F77_CHAR_ARG_LEN (1)); // We compute the size of iwork because DGELSD in older versions // of LAPACK does not return it on a query call. @@ -2129,6 +2140,35 @@ ps, rcond, rank, work.fortran_vec (), lwork, piwork, info)); + // The workspace query is broken in at least LAPACK 3.0.0 + // through 3.1.1 when n > m. The obtuse formula below + // should provide sufficient workspace for DGELSD to operate + // efficiently. + if (n > m) + { + const octave_idx_type wlalsd + = 9*m + 2*m*smlsiz + 8*m*nlvl + m*nrhs + (smlsiz+1)*(smlsiz+1); + + octave_idx_type addend = m; + + if (2*m-4 > addend) + addend = 2*m-4; + + if (nrhs > addend) + addend = nrhs; + + if (n-3*m > addend) + addend = n-3*m; + + if (wlalsd > addend) + addend = wlalsd; + + const octave_idx_type lworkaround = 4*m + m*m + addend; + + if (work(0) < lworkaround) + work(0) = lworkaround; + } + if (f77_exception_encountered) (*current_liboctave_error_handler) ("unrecoverable error in dgelsd");