changeset 11640:6c36c5d3c38b release-3-0-x

fix workspace size calculation for xGELSD
author Jason Riedy
date Tue, 12 Feb 2008 21:04:34 -0500
parents 78f183f65ca2
children 1202614e8a73
files liboctave/CMatrix.cc liboctave/ChangeLog liboctave/dMatrix.cc
diffstat 3 files changed, 89 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/liboctave/CMatrix.cc
+++ b/liboctave/CMatrix.cc
@@ -63,6 +63,13 @@
 
 extern "C"
 {
+  octave_idx_type
+  F77_FUNC (ilaenv, ILAENV) (const octave_idx_type&, F77_CONST_CHAR_ARG_DECL,
+			     F77_CONST_CHAR_ARG_DECL,
+			     const octave_idx_type&, const octave_idx_type&,
+			     const octave_idx_type&, const octave_idx_type&
+			     F77_CHAR_ARG_LEN_DECL F77_CHAR_ARG_LEN_DECL);
+
   F77_RET_T
   F77_FUNC (zgebal, ZGEBAL) (F77_CONST_CHAR_ARG_DECL,
 			     const octave_idx_type&, Complex*, const octave_idx_type&, octave_idx_type&,
@@ -2492,8 +2499,12 @@
 
       Array<Complex> work (1);
 
-      // FIXME: Can SMLSIZ be other than 25?
-      octave_idx_type smlsiz = 25;
+      const octave_idx_type smlsiz
+	= F77_FUNC (ilaenv, ILAENV) (9, F77_CONST_CHAR_ARG2 ("ZGELSD", 6),
+				     F77_CONST_CHAR_ARG2 (" ", 1),
+				     0, 0, 0, 0
+				     F77_CHAR_ARG_LEN (6)
+				     F77_CHAR_ARG_LEN (1));
 
       // We compute the size of rwork and iwork because ZGELSD in
       // older versions of LAPACK does not return them on a query
@@ -2526,6 +2537,29 @@
 				 ps, rcond, rank, work.fortran_vec (),
 				 lwork, prwork, piwork, info));
 
+      // The workspace query is broken in at least LAPACK 3.0.0
+      // through 3.1.1 when n > m.  The obtuse formula below
+      // should provide sufficient workspace for DGELSD to operate
+      // efficiently.
+      if (n > m)
+	{
+	  octave_idx_type addend = m;
+
+	  if (2*m-4 > addend)
+	    addend = 2*m-4;
+
+	  if (nrhs > addend)
+	    addend = nrhs;
+
+	  if (n-3*m > addend)
+	    addend = n-3*m;
+
+	  const octave_idx_type lworkaround = 4*m + m*m + addend;
+
+	  if (std::real (work(0)) < lworkaround)
+	    work(0) = lworkaround;
+	}
+
       if (f77_exception_encountered)
 	(*current_liboctave_error_handler) 
 	  ("unrecoverable error in zgelsd");
--- a/liboctave/ChangeLog
+++ b/liboctave/ChangeLog
@@ -1,3 +1,14 @@
+2008-02-12  Jason Riedy  <ejr@cs.berkeley.edu>
+  
+	* dMatrix.cc (ILAENV): Declare LAPACK Fortran function.
+	(Matrix::lssolve): Use ILAENV to query smlsiz.  And add an ugly
+	workaround for DGELSD's broken lwork query.  The formula is from
+	LAPACK's dgelsd.f source and allocates enough workspace to use an
+	efficient algorithm in the short-and-fat case (n > m).
+	* CMatrix.cc (ILAENV): Declare LAPACK Fortran function.
+	(ComplexMatrix::lssolve): Use ILAENV to query smlsiz.  And add an
+	ugly workaround for ZGELSD's broken lwork query, as with double.
+
 2008-01-22  Michael Goffioul  <michael.goffioul@gmail.com>
 
 	* oct-time.cc (octave_base_tim::init): Validate pointer argument;
--- a/liboctave/dMatrix.cc
+++ b/liboctave/dMatrix.cc
@@ -59,6 +59,13 @@
 
 extern "C"
 {
+  octave_idx_type
+  F77_FUNC (ilaenv, ILAENV) (const octave_idx_type&, F77_CONST_CHAR_ARG_DECL,
+			     F77_CONST_CHAR_ARG_DECL,
+			     const octave_idx_type&, const octave_idx_type&,
+			     const octave_idx_type&, const octave_idx_type&
+			     F77_CHAR_ARG_LEN_DECL F77_CHAR_ARG_LEN_DECL);
+
   F77_RET_T
   F77_FUNC (dgebal, DGEBAL) (F77_CONST_CHAR_ARG_DECL,
 			     const octave_idx_type&, double*, const octave_idx_type&, octave_idx_type&,
@@ -2103,8 +2110,12 @@
 
       Array<double> work (1);
 
-      // FIXME: Can SMLSIZ be other than 25?
-      octave_idx_type smlsiz = 25;
+      const octave_idx_type smlsiz
+	= F77_FUNC (ilaenv, ILAENV) (9, F77_CONST_CHAR_ARG2 ("DGELSD", 6),
+				     F77_CONST_CHAR_ARG2 (" ", 1),
+				     0, 0, 0, 0
+				     F77_CHAR_ARG_LEN (6)
+				     F77_CHAR_ARG_LEN (1));
 
       // We compute the size of iwork because DGELSD in older versions
       // of LAPACK does not return it on a query call.
@@ -2129,6 +2140,35 @@
 				 ps, rcond, rank, work.fortran_vec (),
 				 lwork, piwork, info));
 
+      // The workspace query is broken in at least LAPACK 3.0.0
+      // through 3.1.1 when n > m.  The obtuse formula below
+      // should provide sufficient workspace for DGELSD to operate
+      // efficiently.
+      if (n > m)
+	{
+	  const octave_idx_type wlalsd
+	    = 9*m + 2*m*smlsiz + 8*m*nlvl + m*nrhs + (smlsiz+1)*(smlsiz+1);
+
+	  octave_idx_type addend = m;
+
+	  if (2*m-4 > addend)
+	    addend = 2*m-4;
+
+	  if (nrhs > addend)
+	    addend = nrhs;
+
+	  if (n-3*m > addend)
+	    addend = n-3*m;
+
+	  if (wlalsd > addend)
+	    addend = wlalsd;
+
+	  const octave_idx_type lworkaround = 4*m + m*m + addend;
+
+	  if (work(0) < lworkaround)
+	    work(0) = lworkaround;
+	}
+
       if (f77_exception_encountered)
 	(*current_liboctave_error_handler) 
 	  ("unrecoverable error in dgelsd");