changeset 8368:c72c1c9bccdc

call blocked permuted qr factorization routines from LAPACK
author Jaroslav Hajek <highegg@gmail.com>
date Thu, 04 Dec 2008 09:15:17 +0100
parents 445d27d79f4e
children 584d9e80556b
files liboctave/ChangeLog liboctave/CmplxQRP.cc liboctave/dbleQRP.cc liboctave/fCmplxQRP.cc liboctave/floatQRP.cc
diffstat 5 files changed, 57 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/liboctave/ChangeLog
+++ b/liboctave/ChangeLog
@@ -1,3 +1,10 @@
+2008-12-04  Jaroslav Hajek  <highegg@gmail.com>
+
+	* dbleQRP.cc (QRP::QRP): Call DGEQP3 rather than DGEQPF.
+	* floatQRP.cc (FloatQRP::FloatQRP): Call SGEQP3 rather than SGEQPF.
+	* CmplxQRP.cc (ComplexQRP::ComplexQRP): Call ZGEQP3 rather than ZGEQPF.
+	* fCmplxQRP.cc (FloatComplexQRP::FloatComplexQRP): Call CGEQP3 rather than CGEQPF.
+
 2008-12-03  Jaroslav Hajek  <highegg@gmail.com>
 
 	* PermMatrix.h, PermMatrix.cc: New sources.
--- a/liboctave/CmplxQRP.cc
+++ b/liboctave/CmplxQRP.cc
@@ -34,9 +34,9 @@
 extern "C"
 {
   F77_RET_T
-  F77_FUNC (zgeqpf, ZGEQPF) (const octave_idx_type&, const octave_idx_type&, Complex*,
+  F77_FUNC (zgeqp3, ZGEQP3) (const octave_idx_type&, const octave_idx_type&, Complex*,
 			     const octave_idx_type&, octave_idx_type*, Complex*, Complex*,
-			     double*, octave_idx_type&);
+			     const octave_idx_type&, double*, octave_idx_type&);
 
   F77_RET_T
   F77_FUNC (zungqr, ZUNGQR) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&,
@@ -71,10 +71,6 @@
   Array<Complex> tau (min_mn);
   Complex *ptau = tau.fortran_vec ();
 
-  octave_idx_type lwork = 3*n > 32*m ? 3*n : 32*m;
-  Array<Complex> work (lwork);
-  Complex *pwork = work.fortran_vec ();
-
   octave_idx_type info = 0;
 
   ComplexMatrix A_fact = a;
@@ -89,10 +85,19 @@
   MArray<octave_idx_type> jpvt (n, 0);
   octave_idx_type *pjpvt = jpvt.fortran_vec ();
 
+  Complex rlwork = 0;
+  // Workspace query...
+  F77_XFCN (zgeqp3, ZGEQP3, (m, n, tmp_data, m, pjpvt, ptau, &rlwork,
+			     -1, prwork, info));
+
+  octave_idx_type lwork = rlwork.real ();
+  Array<Complex> work (lwork);
+  Complex *pwork = work.fortran_vec ();
+
   // Code to enforce a certain permutation could go here...
 
-  F77_XFCN (zgeqpf, ZGEQPF, (m, n, tmp_data, m, pjpvt, ptau, pwork,
-			     prwork, info));
+  F77_XFCN (zgeqp3, ZGEQP3, (m, n, tmp_data, m, pjpvt, ptau, pwork,
+			     lwork, prwork, info));
 
   // Form Permutation matrix (if economy is requested, return the
   // indices only!)
--- a/liboctave/dbleQRP.cc
+++ b/liboctave/dbleQRP.cc
@@ -34,8 +34,9 @@
 extern "C"
 {
   F77_RET_T
-  F77_FUNC (dgeqpf, DGEQPF) (const octave_idx_type&, const octave_idx_type&, double*,
-			     const octave_idx_type&, octave_idx_type*, double*, double*, octave_idx_type&);
+  F77_FUNC (dgeqp3, DGEQP3) (const octave_idx_type&, const octave_idx_type&, double*,
+			     const octave_idx_type&, octave_idx_type*, double*, double*,
+                             const octave_idx_type&, octave_idx_type&);
 
   F77_RET_T
   F77_FUNC (dorgqr, DORGQR) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&,
@@ -69,10 +70,6 @@
   Array<double> tau (min_mn);
   double *ptau = tau.fortran_vec ();
 
-  octave_idx_type lwork = 3*n > 32*m ? 3*n : 32*m;
-  Array<double> work (lwork);
-  double *pwork = work.fortran_vec ();
-
   octave_idx_type info = 0;
 
   Matrix A_fact = a;
@@ -84,9 +81,17 @@
   MArray<octave_idx_type> jpvt (n, 0);
   octave_idx_type *pjpvt = jpvt.fortran_vec ();
 
+  double rlwork = 0;
+  // Workspace query...
+  F77_XFCN (dgeqp3, DGEQP3, (m, n, tmp_data, m, pjpvt, ptau, &rlwork, -1, info));
+
+  octave_idx_type lwork = rlwork;
+  Array<double> work (lwork);
+  double *pwork = work.fortran_vec ();
+
   // Code to enforce a certain permutation could go here...
 
-  F77_XFCN (dgeqpf, DGEQPF, (m, n, tmp_data, m, pjpvt, ptau, pwork, info));
+  F77_XFCN (dgeqp3, DGEQP3, (m, n, tmp_data, m, pjpvt, ptau, pwork, lwork, info));
 
   // Form Permutation matrix (if economy is requested, return the
   // indices only!)
--- a/liboctave/fCmplxQRP.cc
+++ b/liboctave/fCmplxQRP.cc
@@ -34,9 +34,9 @@
 extern "C"
 {
   F77_RET_T
-  F77_FUNC (cgeqpf, CGEQPF) (const octave_idx_type&, const octave_idx_type&, FloatComplex*,
+  F77_FUNC (cgeqp3, CGEQP3) (const octave_idx_type&, const octave_idx_type&, FloatComplex*,
 			     const octave_idx_type&, octave_idx_type*, FloatComplex*, FloatComplex*,
-			     float*, octave_idx_type&);
+			     const octave_idx_type&, float*, octave_idx_type&);
 
   F77_RET_T
   F77_FUNC (cungqr, CUNGQR) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&,
@@ -71,10 +71,6 @@
   Array<FloatComplex> tau (min_mn);
   FloatComplex *ptau = tau.fortran_vec ();
 
-  octave_idx_type lwork = 3*n > 32*m ? 3*n : 32*m;
-  Array<FloatComplex> work (lwork);
-  FloatComplex *pwork = work.fortran_vec ();
-
   octave_idx_type info = 0;
 
   FloatComplexMatrix A_fact = a;
@@ -89,10 +85,19 @@
   MArray<octave_idx_type> jpvt (n, 0);
   octave_idx_type *pjpvt = jpvt.fortran_vec ();
 
+  FloatComplex rlwork = 0;
+  // Workspace query...
+  F77_XFCN (cgeqp3, CGEQP3, (m, n, tmp_data, m, pjpvt, ptau, &rlwork,
+			     -1, prwork, info));
+
+  octave_idx_type lwork = rlwork.real ();
+  Array<FloatComplex> work (lwork);
+  FloatComplex *pwork = work.fortran_vec ();
+
   // Code to enforce a certain permutation could go here...
 
-  F77_XFCN (cgeqpf, CGEQPF, (m, n, tmp_data, m, pjpvt, ptau, pwork,
-			     prwork, info));
+  F77_XFCN (cgeqp3, CGEQP3, (m, n, tmp_data, m, pjpvt, ptau, pwork,
+			     lwork, prwork, info));
 
   // Form Permutation matrix (if economy is requested, return the
   // indices only!)
--- a/liboctave/floatQRP.cc
+++ b/liboctave/floatQRP.cc
@@ -34,8 +34,9 @@
 extern "C"
 {
   F77_RET_T
-  F77_FUNC (sgeqpf, SGEQPF) (const octave_idx_type&, const octave_idx_type&, float*,
-			     const octave_idx_type&, octave_idx_type*, float*, float*, octave_idx_type&);
+  F77_FUNC (sgeqp3, SGEQP3) (const octave_idx_type&, const octave_idx_type&, float*,
+			     const octave_idx_type&, octave_idx_type*, float*, float*,
+                             const octave_idx_type&, octave_idx_type&);
 
   F77_RET_T
   F77_FUNC (sorgqr, SORGQR) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&,
@@ -69,10 +70,6 @@
   Array<float> tau (min_mn);
   float *ptau = tau.fortran_vec ();
 
-  octave_idx_type lwork = 3*n > 32*m ? 3*n : 32*m;
-  Array<float> work (lwork);
-  float *pwork = work.fortran_vec ();
-
   octave_idx_type info = 0;
 
   FloatMatrix A_fact = a;
@@ -84,9 +81,17 @@
   MArray<octave_idx_type> jpvt (n, 0);
   octave_idx_type *pjpvt = jpvt.fortran_vec ();
 
+  float rlwork = 0;
+  // Workspace query...
+  F77_XFCN (sgeqp3, SGEQP3, (m, n, tmp_data, m, pjpvt, ptau, &rlwork, -1, info));
+
+  octave_idx_type lwork = rlwork;
+  Array<float> work (lwork);
+  float *pwork = work.fortran_vec ();
+
   // Code to enforce a certain permutation could go here...
 
-  F77_XFCN (sgeqpf, SGEQPF, (m, n, tmp_data, m, pjpvt, ptau, pwork, info));
+  F77_XFCN (sgeqp3, SGEQP3, (m, n, tmp_data, m, pjpvt, ptau, pwork, lwork, info));
 
   // Form Permutation matrix (if economy is requested, return the
   // indices only!)