diff liboctave/floatQRP.cc @ 8368:c72c1c9bccdc

Call blocked permuted QR factorization routines from LAPACK
author Jaroslav Hajek <highegg@gmail.com>
date Thu, 04 Dec 2008 09:15:17 +0100
parents 445d27d79f4e
children e3c9102431a9
line wrap: on
line diff
--- a/liboctave/floatQRP.cc
+++ b/liboctave/floatQRP.cc
@@ -34,8 +34,9 @@
 extern "C"
 {
   F77_RET_T
-  F77_FUNC (sgeqpf, SGEQPF) (const octave_idx_type&, const octave_idx_type&, float*,
-			     const octave_idx_type&, octave_idx_type*, float*, float*, octave_idx_type&);
+  F77_FUNC (sgeqp3, SGEQP3) (const octave_idx_type&, const octave_idx_type&, float*,
+			     const octave_idx_type&, octave_idx_type*, float*, float*,
+                             const octave_idx_type&, octave_idx_type&);
 
   F77_RET_T
   F77_FUNC (sorgqr, SORGQR) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&,
@@ -69,10 +70,6 @@
   Array<float> tau (min_mn);
   float *ptau = tau.fortran_vec ();
 
-  octave_idx_type lwork = 3*n > 32*m ? 3*n : 32*m;
-  Array<float> work (lwork);
-  float *pwork = work.fortran_vec ();
-
   octave_idx_type info = 0;
 
   FloatMatrix A_fact = a;
@@ -84,9 +81,17 @@
   MArray<octave_idx_type> jpvt (n, 0);
   octave_idx_type *pjpvt = jpvt.fortran_vec ();
 
+  float rlwork = 0;
+  // Workspace query: with lwork = -1, sgeqp3 only reports the optimal work size in rlwork.
+  F77_XFCN (sgeqp3, SGEQP3, (m, n, tmp_data, m, pjpvt, ptau, &rlwork, -1, info));
+
+  octave_idx_type lwork = rlwork;
+  Array<float> work (lwork);
+  float *pwork = work.fortran_vec ();
+
   // Code to enforce a certain permutation could go here...
 
-  F77_XFCN (sgeqpf, SGEQPF, (m, n, tmp_data, m, pjpvt, ptau, pwork, info));
+  F77_XFCN (sgeqp3, SGEQP3, (m, n, tmp_data, m, pjpvt, ptau, pwork, lwork, info));
 
   // Form Permutation matrix (if economy is requested, return the
   // indices only!)