comparison liboctave/floatQRP.cc @ 8368:c72c1c9bccdc

call blocked permuted qr factorization routines from LAPACK
author Jaroslav Hajek <highegg@gmail.com>
date Thu, 04 Dec 2008 09:15:17 +0100
parents 445d27d79f4e
children e3c9102431a9
comparison
equal deleted inserted replaced
8367:445d27d79f4e 8368:c72c1c9bccdc
32 #include "lo-error.h" 32 #include "lo-error.h"
33 33
34 extern "C" 34 extern "C"
35 { 35 {
36 F77_RET_T 36 F77_RET_T
37 F77_FUNC (sgeqpf, SGEQPF) (const octave_idx_type&, const octave_idx_type&, float*, 37 F77_FUNC (sgeqp3, SGEQP3) (const octave_idx_type&, const octave_idx_type&, float*,
38 const octave_idx_type&, octave_idx_type*, float*, float*, octave_idx_type&); 38 const octave_idx_type&, octave_idx_type*, float*, float*,
39 const octave_idx_type&, octave_idx_type&);
39 40
40 F77_RET_T 41 F77_RET_T
41 F77_FUNC (sorgqr, SORGQR) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&, 42 F77_FUNC (sorgqr, SORGQR) (const octave_idx_type&, const octave_idx_type&, const octave_idx_type&,
42 float*, const octave_idx_type&, float*, float*, 43 float*, const octave_idx_type&, float*, float*,
43 const octave_idx_type&, octave_idx_type&); 44 const octave_idx_type&, octave_idx_type&);
67 68
68 octave_idx_type min_mn = m < n ? m : n; 69 octave_idx_type min_mn = m < n ? m : n;
69 Array<float> tau (min_mn); 70 Array<float> tau (min_mn);
70 float *ptau = tau.fortran_vec (); 71 float *ptau = tau.fortran_vec ();
71 72
72 octave_idx_type lwork = 3*n > 32*m ? 3*n : 32*m;
73 Array<float> work (lwork);
74 float *pwork = work.fortran_vec ();
75
76 octave_idx_type info = 0; 73 octave_idx_type info = 0;
77 74
78 FloatMatrix A_fact = a; 75 FloatMatrix A_fact = a;
79 if (m > n && qr_type != QR::economy) 76 if (m > n && qr_type != QR::economy)
80 A_fact.resize (m, m, 0.0); 77 A_fact.resize (m, m, 0.0);
82 float *tmp_data = A_fact.fortran_vec (); 79 float *tmp_data = A_fact.fortran_vec ();
83 80
84 MArray<octave_idx_type> jpvt (n, 0); 81 MArray<octave_idx_type> jpvt (n, 0);
85 octave_idx_type *pjpvt = jpvt.fortran_vec (); 82 octave_idx_type *pjpvt = jpvt.fortran_vec ();
86 83
84 float rlwork = 0;
85 // Workspace query...
86 F77_XFCN (sgeqp3, SGEQP3, (m, n, tmp_data, m, pjpvt, ptau, &rlwork, -1, info));
87
88 octave_idx_type lwork = rlwork;
89 Array<float> work (lwork);
90 float *pwork = work.fortran_vec ();
91
87 // Code to enforce a certain permutation could go here... 92 // Code to enforce a certain permutation could go here...
88 93
89 F77_XFCN (sgeqpf, SGEQPF, (m, n, tmp_data, m, pjpvt, ptau, pwork, info)); 94 F77_XFCN (sgeqp3, SGEQP3, (m, n, tmp_data, m, pjpvt, ptau, pwork, lwork, info));
90 95
91 // Form Permutation matrix (if economy is requested, return the 96 // Form Permutation matrix (if economy is requested, return the
92 // indices only!) 97 // indices only!)
93 98
94 jpvt -= 1; 99 jpvt -= 1;