Mercurial > hg > octave-lyh
annotate liboctave/floatQRP.cc @ 10396:a0b51ac0f88a
optimize accumdim with summation
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Fri, 05 Mar 2010 12:31:30 +0100 |
parents | 12884915a8e4 |
children | 4d1fc073fbb7 |
rev | line source |
---|---|
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
1 /* |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
2 |
8920 | 3 Copyright (C) 1994, 1995, 1996, 1997, 2002, 2003, 2004, 2005, 2007, |
4 2008, 2009 John W. Eaton | |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
5 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
6 This file is part of Octave. |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
7 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
8 Octave is free software; you can redistribute it and/or modify it |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
9 under the terms of the GNU General Public License as published by the |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
10 Free Software Foundation; either version 3 of the License, or (at your |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
11 option) any later version. |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
12 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
13 Octave is distributed in the hope that it will be useful, but WITHOUT |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
16 for more details. |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
17 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
18 You should have received a copy of the GNU General Public License |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
19 along with Octave; see the file COPYING. If not, see |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
20 <http://www.gnu.org/licenses/>. |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
21 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
22 */ |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
23 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
24 #ifdef HAVE_CONFIG_H |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
25 #include <config.h> |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
26 #endif |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
27 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
28 #include <cassert> |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
29 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
30 #include "floatQRP.h" |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
31 #include "f77-fcn.h" |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
32 #include "lo-error.h" |
8597
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
33 #include "oct-locbuf.h" |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
34 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
35 extern "C" |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
36 { |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
37 F77_RET_T |
8368
c72c1c9bccdc
call blocked permuted qr factorization routines from LAPACK
Jaroslav Hajek <highegg@gmail.com>
parents:
8367
diff
changeset
|
38 F77_FUNC (sgeqp3, SGEQP3) (const octave_idx_type&, const octave_idx_type&, float*, |
10314
07ebe522dac2
untabify liboctave C++ sources
John W. Eaton <jwe@octave.org>
parents:
10158
diff
changeset
|
39 const octave_idx_type&, octave_idx_type*, float*, float*, |
8368
c72c1c9bccdc
call blocked permuted qr factorization routines from LAPACK
Jaroslav Hajek <highegg@gmail.com>
parents:
8367
diff
changeset
|
40 const octave_idx_type&, octave_idx_type&); |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
41 } |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
42 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
43 // It would be best to share some of this code with QR class... |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
44 |
9713
7918eb15040c
refactor the QR classes onto a templated base
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
45 FloatQRP::FloatQRP (const FloatMatrix& a, qr_type_t qr_type) |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
46 : FloatQR (), p () |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
47 { |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
48 init (a, qr_type); |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
49 } |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
50 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
51 void |
9713
7918eb15040c
refactor the QR classes onto a templated base
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
52 FloatQRP::init (const FloatMatrix& a, qr_type_t qr_type) |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
53 { |
9713
7918eb15040c
refactor the QR classes onto a templated base
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
54 assert (qr_type != qr_type_raw); |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
55 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
56 octave_idx_type m = a.rows (); |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
57 octave_idx_type n = a.cols (); |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
58 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
59 octave_idx_type min_mn = m < n ? m : n; |
8597
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
60 OCTAVE_LOCAL_BUFFER (float, tau, min_mn); |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
61 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
62 octave_idx_type info = 0; |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
63 |
8597
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
64 FloatMatrix afact = a; |
9713
7918eb15040c
refactor the QR classes onto a templated base
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
65 if (m > n && qr_type == qr_type_std) |
8597
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
66 afact.resize (m, m); |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
67 |
10350
12884915a8e4
merge MArray classes & improve Array interface
Jaroslav Hajek <highegg@gmail.com>
parents:
10314
diff
changeset
|
68 MArray<octave_idx_type> jpvt (n, 1, 0); |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
69 |
8597
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
70 if (m > 0) |
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
71 { |
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
72 // workspace query. |
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
73 float rlwork; |
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
74 F77_XFCN (sgeqp3, SGEQP3, (m, n, afact.fortran_vec (), m, jpvt.fortran_vec (), |
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
75 tau, &rlwork, -1, info)); |
8368
c72c1c9bccdc
call blocked permuted qr factorization routines from LAPACK
Jaroslav Hajek <highegg@gmail.com>
parents:
8367
diff
changeset
|
76 |
8597
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
77 // allocate buffer and do the job. |
8811 | 78 octave_idx_type lwork = rlwork; |
79 lwork = std::max (lwork, static_cast<octave_idx_type> (1)); | |
8597
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
80 OCTAVE_LOCAL_BUFFER (float, work, lwork); |
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
81 F77_XFCN (sgeqp3, SGEQP3, (m, n, afact.fortran_vec (), m, jpvt.fortran_vec (), |
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
82 tau, work, lwork, info)); |
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
83 } |
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
84 else |
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
85 for (octave_idx_type i = 0; i < n; i++) jpvt(i) = i+1; |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
86 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
87 // Form Permutation matrix (if economy is requested, return the |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
88 // indices only!) |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
89 |
8811 | 90 jpvt -= static_cast<octave_idx_type> (1); |
8367
445d27d79f4e
support permutation matrix objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7792
diff
changeset
|
91 p = PermMatrix (jpvt, true); |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
92 |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
93 |
8597
c86718093c1b
improve & fix QR classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8375
diff
changeset
|
94 form (n, afact, tau, qr_type); |
7792
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
95 } |
39c1026191e9
add missing files from single-precision merge
John W. Eaton <jwe@octave.org>
parents:
diff
changeset
|
96 |
8367
445d27d79f4e
support permutation matrix objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7792
diff
changeset
|
97 FloatColumnVector |
445d27d79f4e
support permutation matrix objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7792
diff
changeset
|
98 FloatQRP::Pvec (void) const |
445d27d79f4e
support permutation matrix objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7792
diff
changeset
|
99 { |
8375
e3c9102431a9
fix design problems of diag & perm matrix classes
Jaroslav Hajek <highegg@gmail.com>
parents:
8368
diff
changeset
|
100 Array<float> pa (p.pvec ()); |
8367
445d27d79f4e
support permutation matrix objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7792
diff
changeset
|
101 FloatColumnVector pv (MArray<float> (pa) + 1.0f); |
445d27d79f4e
support permutation matrix objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7792
diff
changeset
|
102 return pv; |
445d27d79f4e
support permutation matrix objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7792
diff
changeset
|
103 } |