Mercurial > hg > octave-lyh
annotate liboctave/mx-inlines.cc @ 8756:d0755c9db5ed
implement fast logical sum (counting)
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Mon, 16 Feb 2009 12:41:55 +0100 |
parents | 9f7ce4bf7650 |
children | 83c9d60c3c47 |
rev | line source |
---|---|
3 | 1 /* |
2 | |
7017 | 3 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, |
4 2003, 2004, 2005, 2006, 2007 John W. Eaton | |
3 | 5 |
6 This file is part of Octave. | |
7 | |
8 Octave is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
7016 | 10 Free Software Foundation; either version 3 of the License, or (at your |
11 option) any later version. | |
3 | 12 |
13 Octave is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
7016 | 19 along with Octave; see the file COPYING. If not, see |
20 <http://www.gnu.org/licenses/>. | |
3 | 21 |
22 */ | |
23 | |
2828 | 24 #if !defined (octave_mx_inlines_h) |
25 #define octave_mx_inlines_h 1 | |
2804 | 26 |
27 #include <cstddef> | |
8650
a1ae2aae903e
abs,real,imag,conj: use code from mx-inlines rather than the generic map
Jaroslav Hajek <highegg@gmail.com>
parents:
8380
diff
changeset
|
28 #include <cmath> |
2804 | 29 |
5525 | 30 #include "quit.h" |
31 | |
1650 | 32 #include "oct-cmplx.h" |
461 | 33 |
8380
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
34 template <class R, class S> |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
35 inline void |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
36 mx_inline_fill_vs (R *r, size_t n, S s) |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
37 { |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
38 for (size_t i = 0; i < n; i++) |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
39 r[i] = s; |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
40 } |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
41 |
2811 | 42 #define VS_OP_FCN(F, OP) \ |
43 template <class R, class V, class S> \ | |
3262 | 44 inline void \ |
2811 | 45 F ## _vs (R *r, const V *v, size_t n, S s) \ |
46 { \ | |
47 for (size_t i = 0; i < n; i++) \ | |
48 r[i] = v[i] OP s; \ | |
49 } | |
50 | |
3769 | 51 VS_OP_FCN (mx_inline_add, +) |
52 VS_OP_FCN (mx_inline_subtract, -) | |
53 VS_OP_FCN (mx_inline_multiply, *) | |
54 VS_OP_FCN (mx_inline_divide, /) | |
3 | 55 |
2804 | 56 #define VS_OP(F, OP, R, V, S) \ |
57 static inline R * \ | |
58 F (const V *v, size_t n, S s) \ | |
59 { \ | |
60 R *r = 0; \ | |
61 if (n > 0) \ | |
62 { \ | |
63 r = new R [n]; \ | |
2811 | 64 F ## _vs (r, v, n, s); \ |
2804 | 65 } \ |
66 return r; \ | |
67 } | |
3 | 68 |
2804 | 69 #define VS_OPS(R, V, S) \ |
3769 | 70 VS_OP (mx_inline_add, +, R, V, S) \ |
71 VS_OP (mx_inline_subtract, -, R, V, S) \ | |
72 VS_OP (mx_inline_multiply, *, R, V, S) \ | |
73 VS_OP (mx_inline_divide, /, R, V, S) | |
3 | 74 |
2804 | 75 VS_OPS (double, double, double) |
76 VS_OPS (Complex, double, Complex) | |
77 VS_OPS (Complex, Complex, double) | |
78 VS_OPS (Complex, Complex, Complex) | |
3 | 79 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
80 VS_OPS (float, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
81 VS_OPS (FloatComplex, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
82 VS_OPS (FloatComplex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
83 VS_OPS (FloatComplex, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
84 |
2811 | 85 #define SV_OP_FCN(F, OP) \ |
86 template <class R, class S, class V> \ | |
3262 | 87 inline void \ |
2811 | 88 F ## _sv (R *r, S s, const V *v, size_t n) \ |
89 { \ | |
90 for (size_t i = 0; i < n; i++) \ | |
91 r[i] = s OP v[i]; \ | |
92 } \ | |
93 | |
3769 | 94 SV_OP_FCN (mx_inline_add, +) |
95 SV_OP_FCN (mx_inline_subtract, -) | |
96 SV_OP_FCN (mx_inline_multiply, *) | |
97 SV_OP_FCN (mx_inline_divide, /) | |
2811 | 98 |
2804 | 99 #define SV_OP(F, OP, R, S, V) \ |
100 static inline R * \ | |
101 F (S s, const V *v, size_t n) \ | |
102 { \ | |
103 R *r = 0; \ | |
104 if (n > 0) \ | |
105 { \ | |
106 r = new R [n]; \ | |
2811 | 107 F ## _sv (r, s, v, n); \ |
2804 | 108 } \ |
109 return r; \ | |
110 } | |
3 | 111 |
2804 | 112 #define SV_OPS(R, S, V) \ |
3769 | 113 SV_OP (mx_inline_add, +, R, S, V) \ |
114 SV_OP (mx_inline_subtract, -, R, S, V) \ | |
115 SV_OP (mx_inline_multiply, *, R, S, V) \ | |
116 SV_OP (mx_inline_divide, /, R, S, V) | |
3 | 117 |
2804 | 118 SV_OPS (double, double, double) |
119 SV_OPS (Complex, double, Complex) | |
120 SV_OPS (Complex, Complex, double) | |
121 SV_OPS (Complex, Complex, Complex) | |
3 | 122 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
123 SV_OPS (float, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
124 SV_OPS (FloatComplex, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
125 SV_OPS (FloatComplex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
126 SV_OPS (FloatComplex, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
127 |
2811 | 128 #define VV_OP_FCN(F, OP) \ |
129 template <class R, class T1, class T2> \ | |
3262 | 130 inline void \ |
2811 | 131 F ## _vv (R *r, const T1 *v1, const T2 *v2, size_t n) \ |
132 { \ | |
133 for (size_t i = 0; i < n; i++) \ | |
134 r[i] = v1[i] OP v2[i]; \ | |
135 } \ | |
136 | |
3769 | 137 VV_OP_FCN (mx_inline_add, +) |
138 VV_OP_FCN (mx_inline_subtract, -) | |
139 VV_OP_FCN (mx_inline_multiply, *) | |
140 VV_OP_FCN (mx_inline_divide, /) | |
2811 | 141 |
2804 | 142 #define VV_OP(F, OP, R, T1, T2) \ |
143 static inline R * \ | |
144 F (const T1 *v1, const T2 *v2, size_t n) \ | |
145 { \ | |
146 R *r = 0; \ | |
147 if (n > 0) \ | |
148 { \ | |
149 r = new R [n]; \ | |
2811 | 150 F ## _vv (r, v1, v2, n); \ |
2804 | 151 } \ |
152 return r; \ | |
153 } | |
3 | 154 |
2804 | 155 #define VV_OPS(R, T1, T2) \ |
3769 | 156 VV_OP (mx_inline_add, +, R, T1, T2) \ |
157 VV_OP (mx_inline_subtract, -, R, T1, T2) \ | |
158 VV_OP (mx_inline_multiply, *, R, T1, T2) \ | |
159 VV_OP (mx_inline_divide, /, R, T1, T2) | |
3 | 160 |
2804 | 161 VV_OPS (double, double, double) |
162 VV_OPS (Complex, double, Complex) | |
163 VV_OPS (Complex, Complex, double) | |
164 VV_OPS (Complex, Complex, Complex) | |
3 | 165 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
166 VV_OPS (float, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
167 VV_OPS (FloatComplex, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
168 VV_OPS (FloatComplex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
169 VV_OPS (FloatComplex, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
170 |
2804 | 171 #define VS_OP2(F, OP, V, S) \ |
172 static inline V * \ | |
173 F (V *v, size_t n, S s) \ | |
174 { \ | |
175 for (size_t i = 0; i < n; i++) \ | |
176 v[i] OP s; \ | |
177 return v; \ | |
178 } | |
3 | 179 |
2804 | 180 #define VS_OP2S(V, S) \ |
3769 | 181 VS_OP2 (mx_inline_add2, +=, V, S) \ |
182 VS_OP2 (mx_inline_subtract2, -=, V, S) \ | |
183 VS_OP2 (mx_inline_multiply2, *=, V, S) \ | |
184 VS_OP2 (mx_inline_divide2, /=, V, S) \ | |
185 VS_OP2 (mx_inline_copy, =, V, S) | |
3 | 186 |
2804 | 187 VS_OP2S (double, double) |
188 VS_OP2S (Complex, double) | |
189 VS_OP2S (Complex, Complex) | |
3 | 190 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
191 VS_OP2S (float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
192 VS_OP2S (FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
193 VS_OP2S (FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
194 |
2804 | 195 #define VV_OP2(F, OP, T1, T2) \ |
196 static inline T1 * \ | |
197 F (T1 *v1, const T2 *v2, size_t n) \ | |
198 { \ | |
199 for (size_t i = 0; i < n; i++) \ | |
200 v1[i] OP v2[i]; \ | |
201 return v1; \ | |
202 } | |
3 | 203 |
2804 | 204 #define VV_OP2S(T1, T2) \ |
3769 | 205 VV_OP2 (mx_inline_add2, +=, T1, T2) \ |
206 VV_OP2 (mx_inline_subtract2, -=, T1, T2) \ | |
207 VV_OP2 (mx_inline_multiply2, *=, T1, T2) \ | |
208 VV_OP2 (mx_inline_divide2, /=, T1, T2) \ | |
209 VV_OP2 (mx_inline_copy, =, T1, T2) | |
3 | 210 |
2804 | 211 VV_OP2S (double, double) |
212 VV_OP2S (Complex, double) | |
213 VV_OP2S (Complex, Complex) | |
3 | 214 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
215 VV_OP2S (float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
216 VV_OP2S (FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
217 VV_OP2S (FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
218 |
2804 | 219 #define OP_EQ_FCN(T1, T2) \ |
220 static inline bool \ | |
3769 | 221 mx_inline_equal (const T1 *x, const T2 *y, size_t n) \ |
2804 | 222 { \ |
223 for (size_t i = 0; i < n; i++) \ | |
224 if (x[i] != y[i]) \ | |
225 return false; \ | |
226 return true; \ | |
227 } | |
3 | 228 |
2828 | 229 OP_EQ_FCN (bool, bool) |
2804 | 230 OP_EQ_FCN (char, char) |
231 OP_EQ_FCN (double, double) | |
232 OP_EQ_FCN (Complex, Complex) | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
233 OP_EQ_FCN (float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
234 OP_EQ_FCN (FloatComplex, FloatComplex) |
3 | 235 |
2804 | 236 #define OP_DUP_FCN(OP, F, R, T) \ |
237 static inline R * \ | |
238 F (const T *x, size_t n) \ | |
239 { \ | |
240 R *r = 0; \ | |
241 if (n > 0) \ | |
242 { \ | |
243 r = new R [n]; \ | |
244 for (size_t i = 0; i < n; i++) \ | |
245 r[i] = OP (x[i]); \ | |
246 } \ | |
247 return r; \ | |
248 } | |
3 | 249 |
3769 | 250 OP_DUP_FCN (, mx_inline_dup, double, double) |
251 OP_DUP_FCN (, mx_inline_dup, Complex, Complex) | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
252 OP_DUP_FCN (, mx_inline_dup, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
253 OP_DUP_FCN (, mx_inline_dup, FloatComplex, FloatComplex) |
3 | 254 |
2804 | 255 // These should really return a bool *. Also, they should probably be |
256 // in with a collection of other element-by-element boolean ops. | |
3769 | 257 OP_DUP_FCN (0.0 ==, mx_inline_not, double, double) |
258 OP_DUP_FCN (0.0 ==, mx_inline_not, double, Complex) | |
3 | 259 |
3769 | 260 OP_DUP_FCN (, mx_inline_make_complex, Complex, double) |
2804 | 261 |
3769 | 262 OP_DUP_FCN (-, mx_inline_change_sign, double, double) |
263 OP_DUP_FCN (-, mx_inline_change_sign, Complex, Complex) | |
3 | 264 |
8650
a1ae2aae903e
abs,real,imag,conj: use code from mx-inlines rather than the generic map
Jaroslav Hajek <highegg@gmail.com>
parents:
8380
diff
changeset
|
265 OP_DUP_FCN (std::abs, mx_inline_fabs_dup, double, double) |
a1ae2aae903e
abs,real,imag,conj: use code from mx-inlines rather than the generic map
Jaroslav Hajek <highegg@gmail.com>
parents:
8380
diff
changeset
|
266 OP_DUP_FCN (std::abs, mx_inline_cabs_dup, double, Complex) |
3769 | 267 OP_DUP_FCN (real, mx_inline_real_dup, double, Complex) |
268 OP_DUP_FCN (imag, mx_inline_imag_dup, double, Complex) | |
269 OP_DUP_FCN (conj, mx_inline_conj_dup, Complex, Complex) | |
3 | 270 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
271 OP_DUP_FCN (0.0 ==, mx_inline_not, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
272 OP_DUP_FCN (static_cast<float>(0.0) ==, mx_inline_not, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
273 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
274 OP_DUP_FCN (, mx_inline_make_complex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
275 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
276 OP_DUP_FCN (-, mx_inline_change_sign, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
277 OP_DUP_FCN (-, mx_inline_change_sign, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
278 |
8650
a1ae2aae903e
abs,real,imag,conj: use code from mx-inlines rather than the generic map
Jaroslav Hajek <highegg@gmail.com>
parents:
8380
diff
changeset
|
279 OP_DUP_FCN (std::abs, mx_inline_fabs_dup, float, float) |
a1ae2aae903e
abs,real,imag,conj: use code from mx-inlines rather than the generic map
Jaroslav Hajek <highegg@gmail.com>
parents:
8380
diff
changeset
|
280 OP_DUP_FCN (std::abs, mx_inline_cabs_dup, float, FloatComplex) |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
281 OP_DUP_FCN (real, mx_inline_real_dup, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
282 OP_DUP_FCN (imag, mx_inline_imag_dup, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
283 OP_DUP_FCN (conj, mx_inline_conj_dup, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
284 |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
285 // NOTE: std::norm is NOT equivalent |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
286 template <class T> |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
287 inline T cabsq (const std::complex<T>& c) |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
288 { return c.real () * c.real () + c.imag () * c.imag (); } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
289 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
290 #define OP_RED_SUM(ac, el) ac += el |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
291 #define OP_RED_PROD(ac, el) ac *= el |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
292 #define OP_RED_SUMSQ(ac, el) ac += el*el |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
293 #define OP_RED_SUMSQC(ac, el) ac += cabsq (el) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
294 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
295 // default. works for integers and bool. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
296 template <class T> |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
297 inline bool xis_true (T x) { return x; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
298 template <class T> |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
299 inline bool xis_false (T x) { return ! x; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
300 // for octave_ints |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
301 template <class T> |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
302 inline bool xis_true (const octave_int<T>& x) { return x.value (); } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
303 template <class T> |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
304 inline bool xis_false (const octave_int<T>& x) { return ! x.value (); } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
305 // for reals, we want to ignore NaNs. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
306 inline bool xis_true (double x) { return ! xisnan (x) && x != 0.0; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
307 inline bool xis_false (double x) { return x == 0.0; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
308 inline bool xis_true (float x) { return ! xisnan (x) && x != 0.0f; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
309 inline bool xis_false (float x) { return x == 0.0f; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
310 // Ditto for complex. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
311 inline bool xis_true (const Complex& x) { return ! xisnan (x) && x != 0.0; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
312 inline bool xis_false (const Complex& x) { return x == 0.0; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
313 inline bool xis_true (const FloatComplex& x) { return ! xisnan (x) && x != 0.0f; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
314 inline bool xis_false (const FloatComplex& x) { return x == 0.0f; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
315 |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
316 // The following two implement a simple short-circuiting. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
317 #define OP_RED_ANYC(ac, el) if (xis_true (el)) { ac = true; break; } else continue |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
318 #define OP_RED_ALLC(ac, el) if (xis_false (el)) { ac = false; break; } else continue |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
319 |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
320 // Row any/all reductions are a tradeoff - we traverse the array by |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
321 // columns to gain cache coherence, but sacrifice short-circuiting for that. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
322 // For certain logical arrays, this could mean a significant loss. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
323 // A more sophisticated implementation could introduce a buffer of active |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
324 // row indices to achieve both. Right now, I don't see the operation as |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
325 // important enough. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
326 |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
327 #define OP_RED_ANYR(ac, el) if (xis_true (el)) ac = true |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
328 #define OP_RED_ALLR(ac, el) if (xis_false (el)) ac = false |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
329 |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
330 #define OP_RED_FCN(F, TSRC, TRES, OP, ZERO) \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
331 template <class T> \ |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
332 inline TRES \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
333 F (const TSRC* v, octave_idx_type n) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
334 { \ |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
335 TRES ac = ZERO; \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
336 for (octave_idx_type i = 0; i < n; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
337 OP(ac, v[i]); \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
338 return ac; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
339 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
340 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
341 OP_RED_FCN (mx_inline_sum, T, T, OP_RED_SUM, 0) |
8756
d0755c9db5ed
implement fast logical sum (counting)
Jaroslav Hajek <highegg@gmail.com>
parents:
8751
diff
changeset
|
342 OP_RED_FCN (mx_inline_count, bool, T, OP_RED_SUM, 0) |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
343 OP_RED_FCN (mx_inline_prod, T, T, OP_RED_PROD, 1) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
344 OP_RED_FCN (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
345 OP_RED_FCN (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
346 OP_RED_FCN (mx_inline_any, T, bool, OP_RED_ANYC, false) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
347 OP_RED_FCN (mx_inline_all, T, bool, OP_RED_ALLC, true) |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
348 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
349 |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
350 #define OP_RED_FCN2(F, TSRC, TRES, OP, ZERO) \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
351 template <class T> \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
352 inline void \ |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
353 F (const TSRC* v, TRES *r, octave_idx_type m, octave_idx_type n) \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
354 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
355 for (octave_idx_type i = 0; i < m; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
356 r[i] = ZERO; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
357 for (octave_idx_type j = 0; j < n; j++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
358 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
359 for (octave_idx_type i = 0; i < m; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
360 OP(r[i], v[i]); \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
361 v += m; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
362 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
363 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
364 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
365 OP_RED_FCN2 (mx_inline_sum, T, T, OP_RED_SUM, 0) |
8756
d0755c9db5ed
implement fast logical sum (counting)
Jaroslav Hajek <highegg@gmail.com>
parents:
8751
diff
changeset
|
366 OP_RED_FCN2 (mx_inline_count, bool, T, OP_RED_SUM, 0) |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
367 OP_RED_FCN2 (mx_inline_prod, T, T, OP_RED_PROD, 1) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
368 OP_RED_FCN2 (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
369 OP_RED_FCN2 (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
370 OP_RED_FCN2 (mx_inline_any, T, bool, OP_RED_ANYR, false) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
371 OP_RED_FCN2 (mx_inline_all, T, bool, OP_RED_ALLR, true) |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
372 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
373 #define OP_RED_FCNN(F, TSRC, TRES) \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
374 template <class T> \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
375 inline void \ |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
376 F (const TSRC *v, TRES *r, octave_idx_type l, \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
377 octave_idx_type n, octave_idx_type u) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
378 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
379 if (l == 1) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
380 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
381 for (octave_idx_type i = 0; i < u; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
382 { \ |
8756
d0755c9db5ed
implement fast logical sum (counting)
Jaroslav Hajek <highegg@gmail.com>
parents:
8751
diff
changeset
|
383 r[i] = F<T> (v, n); \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
384 v += n; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
385 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
386 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
387 else \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
388 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
389 for (octave_idx_type i = 0; i < u; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
390 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
391 F (v, r, l, n); \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
392 v += l*n; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
393 r += l; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
394 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
395 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
396 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
397 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
398 OP_RED_FCNN (mx_inline_sum, T, T) |
8756
d0755c9db5ed
implement fast logical sum (counting)
Jaroslav Hajek <highegg@gmail.com>
parents:
8751
diff
changeset
|
399 OP_RED_FCNN (mx_inline_count, bool, T) |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
400 OP_RED_FCNN (mx_inline_prod, T, T) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
401 OP_RED_FCNN (mx_inline_sumsq, T, T) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
402 OP_RED_FCNN (mx_inline_sumsq, std::complex<T>, T) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
403 OP_RED_FCNN (mx_inline_any, T, bool) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
404 OP_RED_FCNN (mx_inline_all, T, bool) |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
405 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
406 #define OP_CUM_FCN(F, OP) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
407 template <class T> \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
408 inline void \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
409 F (const T *v, T *r, octave_idx_type n) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
410 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
411 if (n) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
412 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
413 T t = r[0] = v[0]; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
414 for (octave_idx_type i = 1; i < n; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
415 r[i] = t = t OP v[i]; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
416 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
417 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
418 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
419 OP_CUM_FCN (mx_inline_cumsum, +) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
420 OP_CUM_FCN (mx_inline_cumprod, *) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
421 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
422 #define OP_CUM_FCN2(F, OP) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
423 template <class T> \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
424 inline void \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
425 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
426 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
427 if (n) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
428 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
429 for (octave_idx_type i = 0; i < m; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
430 r[i] = v[i]; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
431 const T *r0 = r; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
432 for (octave_idx_type j = 1; j < n; j++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
433 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
434 r += m; v += m; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
435 for (octave_idx_type i = 0; i < m; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
436 r[i] = v[i] OP r0[i]; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
437 r0 += m; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
438 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
439 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
440 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
441 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
442 OP_CUM_FCN2 (mx_inline_cumsum, +) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
443 OP_CUM_FCN2 (mx_inline_cumprod, *) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
444 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
445 #define OP_CUM_FCNN(F) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
446 template <class T> \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
447 inline void \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
448 F (const T *v, T *r, octave_idx_type l, \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
449 octave_idx_type n, octave_idx_type u) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
450 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
451 if (l == 1) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
452 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
453 for (octave_idx_type i = 0; i < u; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
454 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
455 F (v, r, n); \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
456 v += n; r += n; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
457 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
458 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
459 else \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
460 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
461 for (octave_idx_type i = 0; i < u; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
462 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
463 F (v, r, l, n); \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
464 v += l*n; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
465 r += l*n; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
466 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
467 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
468 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
469 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
470 OP_CUM_FCNN (mx_inline_cumsum) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
471 OP_CUM_FCNN (mx_inline_cumprod) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
472 |
8751
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
473 #define OP_MINMAX_FCN(F, OP) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
474 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
475 void F (const T *v, T *r, octave_idx_type n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
476 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
477 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
478 T tmp = v[0]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
479 octave_idx_type i = 1; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
480 while (xisnan (tmp) && i < n) tmp = v[i++]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
481 for (i = 1; i < n; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
482 if (v[i] OP tmp) tmp = v[i]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
483 *r = tmp; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
484 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
485 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
486 void F (const T *v, T *r, octave_idx_type *ri, octave_idx_type n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
487 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
488 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
489 T tmp = v[0]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
490 octave_idx_type tmpi = 0; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
491 octave_idx_type i = 1; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
492 while (xisnan (tmp) && i < n) tmp = v[i++]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
493 for (i = 1; i < n; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
494 if (v[i] OP tmp) { tmp = v[i]; tmpi = i; }\ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
495 *r = tmp; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
496 *ri = tmpi; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
497 } |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
498 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
499 OP_MINMAX_FCN (mx_inline_min, <) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
500 OP_MINMAX_FCN (mx_inline_max, >) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
501 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
502 // Row reductions will be slightly complicated. We will proceed with checks |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
503 // for NaNs until we detect that no row will yield a NaN, in which case we |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
504 // proceed to a faster code. |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
505 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
506 #define OP_MINMAX_FCN2(F, OP) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
507 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
508 inline void \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
509 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
510 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
511 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
512 bool nan = false; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
513 octave_idx_type j = 0; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
514 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
515 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
516 r[i] = v[i]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
517 if (xisnan (v[i])) nan = true; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
518 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
519 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
520 while (nan && j < n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
521 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
522 nan = false; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
523 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
524 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
525 if (xisnan (v[i])) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
526 nan = true; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
527 else if (xisnan (r[i]) || v[i] OP r[i]) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
528 r[i] = v[i]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
529 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
530 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
531 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
532 while (j < n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
533 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
534 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
535 if (v[i] OP r[i]) r[i] = v[i]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
536 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
537 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
538 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
539 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
540 inline void \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
541 F (const T *v, T *r, octave_idx_type *ri, \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
542 octave_idx_type m, octave_idx_type n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
543 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
544 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
545 bool nan = false; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
546 octave_idx_type j = 0; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
547 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
548 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
549 r[i] = v[i]; ri[i] = j; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
550 if (xisnan (v[i])) nan = true; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
551 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
552 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
553 while (nan && j < n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
554 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
555 nan = false; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
556 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
557 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
558 if (xisnan (v[i])) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
559 nan = true; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
560 else if (xisnan (r[i]) || v[i] OP r[i]) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
561 { r[i] = v[i]; ri[i] = j; } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
562 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
563 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
564 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
565 while (j < n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
566 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
567 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
568 if (v[i] OP r[i]) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
569 { r[i] = v[i]; ri[i] = j; } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
570 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
571 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
572 } |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
573 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
574 OP_MINMAX_FCN2 (mx_inline_min, <) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
575 OP_MINMAX_FCN2 (mx_inline_max, >) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
576 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
577 #define OP_MINMAX_FCNN(F) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
578 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
579 inline void \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
580 F (const T *v, T *r, octave_idx_type l, \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
581 octave_idx_type n, octave_idx_type u) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
582 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
583 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
584 if (l == 1) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
585 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
586 for (octave_idx_type i = 0; i < u; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
587 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
588 F (v, r, n); \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
589 v += n; r++; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
590 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
591 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
592 else \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
593 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
594 for (octave_idx_type i = 0; i < u; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
595 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
596 F (v, r, l, n); \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
597 v += l*n; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
598 r += l; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
599 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
600 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
601 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
602 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
603 inline void \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
604 F (const T *v, T *r, octave_idx_type *ri, \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
605 octave_idx_type l, octave_idx_type n, octave_idx_type u) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
606 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
607 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
608 if (l == 1) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
609 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
610 for (octave_idx_type i = 0; i < u; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
611 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
612 F (v, r, ri, n); \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
613 v += n; r++; ri++; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
614 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
615 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
616 else \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
617 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
618 for (octave_idx_type i = 0; i < u; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
619 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
620 F (v, r, ri, l, n); \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
621 v += l*n; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
622 r += l; ri += l; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
623 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
624 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
625 } |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
626 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
627 OP_MINMAX_FCNN (mx_inline_min) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
628 OP_MINMAX_FCNN (mx_inline_max) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
629 |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
630 // Assistant function |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
631 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
632 inline void |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
633 get_extent_triplet (const dim_vector& dims, int& dim, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
634 octave_idx_type& l, octave_idx_type& n, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
635 octave_idx_type& u) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
636 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
637 octave_idx_type ndims = dims.length (); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
638 if (dim >= ndims) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
639 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
640 l = dims.numel (); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
641 n = 1; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
642 u = 1; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
643 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
644 else |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
645 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
646 if (dim < 0) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
647 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
648 // find first non-singleton dim |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
649 for (dim = 0; dims(dim) == 1 && dim < ndims - 1; dim++) ; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
650 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
651 // calculate extent triplet. |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
652 l = 1, n = dims(dim), u = 1; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
653 for (octave_idx_type i = 0; i < dim; i++) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
654 l *= dims (i); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
655 for (octave_idx_type i = dim + 1; i < ndims; i++) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
656 u *= dims (i); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
657 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
658 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
659 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
660 // Appliers. |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
661 // FIXME: is this the best design? C++ gives a lot of options here... |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
662 // maybe it can be done without an explicit parameter? |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
663 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
664 template <class ArrayType, class T> |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
665 inline ArrayType |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
666 do_mx_red_op (const Array<T>& src, int dim, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
667 void (*mx_red_op) (const T *, typename ArrayType::element_type *, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
668 octave_idx_type, octave_idx_type, octave_idx_type)) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
669 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
670 octave_idx_type l, n, u; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
671 dim_vector dims = src.dims (); |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
672 // M*b inconsistency: sum([]) = 0 etc. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
673 if (dims.length () == 2 && dims(0) == 0 && dims(1) == 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
674 dims (1) = 1; |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
675 |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
676 get_extent_triplet (dims, dim, l, n, u); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
677 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
678 // Reduction operation reduces the array size. |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
679 if (dim < dims.length ()) dims(dim) = 1; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
680 dims.chop_trailing_singletons (); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
681 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
682 ArrayType ret (dims); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
683 mx_red_op (src.data (), ret.fortran_vec (), l, n, u); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
684 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
685 return ret; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
686 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
687 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
688 template <class ArrayType, class T> |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
689 inline ArrayType |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
690 do_mx_cum_op (const Array<T>& src, int dim, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
691 void (*mx_cum_op) (const T *, typename ArrayType::element_type *, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
692 octave_idx_type, octave_idx_type, octave_idx_type)) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
693 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
694 octave_idx_type l, n, u; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
695 dim_vector dims = src.dims (); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
696 get_extent_triplet (dims, dim, l, n, u); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
697 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
698 // Cumulative operation doesn't reduce the array size. |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
699 ArrayType ret (dims); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
700 mx_cum_op (src.data (), ret.fortran_vec (), l, n, u); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
701 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
702 return ret; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
703 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
704 |
8751
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
705 template <class ArrayType> |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
706 inline ArrayType |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
707 do_mx_minmax_op (const ArrayType& src, int dim, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
708 void (*mx_minmax_op) (const typename ArrayType::element_type *, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
709 typename ArrayType::element_type *, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
710 octave_idx_type, octave_idx_type, octave_idx_type)) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
711 { |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
712 octave_idx_type l, n, u; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
713 dim_vector dims = src.dims (); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
714 get_extent_triplet (dims, dim, l, n, u); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
715 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
716 // If the dimension is zero, we don't do anything. |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
717 if (dim < dims.length () && dims(dim) != 0) dims(dim) = 1; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
718 dims.chop_trailing_singletons (); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
719 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
720 ArrayType ret (dims); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
721 mx_minmax_op (src.data (), ret.fortran_vec (), l, n, u); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
722 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
723 return ret; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
724 } |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
725 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
726 template <class ArrayType> |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
727 inline ArrayType |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
728 do_mx_minmax_op (const ArrayType& src, Array<octave_idx_type>& idx, int dim, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
729 void (*mx_minmax_op) (const typename ArrayType::element_type *, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
730 typename ArrayType::element_type *, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
731 octave_idx_type *, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
732 octave_idx_type, octave_idx_type, octave_idx_type)) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
733 { |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
734 octave_idx_type l, n, u; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
735 dim_vector dims = src.dims (); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
736 get_extent_triplet (dims, dim, l, n, u); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
737 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
738 // If the dimension is zero, we don't do anything. |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
739 if (dim < dims.length () && dims(dim) != 0) dims(dim) = 1; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
740 dims.chop_trailing_singletons (); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
741 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
742 ArrayType ret (dims); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
743 if (idx.dims () != dims) idx = Array<octave_idx_type> (dims); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
744 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
745 mx_minmax_op (src.data (), ret.fortran_vec (), idx.fortran_vec (), |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
746 l, n, u); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
747 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
748 return ret; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
749 } |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
750 |
3864 | 751 // Avoid some code duplication. Maybe we should use templates. |
752 | |
4015 | 753 #define MX_CUMULATIVE_OP(RET_TYPE, ELT_TYPE, OP) \ |
3864 | 754 \ |
5275 | 755 octave_idx_type nr = rows (); \ |
756 octave_idx_type nc = cols (); \ | |
3864 | 757 \ |
758 RET_TYPE retval (nr, nc); \ | |
759 \ | |
760 if (nr > 0 && nc > 0) \ | |
761 { \ | |
762 if ((nr == 1 && dim == -1) || dim == 1) \ | |
763 { \ | |
5275 | 764 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 765 { \ |
766 ELT_TYPE t = elem (i, 0); \ | |
5275 | 767 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 768 { \ |
769 retval.elem (i, j) = t; \ | |
770 if (j < nc - 1) \ | |
771 t OP elem (i, j+1); \ | |
772 } \ | |
773 } \ | |
774 } \ | |
775 else \ | |
776 { \ | |
5275 | 777 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 778 { \ |
779 ELT_TYPE t = elem (0, j); \ | |
5275 | 780 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 781 { \ |
782 retval.elem (i, j) = t; \ | |
783 if (i < nr - 1) \ | |
784 t OP elem (i+1, j); \ | |
785 } \ | |
786 } \ | |
787 } \ | |
788 } \ | |
789 \ | |
790 return retval | |
791 | |
792 #define MX_BASE_REDUCTION_OP(RET_TYPE, ROW_EXPR, COL_EXPR, INIT_VAL, \ | |
793 MT_RESULT) \ | |
794 \ | |
5275 | 795 octave_idx_type nr = rows (); \ |
796 octave_idx_type nc = cols (); \ | |
3864 | 797 \ |
798 RET_TYPE retval; \ | |
799 \ | |
800 if (nr > 0 && nc > 0) \ | |
801 { \ | |
802 if ((nr == 1 && dim == -1) || dim == 1) \ | |
803 { \ | |
804 retval.resize (nr, 1); \ | |
5275 | 805 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 806 { \ |
807 retval.elem (i, 0) = INIT_VAL; \ | |
5275 | 808 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 809 { \ |
810 ROW_EXPR; \ | |
811 } \ | |
812 } \ | |
813 } \ | |
814 else \ | |
815 { \ | |
816 retval.resize (1, nc); \ | |
5275 | 817 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 818 { \ |
819 retval.elem (0, j) = INIT_VAL; \ | |
5275 | 820 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 821 { \ |
822 COL_EXPR; \ | |
823 } \ | |
824 } \ | |
825 } \ | |
826 } \ | |
4139 | 827 else if (nc == 0 && (nr == 0 || (nr == 1 && dim == -1))) \ |
828 retval.resize (1, 1, MT_RESULT); \ | |
4015 | 829 else if (nr == 0 && (dim == 0 || dim == -1)) \ |
830 retval.resize (1, nc, MT_RESULT); \ | |
831 else if (nc == 0 && dim == 1) \ | |
832 retval.resize (nr, 1, MT_RESULT); \ | |
833 else \ | |
4139 | 834 retval.resize (nr > 0, nc > 0); \ |
3864 | 835 \ |
836 return retval | |
837 | |
838 #define MX_REDUCTION_OP_ROW_EXPR(OP) \ | |
839 retval.elem (i, 0) OP elem (i, j) | |
840 | |
841 #define MX_REDUCTION_OP_COL_EXPR(OP) \ | |
842 retval.elem (0, j) OP elem (i, j) | |
843 | |
844 #define MX_REDUCTION_OP(RET_TYPE, OP, INIT_VAL, MT_RESULT) \ | |
845 MX_BASE_REDUCTION_OP (RET_TYPE, \ | |
846 MX_REDUCTION_OP_ROW_EXPR (OP), \ | |
847 MX_REDUCTION_OP_COL_EXPR (OP), \ | |
848 INIT_VAL, MT_RESULT) | |
4015 | 849 |
850 #define MX_ANY_ALL_OP_ROW_CODE(TEST_OP, TEST_TRUE_VAL) \ | |
851 if (elem (i, j) TEST_OP 0.0) \ | |
852 { \ | |
853 retval.elem (i, 0) = TEST_TRUE_VAL; \ | |
854 break; \ | |
855 } | |
856 | |
857 #define MX_ANY_ALL_OP_COL_CODE(TEST_OP, TEST_TRUE_VAL) \ | |
858 if (elem (i, j) TEST_OP 0.0) \ | |
859 { \ | |
860 retval.elem (0, j) = TEST_TRUE_VAL; \ | |
861 break; \ | |
862 } | |
863 | |
864 #define MX_ANY_ALL_OP(DIM, INIT_VAL, TEST_OP, TEST_TRUE_VAL) \ | |
865 MX_BASE_REDUCTION_OP (boolMatrix, \ | |
866 MX_ANY_ALL_OP_ROW_CODE (TEST_OP, TEST_TRUE_VAL), \ | |
867 MX_ANY_ALL_OP_COL_CODE (TEST_OP, TEST_TRUE_VAL), \ | |
868 INIT_VAL, INIT_VAL) | |
869 | |
870 #define MX_ALL_OP(DIM) MX_ANY_ALL_OP (DIM, true, ==, false) | |
871 | |
872 #define MX_ANY_OP(DIM) MX_ANY_ALL_OP (DIM, false, !=, true) | |
873 | |
4563 | 874 #define MX_ND_ALL_EXPR elem (iter_idx) == 0 |
4556 | 875 |
4563 | 876 #define MX_ND_ANY_EXPR elem (iter_idx) != 0 |
4556 | 877 |
878 #define MX_ND_ALL_EVAL(TEST_EXPR) \ | |
5520 | 879 if (retval(result_idx) && (TEST_EXPR)) \ |
880 retval(result_idx) = 0; | |
4556 | 881 |
882 #define MX_ND_ANY_EVAL(TEST_EXPR) \ | |
5520 | 883 if (retval(result_idx) || (TEST_EXPR)) \ |
884 retval(result_idx) = 1; | |
4569 | 885 |
5520 | 886 #define MX_ND_REDUCTION(EVAL_EXPR, INIT_VAL, RET_TYPE) \ |
4556 | 887 \ |
4569 | 888 RET_TYPE retval; \ |
4556 | 889 \ |
4932 | 890 dim_vector dv = this->dims (); \ |
5520 | 891 int nd = this->ndims (); \ |
4563 | 892 \ |
5955 | 893 int empty = false; \ |
4563 | 894 \ |
5520 | 895 for (int i = 0; i < nd; i++) \ |
4563 | 896 { \ |
5955 | 897 if (dv(i) == 0) \ |
4563 | 898 { \ |
5955 | 899 empty = true; \ |
4563 | 900 break; \ |
901 } \ | |
902 } \ | |
903 \ | |
5972 | 904 if (nd == 2 && dv(0) == 0 && dv(1) == 0) \ |
905 { \ | |
906 retval.resize (dim_vector (1, 1), INIT_VAL); \ | |
907 return retval; \ | |
908 } \ | |
909 \ | |
5520 | 910 /* We need to find first non-singleton dim. */ \ |
911 \ | |
912 if (dim == -1) \ | |
4556 | 913 { \ |
5520 | 914 dim = 0; \ |
915 \ | |
916 for (int i = 0; i < nd; i++) \ | |
4563 | 917 { \ |
5520 | 918 if (dv(i) != 1) \ |
4563 | 919 { \ |
920 dim = i; \ | |
921 break; \ | |
922 } \ | |
923 } \ | |
924 } \ | |
5520 | 925 else if (dim >= nd) \ |
4563 | 926 { \ |
5520 | 927 dim = nd++; \ |
928 dv.resize (nd, 1); \ | |
4563 | 929 } \ |
930 \ | |
5523 | 931 /* R = op (A, DIM) \ |
4563 | 932 \ |
5523 | 933 The strategy here is to access the elements of A along the \ |
934 dimension specified by DIM. This means that we loop over each \ | |
5615 | 935 element of R and adjust the index into A as needed. Store the \ |
936 cummulative product of all dimensions of A in CP_SZ. The last \ | |
937 element of CP_SZ is the total number of elements of A. */ \ | |
4563 | 938 \ |
5615 | 939 Array<octave_idx_type> cp_sz (nd+1, 1); \ |
940 for (int i = 1; i <= nd; i++) \ | |
5523 | 941 cp_sz(i) = cp_sz(i-1)*dv(i-1); \ |
5520 | 942 \ |
5523 | 943 octave_idx_type reset_at = cp_sz(dim); \ |
944 octave_idx_type base_incr = cp_sz(dim+1); \ | |
945 octave_idx_type incr = cp_sz(dim); \ | |
946 octave_idx_type base = 0; \ | |
947 octave_idx_type next_base = base + base_incr; \ | |
948 octave_idx_type iter_idx = base; \ | |
949 octave_idx_type n_elts = dv(dim); \ | |
4556 | 950 \ |
5520 | 951 dv(dim) = 1; \ |
4556 | 952 \ |
5520 | 953 retval.resize (dv, INIT_VAL); \ |
4556 | 954 \ |
5955 | 955 if (! empty) \ |
4556 | 956 { \ |
5955 | 957 octave_idx_type nel = dv.numel (); \ |
5523 | 958 \ |
5955 | 959 octave_idx_type k = 1; \ |
960 \ | |
961 for (octave_idx_type result_idx = 0; result_idx < nel; result_idx++) \ | |
5523 | 962 { \ |
5955 | 963 OCTAVE_QUIT; \ |
5523 | 964 \ |
5955 | 965 for (octave_idx_type j = 0; j < n_elts; j++) \ |
966 { \ | |
967 OCTAVE_QUIT; \ | |
968 \ | |
969 EVAL_EXPR; \ | |
5520 | 970 \ |
5955 | 971 iter_idx += incr; \ |
972 } \ | |
5523 | 973 \ |
5955 | 974 if (k == reset_at) \ |
975 { \ | |
976 base = next_base; \ | |
977 next_base += base_incr; \ | |
978 iter_idx = base; \ | |
979 k = 1; \ | |
980 } \ | |
981 else \ | |
982 { \ | |
983 base++; \ | |
984 iter_idx = base; \ | |
985 k++; \ | |
986 } \ | |
987 } \ | |
4556 | 988 } \ |
989 \ | |
4871 | 990 retval.chop_trailing_singletons (); \ |
991 \ | |
4556 | 992 return retval |
4569 | 993 |
994 #define MX_ND_REAL_OP_REDUCTION(ASN_EXPR, INIT_VAL) \ | |
5520 | 995 MX_ND_REDUCTION (retval(result_idx) ASN_EXPR, INIT_VAL, NDArray) |
4569 | 996 |
997 #define MX_ND_COMPLEX_OP_REDUCTION(ASN_EXPR, INIT_VAL) \ | |
5520 | 998 MX_ND_REDUCTION (retval(result_idx) ASN_EXPR, INIT_VAL, ComplexNDArray) |
4569 | 999 |
1000 #define MX_ND_ANY_ALL_REDUCTION(EVAL_EXPR, VAL) \ | |
5520 | 1001 MX_ND_REDUCTION (EVAL_EXPR, VAL, boolNDArray) |
4556 | 1002 |
5523 | 1003 #define MX_ND_CUMULATIVE_OP(RET_TYPE, ACC_TYPE, INIT_VAL, OP) \ |
1004 \ | |
4584 | 1005 RET_TYPE retval; \ |
1006 \ | |
4932 | 1007 dim_vector dv = this->dims (); \ |
5523 | 1008 int nd = this->ndims (); \ |
4584 | 1009 \ |
5955 | 1010 bool empty = false; \ |
4584 | 1011 \ |
5523 | 1012 for (int i = 0; i < nd; i++) \ |
4584 | 1013 { \ |
5955 | 1014 if (dv(i) == 0) \ |
4584 | 1015 { \ |
5955 | 1016 empty = true; \ |
4584 | 1017 break; \ |
1018 } \ | |
1019 } \ | |
1020 \ | |
5523 | 1021 /* We need to find first non-singleton dim. */ \ |
1022 \ | |
4584 | 1023 if (dim == -1) \ |
1024 { \ | |
5523 | 1025 dim = 0; \ |
1026 \ | |
1027 for (int i = 0; i < nd; i++) \ | |
4584 | 1028 { \ |
5523 | 1029 if (dv(i) != 1) \ |
4584 | 1030 { \ |
1031 dim = i; \ | |
1032 break; \ | |
1033 } \ | |
1034 } \ | |
1035 } \ | |
5523 | 1036 else if (dim >= nd) \ |
4584 | 1037 { \ |
5523 | 1038 dim = nd++; \ |
1039 dv.resize (nd, 1); \ | |
4584 | 1040 } \ |
1041 \ | |
5523 | 1042 /* R = op (A, DIM) \ |
4584 | 1043 \ |
5523 | 1044 The strategy here is to access the elements of A along the \ |
1045 dimension specified by DIM. This means that we loop over each \ | |
5611 | 1046 element of R and adjust the index into A as needed. Store the \ |
5614 | 1047 cummulative product of all dimensions of A in CP_SZ. The last \ |
1048 element of CP_SZ is the total number of elements of A. */ \ | |
4584 | 1049 \ |
5611 | 1050 Array<octave_idx_type> cp_sz (nd+1, 1); \ |
1051 for (int i = 1; i <= nd; i++) \ | |
5523 | 1052 cp_sz(i) = cp_sz(i-1)*dv(i-1); \ |
4584 | 1053 \ |
5523 | 1054 octave_idx_type reset_at = cp_sz(dim); \ |
1055 octave_idx_type base_incr = cp_sz(dim+1); \ | |
1056 octave_idx_type incr = cp_sz(dim); \ | |
1057 octave_idx_type base = 0; \ | |
1058 octave_idx_type next_base = base + base_incr; \ | |
1059 octave_idx_type iter_idx = base; \ | |
1060 octave_idx_type n_elts = dv(dim); \ | |
4584 | 1061 \ |
5523 | 1062 retval.resize (dv, INIT_VAL); \ |
1063 \ | |
5955 | 1064 if (! empty) \ |
1065 { \ | |
1066 octave_idx_type nel = dv.numel () / n_elts; \ | |
5523 | 1067 \ |
5955 | 1068 octave_idx_type k = 1; \ |
4584 | 1069 \ |
5955 | 1070 for (octave_idx_type i = 0; i < nel; i++) \ |
1071 { \ | |
1072 OCTAVE_QUIT; \ | |
1073 \ | |
1074 ACC_TYPE prev_val = INIT_VAL; \ | |
1075 \ | |
1076 for (octave_idx_type j = 0; j < n_elts; j++) \ | |
1077 { \ | |
1078 OCTAVE_QUIT; \ | |
5523 | 1079 \ |
5955 | 1080 if (j == 0) \ |
1081 { \ | |
1082 retval(iter_idx) = elem (iter_idx); \ | |
1083 prev_val = retval(iter_idx); \ | |
1084 } \ | |
1085 else \ | |
1086 { \ | |
1087 prev_val = prev_val OP elem (iter_idx); \ | |
1088 retval(iter_idx) = prev_val; \ | |
1089 } \ | |
5523 | 1090 \ |
5955 | 1091 iter_idx += incr; \ |
1092 } \ | |
4584 | 1093 \ |
5955 | 1094 if (k == reset_at) \ |
5523 | 1095 { \ |
5955 | 1096 base = next_base; \ |
1097 next_base += base_incr; \ | |
1098 iter_idx = base; \ | |
1099 k = 1; \ | |
4584 | 1100 } \ |
1101 else \ | |
5523 | 1102 { \ |
5955 | 1103 base++; \ |
1104 iter_idx = base; \ | |
1105 k++; \ | |
1106 } \ | |
4584 | 1107 } \ |
5523 | 1108 } \ |
4584 | 1109 \ |
5523 | 1110 retval.chop_trailing_singletons (); \ |
1111 \ | |
4584 | 1112 return retval |
1113 | |
2804 | 1114 #endif |
3 | 1115 |
1116 /* | |
1117 ;;; Local Variables: *** | |
1118 ;;; mode: C++ *** | |
1119 ;;; End: *** | |
1120 */ |