Mercurial > hg > octave-nkf
annotate liboctave/mx-inlines.cc @ 8380:dbe67764e628
fix & improve speed of diagonal matrix multiplication
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Sun, 07 Dec 2008 10:29:34 +0100 |
parents | 82be108cc558 |
children | a1ae2aae903e |
rev | line source |
---|---|
3 | 1 /* |
2 | |
7017 | 3 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, |
4 2003, 2004, 2005, 2006, 2007 John W. Eaton | |
3 | 5 |
6 This file is part of Octave. | |
7 | |
8 Octave is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
7016 | 10 Free Software Foundation; either version 3 of the License, or (at your |
11 option) any later version. | |
3 | 12 |
13 Octave is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
7016 | 19 along with Octave; see the file COPYING. If not, see |
20 <http://www.gnu.org/licenses/>. | |
3 | 21 |
22 */ | |
23 | |
2828 | 24 #if !defined (octave_mx_inlines_h) |
25 #define octave_mx_inlines_h 1 | |
2804 | 26 |
27 #include <cstddef> | |
28 | |
5525 | 29 #include "quit.h" |
30 | |
1650 | 31 #include "oct-cmplx.h" |
461 | 32 |
8380
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
33 template <class R, class S> |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
34 inline void |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
35 mx_inline_fill_vs (R *r, size_t n, S s) |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
36 { |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
37 for (size_t i = 0; i < n; i++) |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
38 r[i] = s; |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
39 } |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
40 |
2811 | 41 #define VS_OP_FCN(F, OP) \ |
42 template <class R, class V, class S> \ | |
3262 | 43 inline void \ |
2811 | 44 F ## _vs (R *r, const V *v, size_t n, S s) \ |
45 { \ | |
46 for (size_t i = 0; i < n; i++) \ | |
47 r[i] = v[i] OP s; \ | |
48 } | |
49 | |
3769 | 50 VS_OP_FCN (mx_inline_add, +) |
51 VS_OP_FCN (mx_inline_subtract, -) | |
52 VS_OP_FCN (mx_inline_multiply, *) | |
53 VS_OP_FCN (mx_inline_divide, /) | |
3 | 54 |
2804 | 55 #define VS_OP(F, OP, R, V, S) \ |
56 static inline R * \ | |
57 F (const V *v, size_t n, S s) \ | |
58 { \ | |
59 R *r = 0; \ | |
60 if (n > 0) \ | |
61 { \ | |
62 r = new R [n]; \ | |
2811 | 63 F ## _vs (r, v, n, s); \ |
2804 | 64 } \ |
65 return r; \ | |
66 } | |
3 | 67 |
2804 | 68 #define VS_OPS(R, V, S) \ |
3769 | 69 VS_OP (mx_inline_add, +, R, V, S) \ |
70 VS_OP (mx_inline_subtract, -, R, V, S) \ | |
71 VS_OP (mx_inline_multiply, *, R, V, S) \ | |
72 VS_OP (mx_inline_divide, /, R, V, S) | |
3 | 73 |
2804 | 74 VS_OPS (double, double, double) |
75 VS_OPS (Complex, double, Complex) | |
76 VS_OPS (Complex, Complex, double) | |
77 VS_OPS (Complex, Complex, Complex) | |
3 | 78 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
79 VS_OPS (float, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
80 VS_OPS (FloatComplex, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
81 VS_OPS (FloatComplex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
82 VS_OPS (FloatComplex, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
83 |
2811 | 84 #define SV_OP_FCN(F, OP) \ |
85 template <class R, class S, class V> \ | |
3262 | 86 inline void \ |
2811 | 87 F ## _sv (R *r, S s, const V *v, size_t n) \ |
88 { \ | |
89 for (size_t i = 0; i < n; i++) \ | |
90 r[i] = s OP v[i]; \ | |
91 } \ | |
92 | |
3769 | 93 SV_OP_FCN (mx_inline_add, +) |
94 SV_OP_FCN (mx_inline_subtract, -) | |
95 SV_OP_FCN (mx_inline_multiply, *) | |
96 SV_OP_FCN (mx_inline_divide, /) | |
2811 | 97 |
2804 | 98 #define SV_OP(F, OP, R, S, V) \ |
99 static inline R * \ | |
100 F (S s, const V *v, size_t n) \ | |
101 { \ | |
102 R *r = 0; \ | |
103 if (n > 0) \ | |
104 { \ | |
105 r = new R [n]; \ | |
2811 | 106 F ## _sv (r, s, v, n); \ |
2804 | 107 } \ |
108 return r; \ | |
109 } | |
3 | 110 |
2804 | 111 #define SV_OPS(R, S, V) \ |
3769 | 112 SV_OP (mx_inline_add, +, R, S, V) \ |
113 SV_OP (mx_inline_subtract, -, R, S, V) \ | |
114 SV_OP (mx_inline_multiply, *, R, S, V) \ | |
115 SV_OP (mx_inline_divide, /, R, S, V) | |
3 | 116 |
2804 | 117 SV_OPS (double, double, double) |
118 SV_OPS (Complex, double, Complex) | |
119 SV_OPS (Complex, Complex, double) | |
120 SV_OPS (Complex, Complex, Complex) | |
3 | 121 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
122 SV_OPS (float, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
123 SV_OPS (FloatComplex, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
124 SV_OPS (FloatComplex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
125 SV_OPS (FloatComplex, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
126 |
2811 | 127 #define VV_OP_FCN(F, OP) \ |
128 template <class R, class T1, class T2> \ | |
3262 | 129 inline void \ |
2811 | 130 F ## _vv (R *r, const T1 *v1, const T2 *v2, size_t n) \ |
131 { \ | |
132 for (size_t i = 0; i < n; i++) \ | |
133 r[i] = v1[i] OP v2[i]; \ | |
134 } \ | |
135 | |
3769 | 136 VV_OP_FCN (mx_inline_add, +) |
137 VV_OP_FCN (mx_inline_subtract, -) | |
138 VV_OP_FCN (mx_inline_multiply, *) | |
139 VV_OP_FCN (mx_inline_divide, /) | |
2811 | 140 |
2804 | 141 #define VV_OP(F, OP, R, T1, T2) \ |
142 static inline R * \ | |
143 F (const T1 *v1, const T2 *v2, size_t n) \ | |
144 { \ | |
145 R *r = 0; \ | |
146 if (n > 0) \ | |
147 { \ | |
148 r = new R [n]; \ | |
2811 | 149 F ## _vv (r, v1, v2, n); \ |
2804 | 150 } \ |
151 return r; \ | |
152 } | |
3 | 153 |
2804 | 154 #define VV_OPS(R, T1, T2) \ |
3769 | 155 VV_OP (mx_inline_add, +, R, T1, T2) \ |
156 VV_OP (mx_inline_subtract, -, R, T1, T2) \ | |
157 VV_OP (mx_inline_multiply, *, R, T1, T2) \ | |
158 VV_OP (mx_inline_divide, /, R, T1, T2) | |
3 | 159 |
2804 | 160 VV_OPS (double, double, double) |
161 VV_OPS (Complex, double, Complex) | |
162 VV_OPS (Complex, Complex, double) | |
163 VV_OPS (Complex, Complex, Complex) | |
3 | 164 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
165 VV_OPS (float, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
166 VV_OPS (FloatComplex, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
167 VV_OPS (FloatComplex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
168 VV_OPS (FloatComplex, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
169 |
2804 | 170 #define VS_OP2(F, OP, V, S) \ |
171 static inline V * \ | |
172 F (V *v, size_t n, S s) \ | |
173 { \ | |
174 for (size_t i = 0; i < n; i++) \ | |
175 v[i] OP s; \ | |
176 return v; \ | |
177 } | |
3 | 178 |
2804 | 179 #define VS_OP2S(V, S) \ |
3769 | 180 VS_OP2 (mx_inline_add2, +=, V, S) \ |
181 VS_OP2 (mx_inline_subtract2, -=, V, S) \ | |
182 VS_OP2 (mx_inline_multiply2, *=, V, S) \ | |
183 VS_OP2 (mx_inline_divide2, /=, V, S) \ | |
184 VS_OP2 (mx_inline_copy, =, V, S) | |
3 | 185 |
2804 | 186 VS_OP2S (double, double) |
187 VS_OP2S (Complex, double) | |
188 VS_OP2S (Complex, Complex) | |
3 | 189 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
190 VS_OP2S (float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
191 VS_OP2S (FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
192 VS_OP2S (FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
193 |
2804 | 194 #define VV_OP2(F, OP, T1, T2) \ |
195 static inline T1 * \ | |
196 F (T1 *v1, const T2 *v2, size_t n) \ | |
197 { \ | |
198 for (size_t i = 0; i < n; i++) \ | |
199 v1[i] OP v2[i]; \ | |
200 return v1; \ | |
201 } | |
3 | 202 |
2804 | 203 #define VV_OP2S(T1, T2) \ |
3769 | 204 VV_OP2 (mx_inline_add2, +=, T1, T2) \ |
205 VV_OP2 (mx_inline_subtract2, -=, T1, T2) \ | |
206 VV_OP2 (mx_inline_multiply2, *=, T1, T2) \ | |
207 VV_OP2 (mx_inline_divide2, /=, T1, T2) \ | |
208 VV_OP2 (mx_inline_copy, =, T1, T2) | |
3 | 209 |
2804 | 210 VV_OP2S (double, double) |
211 VV_OP2S (Complex, double) | |
212 VV_OP2S (Complex, Complex) | |
3 | 213 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
214 VV_OP2S (float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
215 VV_OP2S (FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
216 VV_OP2S (FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
217 |
2804 | 218 #define OP_EQ_FCN(T1, T2) \ |
219 static inline bool \ | |
3769 | 220 mx_inline_equal (const T1 *x, const T2 *y, size_t n) \ |
2804 | 221 { \ |
222 for (size_t i = 0; i < n; i++) \ | |
223 if (x[i] != y[i]) \ | |
224 return false; \ | |
225 return true; \ | |
226 } | |
3 | 227 |
2828 | 228 OP_EQ_FCN (bool, bool) |
2804 | 229 OP_EQ_FCN (char, char) |
230 OP_EQ_FCN (double, double) | |
231 OP_EQ_FCN (Complex, Complex) | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
232 OP_EQ_FCN (float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
233 OP_EQ_FCN (FloatComplex, FloatComplex) |
3 | 234 |
2804 | 235 #define OP_DUP_FCN(OP, F, R, T) \ |
236 static inline R * \ | |
237 F (const T *x, size_t n) \ | |
238 { \ | |
239 R *r = 0; \ | |
240 if (n > 0) \ | |
241 { \ | |
242 r = new R [n]; \ | |
243 for (size_t i = 0; i < n; i++) \ | |
244 r[i] = OP (x[i]); \ | |
245 } \ | |
246 return r; \ | |
247 } | |
3 | 248 |
3769 | 249 OP_DUP_FCN (, mx_inline_dup, double, double) |
250 OP_DUP_FCN (, mx_inline_dup, Complex, Complex) | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
251 OP_DUP_FCN (, mx_inline_dup, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
252 OP_DUP_FCN (, mx_inline_dup, FloatComplex, FloatComplex) |
3 | 253 |
2804 | 254 // These should really return a bool *. Also, they should probably be |
255 // in with a collection of other element-by-element boolean ops. | |
3769 | 256 OP_DUP_FCN (0.0 ==, mx_inline_not, double, double) |
257 OP_DUP_FCN (0.0 ==, mx_inline_not, double, Complex) | |
3 | 258 |
3769 | 259 OP_DUP_FCN (, mx_inline_make_complex, Complex, double) |
2804 | 260 |
3769 | 261 OP_DUP_FCN (-, mx_inline_change_sign, double, double) |
262 OP_DUP_FCN (-, mx_inline_change_sign, Complex, Complex) | |
3 | 263 |
3769 | 264 OP_DUP_FCN (real, mx_inline_real_dup, double, Complex) |
265 OP_DUP_FCN (imag, mx_inline_imag_dup, double, Complex) | |
266 OP_DUP_FCN (conj, mx_inline_conj_dup, Complex, Complex) | |
3 | 267 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
268 OP_DUP_FCN (0.0 ==, mx_inline_not, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
269 OP_DUP_FCN (static_cast<float>(0.0) ==, mx_inline_not, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
270 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
271 OP_DUP_FCN (, mx_inline_make_complex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
272 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
273 OP_DUP_FCN (-, mx_inline_change_sign, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
274 OP_DUP_FCN (-, mx_inline_change_sign, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
275 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
276 OP_DUP_FCN (real, mx_inline_real_dup, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
277 OP_DUP_FCN (imag, mx_inline_imag_dup, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
278 OP_DUP_FCN (conj, mx_inline_conj_dup, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
279 |
3864 | 280 // Avoid some code duplication. Maybe we should use templates. |
281 | |
4015 | 282 #define MX_CUMULATIVE_OP(RET_TYPE, ELT_TYPE, OP) \ |
3864 | 283 \ |
5275 | 284 octave_idx_type nr = rows (); \ |
285 octave_idx_type nc = cols (); \ | |
3864 | 286 \ |
287 RET_TYPE retval (nr, nc); \ | |
288 \ | |
289 if (nr > 0 && nc > 0) \ | |
290 { \ | |
291 if ((nr == 1 && dim == -1) || dim == 1) \ | |
292 { \ | |
5275 | 293 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 294 { \ |
295 ELT_TYPE t = elem (i, 0); \ | |
5275 | 296 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 297 { \ |
298 retval.elem (i, j) = t; \ | |
299 if (j < nc - 1) \ | |
300 t OP elem (i, j+1); \ | |
301 } \ | |
302 } \ | |
303 } \ | |
304 else \ | |
305 { \ | |
5275 | 306 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 307 { \ |
308 ELT_TYPE t = elem (0, j); \ | |
5275 | 309 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 310 { \ |
311 retval.elem (i, j) = t; \ | |
312 if (i < nr - 1) \ | |
313 t OP elem (i+1, j); \ | |
314 } \ | |
315 } \ | |
316 } \ | |
317 } \ | |
318 \ | |
319 return retval | |
320 | |
321 #define MX_BASE_REDUCTION_OP(RET_TYPE, ROW_EXPR, COL_EXPR, INIT_VAL, \ | |
322 MT_RESULT) \ | |
323 \ | |
5275 | 324 octave_idx_type nr = rows (); \ |
325 octave_idx_type nc = cols (); \ | |
3864 | 326 \ |
327 RET_TYPE retval; \ | |
328 \ | |
329 if (nr > 0 && nc > 0) \ | |
330 { \ | |
331 if ((nr == 1 && dim == -1) || dim == 1) \ | |
332 { \ | |
333 retval.resize (nr, 1); \ | |
5275 | 334 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 335 { \ |
336 retval.elem (i, 0) = INIT_VAL; \ | |
5275 | 337 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 338 { \ |
339 ROW_EXPR; \ | |
340 } \ | |
341 } \ | |
342 } \ | |
343 else \ | |
344 { \ | |
345 retval.resize (1, nc); \ | |
5275 | 346 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 347 { \ |
348 retval.elem (0, j) = INIT_VAL; \ | |
5275 | 349 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 350 { \ |
351 COL_EXPR; \ | |
352 } \ | |
353 } \ | |
354 } \ | |
355 } \ | |
4139 | 356 else if (nc == 0 && (nr == 0 || (nr == 1 && dim == -1))) \ |
357 retval.resize (1, 1, MT_RESULT); \ | |
4015 | 358 else if (nr == 0 && (dim == 0 || dim == -1)) \ |
359 retval.resize (1, nc, MT_RESULT); \ | |
360 else if (nc == 0 && dim == 1) \ | |
361 retval.resize (nr, 1, MT_RESULT); \ | |
362 else \ | |
4139 | 363 retval.resize (nr > 0, nc > 0); \ |
3864 | 364 \ |
365 return retval | |
366 | |
367 #define MX_REDUCTION_OP_ROW_EXPR(OP) \ | |
368 retval.elem (i, 0) OP elem (i, j) | |
369 | |
370 #define MX_REDUCTION_OP_COL_EXPR(OP) \ | |
371 retval.elem (0, j) OP elem (i, j) | |
372 | |
373 #define MX_REDUCTION_OP(RET_TYPE, OP, INIT_VAL, MT_RESULT) \ | |
374 MX_BASE_REDUCTION_OP (RET_TYPE, \ | |
375 MX_REDUCTION_OP_ROW_EXPR (OP), \ | |
376 MX_REDUCTION_OP_COL_EXPR (OP), \ | |
377 INIT_VAL, MT_RESULT) | |
4015 | 378 |
379 #define MX_ANY_ALL_OP_ROW_CODE(TEST_OP, TEST_TRUE_VAL) \ | |
380 if (elem (i, j) TEST_OP 0.0) \ | |
381 { \ | |
382 retval.elem (i, 0) = TEST_TRUE_VAL; \ | |
383 break; \ | |
384 } | |
385 | |
386 #define MX_ANY_ALL_OP_COL_CODE(TEST_OP, TEST_TRUE_VAL) \ | |
387 if (elem (i, j) TEST_OP 0.0) \ | |
388 { \ | |
389 retval.elem (0, j) = TEST_TRUE_VAL; \ | |
390 break; \ | |
391 } | |
392 | |
393 #define MX_ANY_ALL_OP(DIM, INIT_VAL, TEST_OP, TEST_TRUE_VAL) \ | |
394 MX_BASE_REDUCTION_OP (boolMatrix, \ | |
395 MX_ANY_ALL_OP_ROW_CODE (TEST_OP, TEST_TRUE_VAL), \ | |
396 MX_ANY_ALL_OP_COL_CODE (TEST_OP, TEST_TRUE_VAL), \ | |
397 INIT_VAL, INIT_VAL) | |
398 | |
399 #define MX_ALL_OP(DIM) MX_ANY_ALL_OP (DIM, true, ==, false) | |
400 | |
401 #define MX_ANY_OP(DIM) MX_ANY_ALL_OP (DIM, false, !=, true) | |
402 | |
4563 | 403 #define MX_ND_ALL_EXPR elem (iter_idx) == 0 |
4556 | 404 |
4563 | 405 #define MX_ND_ANY_EXPR elem (iter_idx) != 0 |
4556 | 406 |
407 #define MX_ND_ALL_EVAL(TEST_EXPR) \ | |
5520 | 408 if (retval(result_idx) && (TEST_EXPR)) \ |
409 retval(result_idx) = 0; | |
4556 | 410 |
411 #define MX_ND_ANY_EVAL(TEST_EXPR) \ | |
5520 | 412 if (retval(result_idx) || (TEST_EXPR)) \ |
413 retval(result_idx) = 1; | |
4569 | 414 |
5520 | 415 #define MX_ND_REDUCTION(EVAL_EXPR, INIT_VAL, RET_TYPE) \ |
4556 | 416 \ |
4569 | 417 RET_TYPE retval; \ |
4556 | 418 \ |
4932 | 419 dim_vector dv = this->dims (); \ |
5520 | 420 int nd = this->ndims (); \ |
4563 | 421 \ |
5955 | 422 int empty = false; \ |
4563 | 423 \ |
5520 | 424 for (int i = 0; i < nd; i++) \ |
4563 | 425 { \ |
5955 | 426 if (dv(i) == 0) \ |
4563 | 427 { \ |
5955 | 428 empty = true; \ |
4563 | 429 break; \ |
430 } \ | |
431 } \ | |
432 \ | |
5972 | 433 if (nd == 2 && dv(0) == 0 && dv(1) == 0) \ |
434 { \ | |
435 retval.resize (dim_vector (1, 1), INIT_VAL); \ | |
436 return retval; \ | |
437 } \ | |
438 \ | |
5520 | 439 /* We need to find first non-singleton dim. */ \ |
440 \ | |
441 if (dim == -1) \ | |
4556 | 442 { \ |
5520 | 443 dim = 0; \ |
444 \ | |
445 for (int i = 0; i < nd; i++) \ | |
4563 | 446 { \ |
5520 | 447 if (dv(i) != 1) \ |
4563 | 448 { \ |
449 dim = i; \ | |
450 break; \ | |
451 } \ | |
452 } \ | |
453 } \ | |
5520 | 454 else if (dim >= nd) \ |
4563 | 455 { \ |
5520 | 456 dim = nd++; \ |
457 dv.resize (nd, 1); \ | |
4563 | 458 } \ |
459 \ | |
5523 | 460 /* R = op (A, DIM) \ |
4563 | 461 \ |
5523 | 462 The strategy here is to access the elements of A along the \ |
463 dimension specified by DIM. This means that we loop over each \ | |
5615 | 464 element of R and adjust the index into A as needed. Store the \ |
465 cummulative product of all dimensions of A in CP_SZ. The last \ | |
466 element of CP_SZ is the total number of elements of A. */ \ | |
4563 | 467 \ |
5615 | 468 Array<octave_idx_type> cp_sz (nd+1, 1); \ |
469 for (int i = 1; i <= nd; i++) \ | |
5523 | 470 cp_sz(i) = cp_sz(i-1)*dv(i-1); \ |
5520 | 471 \ |
5523 | 472 octave_idx_type reset_at = cp_sz(dim); \ |
473 octave_idx_type base_incr = cp_sz(dim+1); \ | |
474 octave_idx_type incr = cp_sz(dim); \ | |
475 octave_idx_type base = 0; \ | |
476 octave_idx_type next_base = base + base_incr; \ | |
477 octave_idx_type iter_idx = base; \ | |
478 octave_idx_type n_elts = dv(dim); \ | |
4556 | 479 \ |
5520 | 480 dv(dim) = 1; \ |
4556 | 481 \ |
5520 | 482 retval.resize (dv, INIT_VAL); \ |
4556 | 483 \ |
5955 | 484 if (! empty) \ |
4556 | 485 { \ |
5955 | 486 octave_idx_type nel = dv.numel (); \ |
5523 | 487 \ |
5955 | 488 octave_idx_type k = 1; \ |
489 \ | |
490 for (octave_idx_type result_idx = 0; result_idx < nel; result_idx++) \ | |
5523 | 491 { \ |
5955 | 492 OCTAVE_QUIT; \ |
5523 | 493 \ |
5955 | 494 for (octave_idx_type j = 0; j < n_elts; j++) \ |
495 { \ | |
496 OCTAVE_QUIT; \ | |
497 \ | |
498 EVAL_EXPR; \ | |
5520 | 499 \ |
5955 | 500 iter_idx += incr; \ |
501 } \ | |
5523 | 502 \ |
5955 | 503 if (k == reset_at) \ |
504 { \ | |
505 base = next_base; \ | |
506 next_base += base_incr; \ | |
507 iter_idx = base; \ | |
508 k = 1; \ | |
509 } \ | |
510 else \ | |
511 { \ | |
512 base++; \ | |
513 iter_idx = base; \ | |
514 k++; \ | |
515 } \ | |
516 } \ | |
4556 | 517 } \ |
518 \ | |
4871 | 519 retval.chop_trailing_singletons (); \ |
520 \ | |
4556 | 521 return retval |
4569 | 522 |
523 #define MX_ND_REAL_OP_REDUCTION(ASN_EXPR, INIT_VAL) \ | |
5520 | 524 MX_ND_REDUCTION (retval(result_idx) ASN_EXPR, INIT_VAL, NDArray) |
4569 | 525 |
526 #define MX_ND_COMPLEX_OP_REDUCTION(ASN_EXPR, INIT_VAL) \ | |
5520 | 527 MX_ND_REDUCTION (retval(result_idx) ASN_EXPR, INIT_VAL, ComplexNDArray) |
4569 | 528 |
529 #define MX_ND_ANY_ALL_REDUCTION(EVAL_EXPR, VAL) \ | |
5520 | 530 MX_ND_REDUCTION (EVAL_EXPR, VAL, boolNDArray) |
4556 | 531 |
5523 | 532 #define MX_ND_CUMULATIVE_OP(RET_TYPE, ACC_TYPE, INIT_VAL, OP) \ |
533 \ | |
4584 | 534 RET_TYPE retval; \ |
535 \ | |
4932 | 536 dim_vector dv = this->dims (); \ |
5523 | 537 int nd = this->ndims (); \ |
4584 | 538 \ |
5955 | 539 bool empty = false; \ |
4584 | 540 \ |
5523 | 541 for (int i = 0; i < nd; i++) \ |
4584 | 542 { \ |
5955 | 543 if (dv(i) == 0) \ |
4584 | 544 { \ |
5955 | 545 empty = true; \ |
4584 | 546 break; \ |
547 } \ | |
548 } \ | |
549 \ | |
5523 | 550 /* We need to find first non-singleton dim. */ \ |
551 \ | |
4584 | 552 if (dim == -1) \ |
553 { \ | |
5523 | 554 dim = 0; \ |
555 \ | |
556 for (int i = 0; i < nd; i++) \ | |
4584 | 557 { \ |
5523 | 558 if (dv(i) != 1) \ |
4584 | 559 { \ |
560 dim = i; \ | |
561 break; \ | |
562 } \ | |
563 } \ | |
564 } \ | |
5523 | 565 else if (dim >= nd) \ |
4584 | 566 { \ |
5523 | 567 dim = nd++; \ |
568 dv.resize (nd, 1); \ | |
4584 | 569 } \ |
570 \ | |
5523 | 571 /* R = op (A, DIM) \ |
4584 | 572 \ |
5523 | 573 The strategy here is to access the elements of A along the \ |
574 dimension specified by DIM. This means that we loop over each \ | |
5611 | 575 element of R and adjust the index into A as needed. Store the \ |
5614 | 576 cummulative product of all dimensions of A in CP_SZ. The last \ |
577 element of CP_SZ is the total number of elements of A. */ \ | |
4584 | 578 \ |
5611 | 579 Array<octave_idx_type> cp_sz (nd+1, 1); \ |
580 for (int i = 1; i <= nd; i++) \ | |
5523 | 581 cp_sz(i) = cp_sz(i-1)*dv(i-1); \ |
4584 | 582 \ |
5523 | 583 octave_idx_type reset_at = cp_sz(dim); \ |
584 octave_idx_type base_incr = cp_sz(dim+1); \ | |
585 octave_idx_type incr = cp_sz(dim); \ | |
586 octave_idx_type base = 0; \ | |
587 octave_idx_type next_base = base + base_incr; \ | |
588 octave_idx_type iter_idx = base; \ | |
589 octave_idx_type n_elts = dv(dim); \ | |
4584 | 590 \ |
5523 | 591 retval.resize (dv, INIT_VAL); \ |
592 \ | |
5955 | 593 if (! empty) \ |
594 { \ | |
595 octave_idx_type nel = dv.numel () / n_elts; \ | |
5523 | 596 \ |
5955 | 597 octave_idx_type k = 1; \ |
4584 | 598 \ |
5955 | 599 for (octave_idx_type i = 0; i < nel; i++) \ |
600 { \ | |
601 OCTAVE_QUIT; \ | |
602 \ | |
603 ACC_TYPE prev_val = INIT_VAL; \ | |
604 \ | |
605 for (octave_idx_type j = 0; j < n_elts; j++) \ | |
606 { \ | |
607 OCTAVE_QUIT; \ | |
5523 | 608 \ |
5955 | 609 if (j == 0) \ |
610 { \ | |
611 retval(iter_idx) = elem (iter_idx); \ | |
612 prev_val = retval(iter_idx); \ | |
613 } \ | |
614 else \ | |
615 { \ | |
616 prev_val = prev_val OP elem (iter_idx); \ | |
617 retval(iter_idx) = prev_val; \ | |
618 } \ | |
5523 | 619 \ |
5955 | 620 iter_idx += incr; \ |
621 } \ | |
4584 | 622 \ |
5955 | 623 if (k == reset_at) \ |
5523 | 624 { \ |
5955 | 625 base = next_base; \ |
626 next_base += base_incr; \ | |
627 iter_idx = base; \ | |
628 k = 1; \ | |
4584 | 629 } \ |
630 else \ | |
5523 | 631 { \ |
5955 | 632 base++; \ |
633 iter_idx = base; \ | |
634 k++; \ | |
635 } \ | |
4584 | 636 } \ |
5523 | 637 } \ |
4584 | 638 \ |
5523 | 639 retval.chop_trailing_singletons (); \ |
640 \ | |
4584 | 641 return retval |
642 | |
2804 | 643 #endif |
3 | 644 |
645 /* | |
646 ;;; Local Variables: *** | |
647 ;;; mode: C++ *** | |
648 ;;; End: *** | |
649 */ |