comparison liboctave/mx-inlines.cc @ 9550:3d6a9aea2aea

refactor binary & bool ops in liboctave
author Jaroslav Hajek <highegg@gmail.com>
date Wed, 19 Aug 2009 22:55:15 +0200
parents 9f870f73ab7d
children 0c72d9284087
comparison
equal deleted inserted replaced
9549:ed34b1da0e26 9550:3d6a9aea2aea
32 #include "quit.h" 32 #include "quit.h"
33 33
34 #include "oct-cmplx.h" 34 #include "oct-cmplx.h"
35 #include "oct-locbuf.h" 35 #include "oct-locbuf.h"
36 #include "oct-inttypes.h" 36 #include "oct-inttypes.h"
37 #include "Array-util.h"
38
39 // Provides some commonly repeated, basic loop templates.
37 40
38 template <class R, class S> 41 template <class R, class S>
39 inline void 42 inline void mx_inline_fill (size_t n, R *r, S s)
40 mx_inline_fill_vs (R *r, size_t n, S s) 43 { for (size_t i = 0; i < n; i++) r[i] = s; }
44
45 #define DEFMXUNOP(F, OP) \
46 template <class R, class X> \
47 inline void F (size_t n, R *r, const X *x) \
48 { for (size_t i = 0; i < n; i++) r[i] = OP x[i]; }
49
50 DEFMXUNOP (mx_inline_uminus, -)
51 DEFMXUNOP (mx_inline_not, !)
52
53 #define DEFMXUNBOOLOP(F, OP) \
54 template <class X> \
55 inline void F (size_t n, bool *r, const X *x) \
56 { const X zero = X(); for (size_t i = 0; i < n; i++) r[i] = x[i] OP zero; }
57
58 DEFMXUNBOOLOP (mx_inline_iszero, ==)
59 DEFMXUNBOOLOP (mx_inline_notzero, !=)
60
61 #define DEFMXBINOP(F, OP) \
62 template <class R, class X, class Y> \
63 inline void F (size_t n, R *r, const X *x, const Y *y) \
64 { for (size_t i = 0; i < n; i++) r[i] = x[i] OP y[i]; } \
65 template <class R, class X, class Y> \
66 inline void F (size_t n, R *r, const X *x, Y y) \
67 { for (size_t i = 0; i < n; i++) r[i] = x[i] OP y; } \
68 template <class R, class X, class Y> \
69 inline void F (size_t n, R *r, X x, const Y *y) \
70 { for (size_t i = 0; i < n; i++) r[i] = x OP y[i]; }
71
72 DEFMXBINOP (mx_inline_add, +)
73 DEFMXBINOP (mx_inline_sub, -)
74 DEFMXBINOP (mx_inline_mul, *)
75 DEFMXBINOP (mx_inline_div, /)
76
77 #define DEFMXBINOPEQ(F, OP) \
78 template <class R, class X> \
79 inline void F (size_t n, R *r, const X *x) \
80 { for (size_t i = 0; i < n; i++) r[i] OP x[i]; } \
81 template <class R, class X> \
82 inline void F (size_t n, R *r, X x) \
83 { for (size_t i = 0; i < n; i++) r[i] OP x; }
84
85 DEFMXBINOPEQ (mx_inline_add2, +=)
86 DEFMXBINOPEQ (mx_inline_sub2, -=)
87 DEFMXBINOPEQ (mx_inline_mul2, *=)
88 DEFMXBINOPEQ (mx_inline_div2, /=)
89
90 #define DEFMXCMPOP(F, OP) \
91 template <class X, class Y> \
92 inline void F (size_t n, bool *r, const X *x, const Y *y) \
93 { for (size_t i = 0; i < n; i++) r[i] = x[i] OP y[i]; } \
94 template <class X, class Y> \
95 inline void F (size_t n, bool *r, const X *x, Y y) \
96 { for (size_t i = 0; i < n; i++) r[i] = x[i] OP y; } \
97 template <class X, class Y> \
98 inline void F (size_t n, bool *r, X x, const Y *y) \
99 { for (size_t i = 0; i < n; i++) r[i] = x OP y[i]; }
100
101 DEFMXCMPOP (mx_inline_lt, <)
102 DEFMXCMPOP (mx_inline_le, <=)
103 DEFMXCMPOP (mx_inline_gt, >)
104 DEFMXCMPOP (mx_inline_ge, >=)
105 DEFMXCMPOP (mx_inline_eq, ==)
106 DEFMXCMPOP (mx_inline_ne, !=)
107
108 // For compatibility with certain loserware, cmp ops on complex nums only
109 // compare real parts, although "sort" defines ordering on complex numbers!
110
111 #define DEFCMPLXCMOP(F, OP) \
112 template <class X, class Y> \
113 inline void F (size_t n, bool *r, const std::complex<X> *x, const Y *y) \
114 { for (size_t i = 0; i < n; i++) r[i] = real (x[i]) OP real (y[i]); } \
115 template <class X, class Y> \
116 inline void F (size_t n, bool *r, const std::complex<X> *x, Y y) \
117 { for (size_t i = 0; i < n; i++) r[i] = real (x[i]) OP y; } \
118 template <class X, class Y> \
119 inline void F (size_t n, bool *r, X x, const std::complex<Y> *y) \
120 { for (size_t i = 0; i < n; i++) r[i] = x OP real (y[i]); }
121
122 #define DEFMXBOOLOP(F, EQ1, OP, EQ2) \
123 template <class X, class Y> \
124 inline void F (size_t n, bool *r, const X *x, const Y *y) \
125 { \
126 const X xzero = X(); \
127 const Y yzero = Y(); \
128 for (size_t i = 0; i < n; i++) \
129 r[i] = (x[i] EQ1 xzero) OP (y[i] EQ2 yzero); \
130 } \
131 template <class X, class Y> \
132 inline void F (size_t n, bool *r, const X *x, Y y) \
133 { \
134 const X xzero = X(); \
135 const bool yy = y EQ2 Y(); \
136 for (size_t i = 0; i < n; i++) r[i] = (x[i] EQ1 xzero) OP yy; \
137 } \
138 template <class X, class Y> \
139 inline void F (size_t n, bool *r, X x, const Y *y) \
140 { \
141 const bool xx = x EQ1 X(); \
142 const Y yzero = Y(); \
143 for (size_t i = 0; i < n; i++) r[i] = xx OP (y[i] EQ2 yzero); \
144 }
145
146 DEFMXBOOLOP (mx_inline_and, !=, &, !=)
147 DEFMXBOOLOP (mx_inline_or, !=, |, !=)
148 DEFMXBOOLOP (mx_inline_not_and, ==, &, !=)
149 DEFMXBOOLOP (mx_inline_not_or, ==, |, !=)
150 DEFMXBOOLOP (mx_inline_and_not, !=, &, ==)
151 DEFMXBOOLOP (mx_inline_or_not, !=, |, ==)
152
153 template <class T>
154 inline bool
155 mx_inline_any_nan (size_t, const T*) { return false; }
156
157 #define DEFMXANYNAN(T) \
158 inline bool \
159 mx_inline_any_nan (size_t n, const T* t) \
160 { \
161 for (size_t i = 0; i < n; i++) \
162 if (xisnan (t[i])) return true; \
163 return false; \
164 }
165
166 DEFMXANYNAN(double)
167 DEFMXANYNAN(float)
168 DEFMXANYNAN(Complex)
169 DEFMXANYNAN(FloatComplex)
170
171 // Arbitrary unary/binary function mappers. Note the function reference is a
172 // template parameter!
173 template <class R, class X, R F(X)>
174 void mx_inline_fun (size_t n, R *r, const X *x)
175 { for (size_t i = 0; i < n; i++) r[i] = F(x[i]); }
176
177 template <class R, class X, R F(const X&)>
178 void mx_inline_fun (size_t n, R *r, const X *x)
179 { for (size_t i = 0; i < n; i++) r[i] = F(x[i]); }
180
181 template <class R, class X, class Y, R F(X, Y)>
182 void mx_inline_fun (size_t n, R *r, const X *x, const Y *y)
183 { for (size_t i = 0; i < n; i++) r[i] = F(x[i], y[i]); }
184
185 template <class R, class X, class Y, R F(X, Y)>
186 void mx_inline_fun (size_t n, R *r, X x, const Y *y)
187 { for (size_t i = 0; i < n; i++) r[i] = F(x, y[i]); }
188
189 template <class R, class X, class Y, R F(X, Y)>
190 void mx_inline_fun (size_t n, R *r, const X *x, Y y)
191 { for (size_t i = 0; i < n; i++) r[i] = F(x[i], y); }
192
193 template <class R, class X, class Y, R F(const X&, const Y&)>
194 void mx_inline_fun (size_t n, R *r, const X *x, const Y *y)
195 { for (size_t i = 0; i < n; i++) r[i] = F(x[i], y[i]); }
196
197 template <class R, class X, class Y, R F(const X&, const Y&)>
198 void mx_inline_fun (size_t n, R *r, X x, const Y *y)
199 { for (size_t i = 0; i < n; i++) r[i] = F(x, y[i]); }
200
201 template <class R, class X, class Y, R F(const X&, const Y&)>
202 void mx_inline_fun (size_t n, R *r, const X *x, Y y)
203 { for (size_t i = 0; i < n; i++) r[i] = F(x[i], y); }
204
205 // Appliers. Since these call the operation just once, we pass it as
206 // a pointer, to allow the compiler reduce number of instances.
207
208 template <class RNDA, class XNDA>
209 inline RNDA
210 do_mx_unary_op (const XNDA& x,
211 void (*op) (size_t, typename RNDA::element_type *,
212 const typename XNDA::element_type *))
213 {
214 RNDA r (x.dims ());
215 op (r.nelem (), r.fortran_vec (), x.data ());
216 return r;
217 }
218
219 template <class RNDA, class XNDA, class YNDA>
220 inline RNDA
221 do_mm_binary_op (const XNDA& x, const YNDA& y,
222 void (*op) (size_t, typename RNDA::element_type *,
223 const typename XNDA::element_type *,
224 const typename YNDA::element_type *),
225 const char *opname)
226 {
227 dim_vector dx = x.dims (), dy = y.dims ();
228 if (dx == dy)
229 {
230 RNDA r (dx);
231 op (r.nelem (), r.fortran_vec (), x.data (), y.data ());
232 return r;
233 }
234 else
235 {
236 gripe_nonconformant (opname, dx, dy);
237 return RNDA ();
238 }
239 }
240
241 template <class RNDA, class XNDA, class YS>
242 inline RNDA
243 do_ms_binary_op (const XNDA& x, const YS& y,
244 void (*op) (size_t, typename RNDA::element_type *,
245 const typename XNDA::element_type *, YS))
246 {
247 RNDA r (x.dims ());
248 op (r.nelem (), r.fortran_vec (), x.data (), y);
249 return r;
250 }
251
252 template <class RNDA, class XS, class YNDA>
253 inline RNDA
254 do_sm_binary_op (const XS& x, const YNDA& y,
255 void (*op) (size_t, typename RNDA::element_type *, XS,
256 const typename YNDA::element_type *))
257 {
258 RNDA r (y.dims ());
259 op (r.nelem (), r.fortran_vec (), x, y.data ());
260 return r;
261 }
262
263 template <class RNDA, class XNDA>
264 inline RNDA&
265 do_mm_inplace_op (RNDA& r, const XNDA& x,
266 void (*op) (size_t, typename RNDA::element_type *,
267 const typename XNDA::element_type *),
268 const char *opname)
269 {
270 dim_vector dr = r.dims (), dx = x.dims ();
271 if (dr == dx)
272 {
273 op (r.nelem (), r.fortran_vec (), x.data ());
274 return r;
275 }
276 else
277 {
278 gripe_nonconformant (opname, dr, dx);
279 return RNDA ();
280 }
281 }
282
283 template <class RNDA, class XS>
284 inline RNDA&
285 do_ms_inplace_op (RNDA& r, const XS& x,
286 void (*op) (size_t, typename RNDA::element_type *, XS))
287 {
288 op (r.nelem (), r.fortran_vec (), x);
289 return r;
290 }
291
292 template <class T1, class T2>
293 inline bool
294 mx_inline_equal (size_t n, const T1 *x, const T2 *y)
41 { 295 {
42 for (size_t i = 0; i < n; i++) 296 for (size_t i = 0; i < n; i++)
43 r[i] = s; 297 if (x[i] != y[i])
44 } 298 return false;
45 299 return true;
46 #define VS_OP_FCN(F, OP) \ 300 }
47 template <class R, class V, class S> \
48 inline void \
49 F ## _vs (R *r, const V *v, size_t n, S s) \
50 { \
51 for (size_t i = 0; i < n; i++) \
52 r[i] = v[i] OP s; \
53 }
54
55 VS_OP_FCN (mx_inline_add, +)
56 VS_OP_FCN (mx_inline_subtract, -)
57 VS_OP_FCN (mx_inline_multiply, *)
58 VS_OP_FCN (mx_inline_divide, /)
59
60 #define VS_OP(F, OP, R, V, S) \
61 static inline R * \
62 F (const V *v, size_t n, S s) \
63 { \
64 R *r = 0; \
65 if (n > 0) \
66 { \
67 r = new R [n]; \
68 F ## _vs (r, v, n, s); \
69 } \
70 return r; \
71 }
72
73 #define VS_OPS(R, V, S) \
74 VS_OP (mx_inline_add, +, R, V, S) \
75 VS_OP (mx_inline_subtract, -, R, V, S) \
76 VS_OP (mx_inline_multiply, *, R, V, S) \
77 VS_OP (mx_inline_divide, /, R, V, S)
78
79 VS_OPS (double, double, double)
80 VS_OPS (Complex, double, Complex)
81 VS_OPS (Complex, Complex, double)
82 VS_OPS (Complex, Complex, Complex)
83
84 VS_OPS (float, float, float)
85 VS_OPS (FloatComplex, float, FloatComplex)
86 VS_OPS (FloatComplex, FloatComplex, float)
87 VS_OPS (FloatComplex, FloatComplex, FloatComplex)
88
89 #define SV_OP_FCN(F, OP) \
90 template <class R, class S, class V> \
91 inline void \
92 F ## _sv (R *r, S s, const V *v, size_t n) \
93 { \
94 for (size_t i = 0; i < n; i++) \
95 r[i] = s OP v[i]; \
96 } \
97
98 SV_OP_FCN (mx_inline_add, +)
99 SV_OP_FCN (mx_inline_subtract, -)
100 SV_OP_FCN (mx_inline_multiply, *)
101 SV_OP_FCN (mx_inline_divide, /)
102
103 #define SV_OP(F, OP, R, S, V) \
104 static inline R * \
105 F (S s, const V *v, size_t n) \
106 { \
107 R *r = 0; \
108 if (n > 0) \
109 { \
110 r = new R [n]; \
111 F ## _sv (r, s, v, n); \
112 } \
113 return r; \
114 }
115
116 #define SV_OPS(R, S, V) \
117 SV_OP (mx_inline_add, +, R, S, V) \
118 SV_OP (mx_inline_subtract, -, R, S, V) \
119 SV_OP (mx_inline_multiply, *, R, S, V) \
120 SV_OP (mx_inline_divide, /, R, S, V)
121
122 SV_OPS (double, double, double)
123 SV_OPS (Complex, double, Complex)
124 SV_OPS (Complex, Complex, double)
125 SV_OPS (Complex, Complex, Complex)
126
127 SV_OPS (float, float, float)
128 SV_OPS (FloatComplex, float, FloatComplex)
129 SV_OPS (FloatComplex, FloatComplex, float)
130 SV_OPS (FloatComplex, FloatComplex, FloatComplex)
131
132 #define VV_OP_FCN(F, OP) \
133 template <class R, class T1, class T2> \
134 inline void \
135 F ## _vv (R *r, const T1 *v1, const T2 *v2, size_t n) \
136 { \
137 for (size_t i = 0; i < n; i++) \
138 r[i] = v1[i] OP v2[i]; \
139 } \
140
141 VV_OP_FCN (mx_inline_add, +)
142 VV_OP_FCN (mx_inline_subtract, -)
143 VV_OP_FCN (mx_inline_multiply, *)
144 VV_OP_FCN (mx_inline_divide, /)
145
146 #define VV_OP(F, OP, R, T1, T2) \
147 static inline R * \
148 F (const T1 *v1, const T2 *v2, size_t n) \
149 { \
150 R *r = 0; \
151 if (n > 0) \
152 { \
153 r = new R [n]; \
154 F ## _vv (r, v1, v2, n); \
155 } \
156 return r; \
157 }
158
159 #define VV_OPS(R, T1, T2) \
160 VV_OP (mx_inline_add, +, R, T1, T2) \
161 VV_OP (mx_inline_subtract, -, R, T1, T2) \
162 VV_OP (mx_inline_multiply, *, R, T1, T2) \
163 VV_OP (mx_inline_divide, /, R, T1, T2)
164
165 VV_OPS (double, double, double)
166 VV_OPS (Complex, double, Complex)
167 VV_OPS (Complex, Complex, double)
168 VV_OPS (Complex, Complex, Complex)
169
170 VV_OPS (float, float, float)
171 VV_OPS (FloatComplex, float, FloatComplex)
172 VV_OPS (FloatComplex, FloatComplex, float)
173 VV_OPS (FloatComplex, FloatComplex, FloatComplex)
174
175 #define VS_OP2(F, OP, V, S) \
176 static inline V * \
177 F (V *v, size_t n, S s) \
178 { \
179 for (size_t i = 0; i < n; i++) \
180 v[i] OP s; \
181 return v; \
182 }
183
184 #define VS_OP2S(V, S) \
185 VS_OP2 (mx_inline_add2, +=, V, S) \
186 VS_OP2 (mx_inline_subtract2, -=, V, S) \
187 VS_OP2 (mx_inline_multiply2, *=, V, S) \
188 VS_OP2 (mx_inline_divide2, /=, V, S) \
189 VS_OP2 (mx_inline_copy, =, V, S)
190
191 VS_OP2S (double, double)
192 VS_OP2S (Complex, double)
193 VS_OP2S (Complex, Complex)
194
195 VS_OP2S (float, float)
196 VS_OP2S (FloatComplex, float)
197 VS_OP2S (FloatComplex, FloatComplex)
198
199 #define VV_OP2(F, OP, T1, T2) \
200 static inline T1 * \
201 F (T1 *v1, const T2 *v2, size_t n) \
202 { \
203 for (size_t i = 0; i < n; i++) \
204 v1[i] OP v2[i]; \
205 return v1; \
206 }
207
208 #define VV_OP2S(T1, T2) \
209 VV_OP2 (mx_inline_add2, +=, T1, T2) \
210 VV_OP2 (mx_inline_subtract2, -=, T1, T2) \
211 VV_OP2 (mx_inline_multiply2, *=, T1, T2) \
212 VV_OP2 (mx_inline_divide2, /=, T1, T2) \
213 VV_OP2 (mx_inline_copy, =, T1, T2)
214
215 VV_OP2S (double, double)
216 VV_OP2S (Complex, double)
217 VV_OP2S (Complex, Complex)
218
219 VV_OP2S (float, float)
220 VV_OP2S (FloatComplex, float)
221 VV_OP2S (FloatComplex, FloatComplex)
222
223 #define OP_EQ_FCN(T1, T2) \
224 static inline bool \
225 mx_inline_equal (const T1 *x, const T2 *y, size_t n) \
226 { \
227 for (size_t i = 0; i < n; i++) \
228 if (x[i] != y[i]) \
229 return false; \
230 return true; \
231 }
232
233 OP_EQ_FCN (bool, bool)
234 OP_EQ_FCN (char, char)
235 OP_EQ_FCN (double, double)
236 OP_EQ_FCN (Complex, Complex)
237 OP_EQ_FCN (float, float)
238 OP_EQ_FCN (FloatComplex, FloatComplex)
239 301
240 #define OP_DUP_FCN(OP, F, R, T) \ 302 #define OP_DUP_FCN(OP, F, R, T) \
241 static inline R * \ 303 static inline R * \
242 F (const T *x, size_t n) \ 304 F (const T *x, size_t n) \
243 { \ 305 { \
290 // ffast-math is on (not by default even with -O3). The following helper function 352 // ffast-math is on (not by default even with -O3). The following helper function
291 // gives the expected straightforward implementation of std::norm. 353 // gives the expected straightforward implementation of std::norm.
292 template <class T> 354 template <class T>
293 inline T cabsq (const std::complex<T>& c) 355 inline T cabsq (const std::complex<T>& c)
294 { return c.real () * c.real () + c.imag () * c.imag (); } 356 { return c.real () * c.real () + c.imag () * c.imag (); }
295
296 #define OP_RED_SUM(ac, el) ac += el
297 #define OP_RED_PROD(ac, el) ac *= el
298 #define OP_RED_SUMSQ(ac, el) ac += el*el
299 #define OP_RED_SUMSQC(ac, el) ac += cabsq (el)
300 357
301 // default. works for integers and bool. 358 // default. works for integers and bool.
302 template <class T> 359 template <class T>
303 inline bool xis_true (T x) { return x; } 360 inline bool xis_true (T x) { return x; }
304 template <class T> 361 template <class T>
316 // Ditto for complex. 373 // Ditto for complex.
317 inline bool xis_true (const Complex& x) { return ! xisnan (x) && x != 0.0; } 374 inline bool xis_true (const Complex& x) { return ! xisnan (x) && x != 0.0; }
318 inline bool xis_false (const Complex& x) { return x == 0.0; } 375 inline bool xis_false (const Complex& x) { return x == 0.0; }
319 inline bool xis_true (const FloatComplex& x) { return ! xisnan (x) && x != 0.0f; } 376 inline bool xis_true (const FloatComplex& x) { return ! xisnan (x) && x != 0.0f; }
320 inline bool xis_false (const FloatComplex& x) { return x == 0.0f; } 377 inline bool xis_false (const FloatComplex& x) { return x == 0.0f; }
378
379 #define OP_RED_SUM(ac, el) ac += el
380 #define OP_RED_PROD(ac, el) ac *= el
381 #define OP_RED_SUMSQ(ac, el) ac += el*el
382 #define OP_RED_SUMSQC(ac, el) ac += cabsq (el)
321 383
322 // The following two implement a simple short-circuiting. 384 // The following two implement a simple short-circuiting.
323 #define OP_RED_ANYC(ac, el) if (xis_true (el)) { ac = true; break; } else continue 385 #define OP_RED_ANYC(ac, el) if (xis_true (el)) { ac = true; break; } else continue
324 #define OP_RED_ALLC(ac, el) if (xis_false (el)) { ac = false; break; } else continue 386 #define OP_RED_ALLC(ac, el) if (xis_false (el)) { ac = false; break; } else continue
325 387