comparison liboctave/mx-inlines.cc @ 8736:53b4fdeacc2e

improve reduction functions
author Jaroslav Hajek <highegg@gmail.com>
date Fri, 13 Feb 2009 21:04:50 +0100
parents a1ae2aae903e
children 1bd918cfb6e2
comparison
equal deleted inserted replaced
8735:afbfd7f4fd93 8736:53b4fdeacc2e
279 OP_DUP_FCN (std::abs, mx_inline_fabs_dup, float, float) 279 OP_DUP_FCN (std::abs, mx_inline_fabs_dup, float, float)
280 OP_DUP_FCN (std::abs, mx_inline_cabs_dup, float, FloatComplex) 280 OP_DUP_FCN (std::abs, mx_inline_cabs_dup, float, FloatComplex)
281 OP_DUP_FCN (real, mx_inline_real_dup, float, FloatComplex) 281 OP_DUP_FCN (real, mx_inline_real_dup, float, FloatComplex)
282 OP_DUP_FCN (imag, mx_inline_imag_dup, float, FloatComplex) 282 OP_DUP_FCN (imag, mx_inline_imag_dup, float, FloatComplex)
283 OP_DUP_FCN (conj, mx_inline_conj_dup, FloatComplex, FloatComplex) 283 OP_DUP_FCN (conj, mx_inline_conj_dup, FloatComplex, FloatComplex)
284
285 // NOTE: std::norm is NOT equivalent
286 template <class T>
287 T cabsq (const std::complex<T>& c)
288 { return c.real () * c.real () + c.imag () * c.imag (); }
289
290 #define OP_RED_SUM(ac, el) ac += el
291 #define OP_RED_PROD(ac, el) ac *= el
292 #define OP_RED_SUMSQ(ac, el) ac += el*el
293 #define OP_RED_SUMSQC(ac, el) ac += cabsq (el)
294
295 #define OP_RED_FCN(F, TSRC, OP, ZERO) \
296 template <class T> \
297 inline T \
298 F (const TSRC* v, octave_idx_type n) \
299 { \
300 T ac = ZERO; \
301 for (octave_idx_type i = 0; i < n; i++) \
302 OP(ac, v[i]); \
303 return ac; \
304 }
305
306 OP_RED_FCN (mx_inline_sum, T, OP_RED_SUM, 0)
307 OP_RED_FCN (mx_inline_prod, T, OP_RED_PROD, 1)
308 OP_RED_FCN (mx_inline_sumsq, T, OP_RED_SUMSQ, 0)
309 OP_RED_FCN (mx_inline_sumsq, std::complex<T>, OP_RED_SUMSQC, 0)
310
311 #define OP_RED_FCN2(F, TSRC, OP, ZERO) \
312 template <class T> \
313 inline void \
314 F (const TSRC* v, T *r, octave_idx_type m, octave_idx_type n) \
315 { \
316 for (octave_idx_type i = 0; i < m; i++) \
317 r[i] = ZERO; \
318 for (octave_idx_type j = 0; j < n; j++) \
319 { \
320 for (octave_idx_type i = 0; i < m; i++) \
321 OP(r[i], v[i]); \
322 v += m; \
323 } \
324 }
325
326 OP_RED_FCN2 (mx_inline_sum, T, OP_RED_SUM, 0)
327 OP_RED_FCN2 (mx_inline_prod, T, OP_RED_PROD, 1)
328 OP_RED_FCN2 (mx_inline_sumsq, T, OP_RED_SUMSQ, 0)
329 OP_RED_FCN2 (mx_inline_sumsq, std::complex<T>, OP_RED_SUMSQC, 0)
330
331 #define OP_RED_FCNN(F, TSRC) \
332 template <class T> \
333 inline void \
334 F (const TSRC *v, T *r, octave_idx_type l, \
335 octave_idx_type n, octave_idx_type u) \
336 { \
337 if (l == 1) \
338 { \
339 for (octave_idx_type i = 0; i < u; i++) \
340 { \
341 r[i] = F (v, n); \
342 v += n; \
343 } \
344 } \
345 else \
346 { \
347 for (octave_idx_type i = 0; i < u; i++) \
348 { \
349 F (v, r, l, n); \
350 v += l*n; \
351 r += l; \
352 } \
353 } \
354 }
355
356 OP_RED_FCNN (mx_inline_sum, T)
357 OP_RED_FCNN (mx_inline_prod, T)
358 OP_RED_FCNN (mx_inline_sumsq, T)
359 OP_RED_FCNN (mx_inline_sumsq, std::complex<T>)
360
361 #define OP_CUM_FCN(F, OP) \
362 template <class T> \
363 inline void \
364 F (const T *v, T *r, octave_idx_type n) \
365 { \
366 if (n) \
367 { \
368 T t = r[0] = v[0]; \
369 for (octave_idx_type i = 1; i < n; i++) \
370 r[i] = t = t OP v[i]; \
371 } \
372 }
373
374 OP_CUM_FCN (mx_inline_cumsum, +)
375 OP_CUM_FCN (mx_inline_cumprod, *)
376
377 #define OP_CUM_FCN2(F, OP) \
378 template <class T> \
379 inline void \
380 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \
381 { \
382 if (n) \
383 { \
384 for (octave_idx_type i = 0; i < m; i++) \
385 r[i] = v[i]; \
386 const T *r0 = r; \
387 for (octave_idx_type j = 1; j < n; j++) \
388 { \
389 r += m; v += m; \
390 for (octave_idx_type i = 0; i < m; i++) \
391 r[i] = v[i] OP r0[i]; \
392 r0 += m; \
393 } \
394 } \
395 }
396
397 OP_CUM_FCN2 (mx_inline_cumsum, +)
398 OP_CUM_FCN2 (mx_inline_cumprod, *)
399
400 #define OP_CUM_FCNN(F) \
401 template <class T> \
402 inline void \
403 F (const T *v, T *r, octave_idx_type l, \
404 octave_idx_type n, octave_idx_type u) \
405 { \
406 if (l == 1) \
407 { \
408 for (octave_idx_type i = 0; i < u; i++) \
409 { \
410 F (v, r, n); \
411 v += n; r += n; \
412 } \
413 } \
414 else \
415 { \
416 for (octave_idx_type i = 0; i < u; i++) \
417 { \
418 F (v, r, l, n); \
419 v += l*n; \
420 r += l*n; \
421 } \
422 } \
423 }
424
425 OP_CUM_FCNN (mx_inline_cumsum)
426 OP_CUM_FCNN (mx_inline_cumprod)
427
428 // Assistant function
429
430 inline void
431 get_extent_triplet (const dim_vector& dims, int& dim,
432 octave_idx_type& l, octave_idx_type& n,
433 octave_idx_type& u)
434 {
435 octave_idx_type ndims = dims.length ();
436 if (dim >= ndims)
437 {
438 l = dims.numel ();
439 n = 1;
440 u = 1;
441 }
442 else
443 {
444 if (dim < 0)
445 {
446 // find first non-singleton dim
447 for (dim = 0; dims(dim) == 1 && dim < ndims - 1; dim++) ;
448 }
449 // calculate extent triplet.
450 l = 1, n = dims(dim), u = 1;
451 for (octave_idx_type i = 0; i < dim; i++)
452 l *= dims (i);
453 for (octave_idx_type i = dim + 1; i < ndims; i++)
454 u *= dims (i);
455 }
456 }
457
458 // Appliers.
459 // FIXME: is this the best design? C++ gives a lot of options here...
460 // maybe it can be done without an explicit parameter?
461
462 template <class ArrayType, class T>
463 inline ArrayType
464 do_mx_red_op (const Array<T>& src, int dim,
465 void (*mx_red_op) (const T *, typename ArrayType::element_type *,
466 octave_idx_type, octave_idx_type, octave_idx_type))
467 {
468 octave_idx_type l, n, u;
469 dim_vector dims = src.dims ();
470 get_extent_triplet (dims, dim, l, n, u);
471
472 // Reduction operation reduces the array size.
473 if (dim < dims.length ()) dims(dim) = 1;
474 dims.chop_trailing_singletons ();
475
476 ArrayType ret (dims);
477 mx_red_op (src.data (), ret.fortran_vec (), l, n, u);
478
479 return ret;
480 }
481
482 template <class ArrayType, class T>
483 inline ArrayType
484 do_mx_cum_op (const Array<T>& src, int dim,
485 void (*mx_cum_op) (const T *, typename ArrayType::element_type *,
486 octave_idx_type, octave_idx_type, octave_idx_type))
487 {
488 octave_idx_type l, n, u;
489 dim_vector dims = src.dims ();
490 get_extent_triplet (dims, dim, l, n, u);
491
492 // Cumulative operation doesn't reduce the array size.
493 ArrayType ret (dims);
494 mx_cum_op (src.data (), ret.fortran_vec (), l, n, u);
495
496 return ret;
497 }
284 498
285 // Avoid some code duplication. Maybe we should use templates. 499 // Avoid some code duplication. Maybe we should use templates.
286 500
287 #define MX_CUMULATIVE_OP(RET_TYPE, ELT_TYPE, OP) \ 501 #define MX_CUMULATIVE_OP(RET_TYPE, ELT_TYPE, OP) \
288 \ 502 \