Mercurial > hg > octave-lyh
comparison liboctave/mx-inlines.cc @ 8736:53b4fdeacc2e
improve reduction functions
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Fri, 13 Feb 2009 21:04:50 +0100 |
parents | a1ae2aae903e |
children | 1bd918cfb6e2 |
comparison
equal
deleted
inserted
replaced
8735:afbfd7f4fd93 | 8736:53b4fdeacc2e |
---|---|
279 OP_DUP_FCN (std::abs, mx_inline_fabs_dup, float, float) | 279 OP_DUP_FCN (std::abs, mx_inline_fabs_dup, float, float) |
280 OP_DUP_FCN (std::abs, mx_inline_cabs_dup, float, FloatComplex) | 280 OP_DUP_FCN (std::abs, mx_inline_cabs_dup, float, FloatComplex) |
281 OP_DUP_FCN (real, mx_inline_real_dup, float, FloatComplex) | 281 OP_DUP_FCN (real, mx_inline_real_dup, float, FloatComplex) |
282 OP_DUP_FCN (imag, mx_inline_imag_dup, float, FloatComplex) | 282 OP_DUP_FCN (imag, mx_inline_imag_dup, float, FloatComplex) |
283 OP_DUP_FCN (conj, mx_inline_conj_dup, FloatComplex, FloatComplex) | 283 OP_DUP_FCN (conj, mx_inline_conj_dup, FloatComplex, FloatComplex) |
284 | |
285 // NOTE: std::norm is NOT equivalent | |
286 template <class T> | |
287 T cabsq (const std::complex<T>& c) | |
288 { return c.real () * c.real () + c.imag () * c.imag (); } | |
289 | |
290 #define OP_RED_SUM(ac, el) ac += el | |
291 #define OP_RED_PROD(ac, el) ac *= el | |
292 #define OP_RED_SUMSQ(ac, el) ac += el*el | |
293 #define OP_RED_SUMSQC(ac, el) ac += cabsq (el) | |
294 | |
295 #define OP_RED_FCN(F, TSRC, OP, ZERO) \ | |
296 template <class T> \ | |
297 inline T \ | |
298 F (const TSRC* v, octave_idx_type n) \ | |
299 { \ | |
300 T ac = ZERO; \ | |
301 for (octave_idx_type i = 0; i < n; i++) \ | |
302 OP(ac, v[i]); \ | |
303 return ac; \ | |
304 } | |
305 | |
306 OP_RED_FCN (mx_inline_sum, T, OP_RED_SUM, 0) | |
307 OP_RED_FCN (mx_inline_prod, T, OP_RED_PROD, 1) | |
308 OP_RED_FCN (mx_inline_sumsq, T, OP_RED_SUMSQ, 0) | |
309 OP_RED_FCN (mx_inline_sumsq, std::complex<T>, OP_RED_SUMSQC, 0) | |
310 | |
311 #define OP_RED_FCN2(F, TSRC, OP, ZERO) \ | |
312 template <class T> \ | |
313 inline void \ | |
314 F (const TSRC* v, T *r, octave_idx_type m, octave_idx_type n) \ | |
315 { \ | |
316 for (octave_idx_type i = 0; i < m; i++) \ | |
317 r[i] = ZERO; \ | |
318 for (octave_idx_type j = 0; j < n; j++) \ | |
319 { \ | |
320 for (octave_idx_type i = 0; i < m; i++) \ | |
321 OP(r[i], v[i]); \ | |
322 v += m; \ | |
323 } \ | |
324 } | |
325 | |
326 OP_RED_FCN2 (mx_inline_sum, T, OP_RED_SUM, 0) | |
327 OP_RED_FCN2 (mx_inline_prod, T, OP_RED_PROD, 1) | |
328 OP_RED_FCN2 (mx_inline_sumsq, T, OP_RED_SUMSQ, 0) | |
329 OP_RED_FCN2 (mx_inline_sumsq, std::complex<T>, OP_RED_SUMSQC, 0) | |
330 | |
331 #define OP_RED_FCNN(F, TSRC) \ | |
332 template <class T> \ | |
333 inline void \ | |
334 F (const TSRC *v, T *r, octave_idx_type l, \ | |
335 octave_idx_type n, octave_idx_type u) \ | |
336 { \ | |
337 if (l == 1) \ | |
338 { \ | |
339 for (octave_idx_type i = 0; i < u; i++) \ | |
340 { \ | |
341 r[i] = F (v, n); \ | |
342 v += n; \ | |
343 } \ | |
344 } \ | |
345 else \ | |
346 { \ | |
347 for (octave_idx_type i = 0; i < u; i++) \ | |
348 { \ | |
349 F (v, r, l, n); \ | |
350 v += l*n; \ | |
351 r += l; \ | |
352 } \ | |
353 } \ | |
354 } | |
355 | |
356 OP_RED_FCNN (mx_inline_sum, T) | |
357 OP_RED_FCNN (mx_inline_prod, T) | |
358 OP_RED_FCNN (mx_inline_sumsq, T) | |
359 OP_RED_FCNN (mx_inline_sumsq, std::complex<T>) | |
360 | |
361 #define OP_CUM_FCN(F, OP) \ | |
362 template <class T> \ | |
363 inline void \ | |
364 F (const T *v, T *r, octave_idx_type n) \ | |
365 { \ | |
366 if (n) \ | |
367 { \ | |
368 T t = r[0] = v[0]; \ | |
369 for (octave_idx_type i = 1; i < n; i++) \ | |
370 r[i] = t = t OP v[i]; \ | |
371 } \ | |
372 } | |
373 | |
374 OP_CUM_FCN (mx_inline_cumsum, +) | |
375 OP_CUM_FCN (mx_inline_cumprod, *) | |
376 | |
377 #define OP_CUM_FCN2(F, OP) \ | |
378 template <class T> \ | |
379 inline void \ | |
380 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \ | |
381 { \ | |
382 if (n) \ | |
383 { \ | |
384 for (octave_idx_type i = 0; i < m; i++) \ | |
385 r[i] = v[i]; \ | |
386 const T *r0 = r; \ | |
387 for (octave_idx_type j = 1; j < n; j++) \ | |
388 { \ | |
389 r += m; v += m; \ | |
390 for (octave_idx_type i = 0; i < m; i++) \ | |
391 r[i] = v[i] OP r0[i]; \ | |
392 r0 += m; \ | |
393 } \ | |
394 } \ | |
395 } | |
396 | |
397 OP_CUM_FCN2 (mx_inline_cumsum, +) | |
398 OP_CUM_FCN2 (mx_inline_cumprod, *) | |
399 | |
400 #define OP_CUM_FCNN(F) \ | |
401 template <class T> \ | |
402 inline void \ | |
403 F (const T *v, T *r, octave_idx_type l, \ | |
404 octave_idx_type n, octave_idx_type u) \ | |
405 { \ | |
406 if (l == 1) \ | |
407 { \ | |
408 for (octave_idx_type i = 0; i < u; i++) \ | |
409 { \ | |
410 F (v, r, n); \ | |
411 v += n; r += n; \ | |
412 } \ | |
413 } \ | |
414 else \ | |
415 { \ | |
416 for (octave_idx_type i = 0; i < u; i++) \ | |
417 { \ | |
418 F (v, r, l, n); \ | |
419 v += l*n; \ | |
420 r += l*n; \ | |
421 } \ | |
422 } \ | |
423 } | |
424 | |
425 OP_CUM_FCNN (mx_inline_cumsum) | |
426 OP_CUM_FCNN (mx_inline_cumprod) | |
427 | |
428 // Assistant function | |
429 | |
430 inline void | |
431 get_extent_triplet (const dim_vector& dims, int& dim, | |
432 octave_idx_type& l, octave_idx_type& n, | |
433 octave_idx_type& u) | |
434 { | |
435 octave_idx_type ndims = dims.length (); | |
436 if (dim >= ndims) | |
437 { | |
438 l = dims.numel (); | |
439 n = 1; | |
440 u = 1; | |
441 } | |
442 else | |
443 { | |
444 if (dim < 0) | |
445 { | |
446 // find first non-singleton dim | |
447 for (dim = 0; dims(dim) == 1 && dim < ndims - 1; dim++) ; | |
448 } | |
449 // calculate extent triplet. | |
450 l = 1, n = dims(dim), u = 1; | |
451 for (octave_idx_type i = 0; i < dim; i++) | |
452 l *= dims (i); | |
453 for (octave_idx_type i = dim + 1; i < ndims; i++) | |
454 u *= dims (i); | |
455 } | |
456 } | |
457 | |
458 // Appliers. | |
459 // FIXME: is this the best design? C++ gives a lot of options here... | |
460 // maybe it can be done without an explicit parameter? | |
461 | |
462 template <class ArrayType, class T> | |
463 inline ArrayType | |
464 do_mx_red_op (const Array<T>& src, int dim, | |
465 void (*mx_red_op) (const T *, typename ArrayType::element_type *, | |
466 octave_idx_type, octave_idx_type, octave_idx_type)) | |
467 { | |
468 octave_idx_type l, n, u; | |
469 dim_vector dims = src.dims (); | |
470 get_extent_triplet (dims, dim, l, n, u); | |
471 | |
472 // Reduction operation reduces the array size. | |
473 if (dim < dims.length ()) dims(dim) = 1; | |
474 dims.chop_trailing_singletons (); | |
475 | |
476 ArrayType ret (dims); | |
477 mx_red_op (src.data (), ret.fortran_vec (), l, n, u); | |
478 | |
479 return ret; | |
480 } | |
481 | |
482 template <class ArrayType, class T> | |
483 inline ArrayType | |
484 do_mx_cum_op (const Array<T>& src, int dim, | |
485 void (*mx_cum_op) (const T *, typename ArrayType::element_type *, | |
486 octave_idx_type, octave_idx_type, octave_idx_type)) | |
487 { | |
488 octave_idx_type l, n, u; | |
489 dim_vector dims = src.dims (); | |
490 get_extent_triplet (dims, dim, l, n, u); | |
491 | |
492 // Cumulative operation doesn't reduce the array size. | |
493 ArrayType ret (dims); | |
494 mx_cum_op (src.data (), ret.fortran_vec (), l, n, u); | |
495 | |
496 return ret; | |
497 } | |
284 | 498 |
285 // Avoid some code duplication. Maybe we should use templates. | 499 // Avoid some code duplication. Maybe we should use templates. |
286 | 500 |
287 #define MX_CUMULATIVE_OP(RET_TYPE, ELT_TYPE, OP) \ | 501 #define MX_CUMULATIVE_OP(RET_TYPE, ELT_TYPE, OP) \ |
288 \ | 502 \ |