Mercurial > hg > octave-lyh
diff liboctave/mx-inlines.cc @ 9721:192d94cff6c1
improve sum & implement the 'extra' option, refactor some code
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Tue, 13 Oct 2009 12:22:50 +0200 |
parents | 9ecd35a606e3 |
children | 26abff55f6fe |
line wrap: on
line diff
--- a/liboctave/mx-inlines.cc +++ b/liboctave/mx-inlines.cc @@ -415,6 +415,14 @@ #define OP_RED_SUMSQ(ac, el) ac += el*el #define OP_RED_SUMSQC(ac, el) ac += cabsq (el) +inline void op_dble_sum(double& ac, float el) +{ ac += el; } +inline void op_dble_sum(Complex& ac, const FloatComplex& el) +{ ac += el; } // FIXME: guaranteed? +template <class T> +inline void op_dble_sum(double& ac, const octave_int<T>& el) +{ ac += el.double_value (); } + // The following two implement a simple short-circuiting. #define OP_RED_ANYC(ac, el) if (xis_true (el)) { ac = true; break; } else continue #define OP_RED_ALLC(ac, el) if (xis_false (el)) { ac = false; break; } else continue @@ -430,7 +438,10 @@ return ac; \ } +#define PROMOTE_DOUBLE(T) typename subst_template_param<std::complex, T, double>::type + OP_RED_FCN (mx_inline_sum, T, T, OP_RED_SUM, 0) +OP_RED_FCN (mx_inline_dsum, T, PROMOTE_DOUBLE(T), op_dble_sum, 0.0) OP_RED_FCN (mx_inline_count, bool, T, OP_RED_SUM, 0) OP_RED_FCN (mx_inline_prod, T, T, OP_RED_PROD, 1) OP_RED_FCN (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0) @@ -455,6 +466,7 @@ } OP_RED_FCN2 (mx_inline_sum, T, T, OP_RED_SUM, 0) +OP_RED_FCN2 (mx_inline_dsum, T, PROMOTE_DOUBLE(T), op_dble_sum, 0.0) OP_RED_FCN2 (mx_inline_count, bool, T, OP_RED_SUM, 0) OP_RED_FCN2 (mx_inline_prod, T, T, OP_RED_PROD, 1) OP_RED_FCN2 (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0) @@ -518,6 +530,7 @@ } OP_RED_FCNN (mx_inline_sum, T, T) +OP_RED_FCNN (mx_inline_dsum, T, PROMOTE_DOUBLE(T)) OP_RED_FCNN (mx_inline_count, bool, T) OP_RED_FCNN (mx_inline_prod, T, T) OP_RED_FCNN (mx_inline_sumsq, T, T) @@ -1238,6 +1251,54 @@ return ret; } +// Fast extra-precise summation. According to +// T. Ogita, S. M. Rump, S. Oishi: +// Accurate Sum And Dot Product, +// SIAM J. Sci. Computing, Vol. 26, 2005 + +template <class T> +inline void twosum_accum (T& s, T& e, + const T& x) +{ + FLOAT_TRUNCATE T s1 = s + x, t = s1 - s, e1 = (s - (s1 - t)) + (x - t); + s = s1; + e += e1; +} + +template <class T> +inline T +mx_inline_xsum (const T *v, octave_idx_type n) +{ + T s = 0, e = 0; + for (octave_idx_type i = 0; i < n; i++) + twosum_accum (s, e, v[i]); + + return s + e; +} + +template <class T> +inline void +mx_inline_xsum (const T *v, T *r, + octave_idx_type m, octave_idx_type n) +{ + OCTAVE_LOCAL_BUFFER (T, e, m); + for (octave_idx_type i = 0; i < m; i++) + e[i] = r[i] = T (); + + for (octave_idx_type j = 0; j < n; j++) + { + for (octave_idx_type i = 0; i < m; i++) + twosum_accum (r[i], e[i], v[i]); + + v += m; + } + + for (octave_idx_type i = 0; i < m; i++) + r[i] += e[i]; +} + +OP_RED_FCNN (mx_inline_xsum, T, T) + #endif /*