# HG changeset patch
# User Jaroslav Hajek
# Date 1258542187 -3600
# Node ID c15a5ed0da58c17fcbc362fa8d8d364e804b7dca
# Parent b2a282c37217a7abfa7fbf534b9e51b97f75d3a9
optimize bsxfun (@power, ...)

diff --git a/liboctave/CNDArray.cc b/liboctave/CNDArray.cc
--- a/liboctave/CNDArray.cc
+++ b/liboctave/CNDArray.cc
@@ -1079,6 +1079,8 @@
 BSXFUN_STDOP_DEFS_MXLOOP (ComplexNDArray)
 BSXFUN_STDREL_DEFS_MXLOOP (ComplexNDArray)
 
+BSXFUN_OP_DEF_MXLOOP (pow, ComplexNDArray, mx_inline_pow)
+
 /*
 ;;; Local Variables: ***
 ;;; mode: C++ ***
diff --git a/liboctave/CNDArray.h b/liboctave/CNDArray.h
--- a/liboctave/CNDArray.h
+++ b/liboctave/CNDArray.h
@@ -181,6 +181,8 @@
 BSXFUN_STDOP_DECLS (ComplexNDArray, OCTAVE_API)
 BSXFUN_STDREL_DECLS (ComplexNDArray, OCTAVE_API)
 
+BSXFUN_OP_DECL (pow, ComplexNDArray, OCTAVE_API)
+
 #endif
 
 /*
diff --git a/liboctave/ChangeLog b/liboctave/ChangeLog
--- a/liboctave/ChangeLog
+++ b/liboctave/ChangeLog
@@ -1,3 +1,27 @@
+2009-11-18 Jaroslav Hajek
+
+	* mx-inlines.cc (mx_inline_any_negative, mx_inline_pow): New loops.
+	(DEFMXMAPPER2X): New macro.
+	* bsxfun-decl.h (BSXFUN_OP2_DECL): New macro.
+	* bsxfun-defs.cc (BSXFUN_OP2_DEF, BSXFUN_OP2_DEF_MXLOOP): New macros.
+	(do_bsxfun_op): Use OCTAVE_QUIT.
+	* dNDArray.cc (NDArray::all_integers (void)): New method.
+	(NDArray::any_element_is_negative): Use mx_inline_any_negative.
+	(bsxfun_pow (const NDArray&, const NDArray&),
+	bsxfun_pow (const ComplexNDArray&, const NDArray&)): New functions.
+	* dNDArray.h: Update.
+	* fNDArray.cc (FloatNDArray::all_integers (void)): New method.
+	(FloatNDArray::any_element_is_negative): Use mx_inline_any_negative.
+	(bsxfun_pow (const FloatNDArray&, const FloatNDArray&),
+	bsxfun_pow (const FloatComplexNDArray&, const FloatNDArray&)): New functions.
+	* fNDArray.h: Update.
+	* CNDArray.cc (bsxfun_pow (const ComplexNDArray&, const
+	ComplexNDArray&)): New function.
+	* CNDArray.h: Update.
+	* fCNDArray.cc (bsxfun_pow (const FloatComplexNDArray&, const
+	FloatComplexNDArray&)): New function.
+	* fCNDArray.h: Update.
+
 2009-11-13 Jaroslav Hajek
 
 	* CNDArray.cc (ComplexNDArray::all_elements_are_real):
diff --git a/liboctave/bsxfun-decl.h b/liboctave/bsxfun-decl.h
--- a/liboctave/bsxfun-decl.h
+++ b/liboctave/bsxfun-decl.h
@@ -27,6 +27,9 @@
 #define BSXFUN_OP_DECL(OP, ARRAY, API) \
 extern API ARRAY bsxfun_ ## OP (const ARRAY&, const ARRAY&);
 
+#define BSXFUN_OP2_DECL(OP, ARRAY, ARRAY1, ARRAY2, API) \
+extern API ARRAY bsxfun_ ## OP (const ARRAY1&, const ARRAY2&);
+
 #define BSXFUN_REL_DECL(OP, ARRAY, API) \
 extern API boolNDArray bsxfun_ ## OP (const ARRAY&, const ARRAY&);
 
diff --git a/liboctave/bsxfun-defs.cc b/liboctave/bsxfun-defs.cc
--- a/liboctave/bsxfun-defs.cc
+++ b/liboctave/bsxfun-defs.cc
@@ -117,6 +117,8 @@
       OCTAVE_LOCAL_BUFFER_INIT (octave_idx_type, idx, nd, 0);
       for (octave_idx_type iter = 0; iter < niter; iter++)
         {
+          OCTAVE_QUIT;
+
           // Compute indices.
           // FIXME: performance impact noticeable?
           octave_idx_type xidx = cdvx.cum_compute_index (idx);
@@ -141,6 +143,9 @@
 #define BSXFUN_OP_DEF(OP, ARRAY) \
 ARRAY bsxfun_ ## OP (const ARRAY& x, const ARRAY& y)
 
+#define BSXFUN_OP2_DEF(OP, ARRAY, ARRAY1, ARRAY2) \
+ARRAY bsxfun_ ## OP (const ARRAY1& x, const ARRAY2& y)
+
 #define BSXFUN_REL_DEF(OP, ARRAY) \
 boolNDArray bsxfun_ ## OP (const ARRAY& x, const ARRAY& y)
 
@@ -148,6 +153,10 @@
   BSXFUN_OP_DEF(OP, ARRAY) \
   { return do_bsxfun_op (x, y, LOOP, LOOP, LOOP); }
 
+#define BSXFUN_OP2_DEF_MXLOOP(OP, ARRAY, ARRAY1, ARRAY2, LOOP) \
+  BSXFUN_OP2_DEF(OP, ARRAY, ARRAY1, ARRAY2) \
+  { return do_bsxfun_op (x, y, LOOP, LOOP, LOOP); }
+
 #define BSXFUN_REL_DEF_MXLOOP(OP, ARRAY, LOOP) \
   BSXFUN_REL_DEF(OP, ARRAY) \
   { return do_bsxfun_op (x, y, LOOP, LOOP, LOOP); }
diff --git a/liboctave/dNDArray.cc b/liboctave/dNDArray.cc
--- a/liboctave/dNDArray.cc
+++ b/liboctave/dNDArray.cc
@@ -556,11 +556,7 @@
           return true;
     }
   else
-    {
-      for (octave_idx_type i = 0; i < nel; i++)
-        if (elem (i) < 0)
-          return true;
-    }
+    return mx_inline_any_negative (numel (), data ());
 
   return false;
 }
@@ -673,6 +669,22 @@
 }
 
 bool
+NDArray::all_integers (void) const
+{
+  octave_idx_type nel = nelem ();
+
+  for (octave_idx_type i = 0; i < nel; i++)
+    {
+      double val = elem (i);
+
+      if (D_NINT (val) != val)
+        return false;
+    }
+
+  return true;
+}
+
+bool
 NDArray::too_large_for_float (void) const
 {
   octave_idx_type nel = nelem ();
@@ -1116,6 +1128,10 @@
 BSXFUN_STDOP_DEFS_MXLOOP (NDArray)
 BSXFUN_STDREL_DEFS_MXLOOP (NDArray)
 
+BSXFUN_OP_DEF_MXLOOP (pow, NDArray, mx_inline_pow)
+BSXFUN_OP2_DEF_MXLOOP (pow, ComplexNDArray, ComplexNDArray,
+                       NDArray, mx_inline_pow)
+
 /*
 ;;; Local Variables: ***
 ;;; mode: C++ ***
diff --git a/liboctave/dNDArray.h b/liboctave/dNDArray.h
--- a/liboctave/dNDArray.h
+++ b/liboctave/dNDArray.h
@@ -82,6 +82,7 @@
   bool all_elements_are_zero (void) const;
   bool all_elements_are_int_or_inf_or_nan (void) const;
   bool all_integers (double& max_val, double& min_val) const;
+  bool all_integers (void) const;
   bool too_large_for_float (void) const;
 
   // FIXME -- this is not quite the right thing.
@@ -191,6 +192,10 @@
 BSXFUN_STDOP_DECLS (NDArray, OCTAVE_API)
 BSXFUN_STDREL_DECLS (NDArray, OCTAVE_API)
 
+BSXFUN_OP_DECL (pow, NDArray, OCTAVE_API)
+BSXFUN_OP2_DECL (pow, ComplexNDArray, ComplexNDArray,
+                 NDArray, OCTAVE_API)
+
 #endif
 
 /*
diff --git a/liboctave/fCNDArray.cc b/liboctave/fCNDArray.cc
--- a/liboctave/fCNDArray.cc
+++ b/liboctave/fCNDArray.cc
@@ -1074,6 +1074,8 @@
 BSXFUN_STDOP_DEFS_MXLOOP (FloatComplexNDArray)
 BSXFUN_STDREL_DEFS_MXLOOP (FloatComplexNDArray)
 
+BSXFUN_OP_DEF_MXLOOP (pow, FloatComplexNDArray, mx_inline_pow)
+
 /*
 ;;; Local Variables: ***
 ;;; mode: C++ ***
diff --git a/liboctave/fCNDArray.h b/liboctave/fCNDArray.h
--- a/liboctave/fCNDArray.h
+++ b/liboctave/fCNDArray.h
@@ -181,6 +181,8 @@
 BSXFUN_STDOP_DECLS (FloatComplexNDArray, OCTAVE_API)
 BSXFUN_STDREL_DECLS (FloatComplexNDArray, OCTAVE_API)
 
+BSXFUN_OP_DECL (pow, FloatComplexNDArray, OCTAVE_API)
+
 #endif
 
 /*
diff --git a/liboctave/fNDArray.cc b/liboctave/fNDArray.cc
--- a/liboctave/fNDArray.cc
+++ b/liboctave/fNDArray.cc
@@ -514,11 +514,7 @@
           return true;
     }
   else
-    {
-      for (octave_idx_type i = 0; i < nel; i++)
-        if (elem (i) < 0)
-          return true;
-    }
+    return mx_inline_any_negative (numel (), data ());
 
   return false;
 }
@@ -631,6 +627,22 @@
 }
 
 bool
+FloatNDArray::all_integers (void) const
+{
+  octave_idx_type nel = nelem ();
+
+  for (octave_idx_type i = 0; i < nel; i++)
+    {
+      double val = elem (i);
+
+      if (D_NINT (val) != val)
+        return false;
+    }
+
+  return true;
+}
+
+bool
 FloatNDArray::too_large_for_float (void) const
 {
   octave_idx_type nel = nelem ();
@@ -1074,6 +1086,10 @@
 BSXFUN_STDOP_DEFS_MXLOOP (FloatNDArray)
 BSXFUN_STDREL_DEFS_MXLOOP (FloatNDArray)
 
+BSXFUN_OP_DEF_MXLOOP (pow, FloatNDArray, mx_inline_pow)
+BSXFUN_OP2_DEF_MXLOOP (pow, FloatComplexNDArray, FloatComplexNDArray,
+                       FloatNDArray, mx_inline_pow)
+
 /*
 ;;; Local Variables: ***
 ;;; mode: C++ ***
diff --git a/liboctave/fNDArray.h b/liboctave/fNDArray.h
--- a/liboctave/fNDArray.h
+++ b/liboctave/fNDArray.h
@@ -79,6 +79,7 @@
   bool all_elements_are_zero (void) const;
   bool all_elements_are_int_or_inf_or_nan (void) const;
   bool all_integers (float& max_val, float& min_val) const;
+  bool all_integers (void) const;
   bool too_large_for_float (void) const;
 
   // FIXME -- this is not quite the right thing.
@@ -188,6 +189,10 @@
 BSXFUN_STDOP_DECLS (FloatNDArray, OCTAVE_API)
 BSXFUN_STDREL_DECLS (FloatNDArray, OCTAVE_API)
 
+BSXFUN_OP_DECL (pow, FloatNDArray, OCTAVE_API)
+BSXFUN_OP2_DECL (pow, FloatComplexNDArray, FloatComplexNDArray,
+                 FloatNDArray, OCTAVE_API)
+
 #endif
 
 /*
diff --git a/liboctave/mx-inlines.cc b/liboctave/mx-inlines.cc
--- a/liboctave/mx-inlines.cc
+++ b/liboctave/mx-inlines.cc
@@ -192,6 +192,19 @@
   return false;
 }
 
+template <class T>
+inline bool
+mx_inline_any_negative (size_t n, const T* x)
+{
+  for (size_t i = 0; i < n; i++)
+    {
+      if (x[i] < 0)
+        return true;
+    }
+
+  return false;
+}
+
 template <class T>
 inline bool
 mx_inline_all_real (size_t n, const std::complex<T>* x)
@@ -232,6 +245,20 @@
 DEFMXMAPPER2 (mx_inline_xmin, xmin)
 DEFMXMAPPER2 (mx_inline_xmax, xmax)
 
+// Pairwise power
+#define DEFMXMAPPER2X(F, FUN) \
+template <class R, class X, class Y> \
+inline void F (size_t n, R *r, const X *x, const Y *y) \
+{ for (size_t i = 0; i < n; i++) r[i] = FUN (x[i], y[i]); } \
+template <class R, class X, class Y> \
+inline void F (size_t n, R *r, const X *x, Y y) \
+{ for (size_t i = 0; i < n; i++) r[i] = FUN (x[i], y); } \
+template <class R, class X, class Y> \
+inline void F (size_t n, R *r, X x, const Y *y) \
+{ for (size_t i = 0; i < n; i++) r[i] = FUN (x, y[i]); }
+
+DEFMXMAPPER2X (mx_inline_pow, std::pow)
+
 // Arbitrary function appliers. The function is a template parameter to enable
 // inlining.
 template <class R, class X, R fun (X x)>
diff --git a/src/ChangeLog b/src/ChangeLog
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,11 @@
+2009-11-18 Jaroslav Hajek
+
+	* DLD-FUNCTIONS/bsxfun.cc (bsxfun_builtin_op): New member:
+	bsxfun_builtin_power.
+	(bsxfun_builtin_names): Include "power".
+	(do_bsxfun_real_pow): New static function.
+	(maybe_fill_table): Register power handlers.
+
 2009-11-13 Jaroslav Hajek
 
 	* ov-complex.cc (octave_complex::try_narrowing_conversion): Don't
diff --git a/src/DLD-FUNCTIONS/bsxfun.cc b/src/DLD-FUNCTIONS/bsxfun.cc
--- a/src/DLD-FUNCTIONS/bsxfun.cc
+++ b/src/DLD-FUNCTIONS/bsxfun.cc
@@ -56,6 +56,7 @@
   bsxfun_builtin_ge,
   bsxfun_builtin_and,
   bsxfun_builtin_or,
+  bsxfun_builtin_power,
   bsxfun_builtin_unknown,
   bsxfun_num_builtin_ops = bsxfun_builtin_unknown
 };
@@ -75,7 +76,8 @@
   "gt",
   "ge",
   "and",
-  "or"
+  "or",
+  "power"
 };
 
 static bsxfun_builtin_op
@@ -110,6 +112,19 @@
   return octave_value (bsxfun_rel (xa, ya));
 }
 
+// Pow needs a special handler for reals because of the potentially complex result.
+template <class NDA, class CNDA>
+static octave_value
+do_bsxfun_real_pow (const octave_value& x, const octave_value& y)
+{
+  NDA xa = octave_value_extract<NDA> (x);
+  NDA ya = octave_value_extract<NDA> (y);
+  if (! ya.all_integers () && xa.any_element_is_negative ())
+    return octave_value (bsxfun_pow (CNDA (xa), ya));
+  else
+    return octave_value (bsxfun_pow (xa, ya));
+}
+
 static void maybe_fill_table (void)
 {
   static bool filled = false;
@@ -151,6 +166,15 @@
   REGISTER_OP_HANDLER (bsxfun_builtin_and, btyp_bool, boolNDArray, bsxfun_and);
   REGISTER_OP_HANDLER (bsxfun_builtin_or, btyp_bool, boolNDArray, bsxfun_or);
 
+  // Register power handlers.
+  bsxfun_handler_table[bsxfun_builtin_power][btyp_double] =
+    do_bsxfun_real_pow<NDArray, ComplexNDArray>;
+  bsxfun_handler_table[bsxfun_builtin_power][btyp_float] =
+    do_bsxfun_real_pow<FloatNDArray, FloatComplexNDArray>;
+
+  REGISTER_OP_HANDLER (bsxfun_builtin_power, btyp_complex, ComplexNDArray, bsxfun_pow);
+  REGISTER_OP_HANDLER (bsxfun_builtin_power, btyp_float_complex, FloatComplexNDArray, bsxfun_pow);
+
   filled = true;
 }
 