# HG changeset patch # User Jaroslav Hajek # Date 1263992212 -3600 # Node ID 9597eea7fa36fe34b2c33934c5b3bdb7608fcc63 # Parent fa01c1670b3ea4a5be802862280a38ebb19bc3ce inline xmin/xmax & optimize special cases diff --git a/liboctave/ChangeLog b/liboctave/ChangeLog --- a/liboctave/ChangeLog +++ b/liboctave/ChangeLog @@ -1,3 +1,12 @@ +2010-01-20 Jaroslav Hajek + + * lo-mappers.h (xmin (double, double), xmax (double, double), + xmin (float, float), xmax (float, float)): Inline definitions. + * lo-mappers.cc (xmin (double, double), xmax (double, double), + xmin (float, float), xmax (float, float)): Remove from here. + * mx-inlines.cc (mx_inline_xmin, mx_inline_xmax): Provide + specializations for real array-scalar and scalar-array cases. + 2010-01-20 Jaroslav Hajek * oct-norm.cc (norm_accumulator_p::accum, norm_accumulator_mp::accum): diff --git a/liboctave/lo-mappers.cc b/liboctave/lo-mappers.cc --- a/liboctave/lo-mappers.cc +++ b/liboctave/lo-mappers.cc @@ -228,20 +228,6 @@ // (double, double) -> double mappers. -// According to Matlab, is both args are NaN, the first one is returned. - -double -xmin (double x, double y) -{ - return xisnan (y) ? x : (x <= y ? x : y); -} - -double -xmax (double x, double y) -{ - return xisnan (y) ? x : (x >= y ? x : y); -} - // complex -> complex mappers. Complex @@ -546,20 +532,6 @@ // (float, float) -> float mappers. -// FIXME -- need to handle NA too? - -float -xmin (float x, float y) -{ - return xisnan (y) ? x : (x <= y ? x : y); -} - -float -xmax (float x, float y) -{ - return xisnan (y) ? x : (x >= y ? x : y); -} - // complex -> complex mappers. 
FloatComplex
diff --git a/liboctave/lo-mappers.h b/liboctave/lo-mappers.h
--- a/liboctave/lo-mappers.h
+++ b/liboctave/lo-mappers.h
@@ -69,8 +69,15 @@
 extern OCTAVE_API bool octave_is_NA (double x);
 extern OCTAVE_API bool octave_is_NaN_or_NA (double x) GCC_ATTR_DEPRECATED;
 
-extern OCTAVE_API double xmin (double x, double y);
-extern OCTAVE_API double xmax (double x, double y);
+// NaN-aware min/max.  A NaN argument is ignored in favor of the other one;
+// according to Matlab, if both args are NaN, the first one is returned.
+// This form is favorable. GCC will translate (x <= y ? x : y) without a jump,
+// hence the only conditional jump involved will be the first (xisnan), infrequent
+// and hence friendly to branch prediction.
+inline double xmin (double x, double y)
+{ return xisnan (y) ? x : (x <= y ? x : y); }
+inline double xmax (double x, double y)
+{ return xisnan (y) ? x : (x >= y ? x : y); }
 
 extern OCTAVE_API Complex acos (const Complex& x);
 extern OCTAVE_API Complex acosh (const Complex& x);
@@ -141,8 +148,12 @@
 extern OCTAVE_API bool octave_is_NA (float x);
 extern OCTAVE_API bool octave_is_NaN_or_NA (float x) GCC_ATTR_DEPRECATED;
 
-extern OCTAVE_API float xmin (float x, float y);
-extern OCTAVE_API float xmax (float x, float y);
+// Single-precision NaN-aware min/max: a NaN argument is ignored in favor
+// of the other one; if both args are NaN, the first one is returned.
+inline float xmin (float x, float y)
+{ return xisnan (y) ? x : (x <= y ? x : y); }
+inline float xmax (float x, float y)
+{ return xisnan (y) ? x : (x >= y ? x : y); }
 
 extern OCTAVE_API FloatComplex acos (const FloatComplex& x);
 extern OCTAVE_API FloatComplex acosh (const FloatComplex& x);
diff --git a/liboctave/mx-inlines.cc b/liboctave/mx-inlines.cc
--- a/liboctave/mx-inlines.cc
+++ b/liboctave/mx-inlines.cc
@@ -28,6 +28,7 @@
 
 #include
 #include
+#include <cstring>
 
 #include "quit.h"
 
@@ -35,6 +36,7 @@
 #include "oct-locbuf.h"
 #include "oct-inttypes.h"
 #include "Array-util.h"
+#include "Array-util.h"
 
 // Provides some commonly repeated, basic loop templates.
 
@@ -245,6 +247,48 @@
 DEFMXMAPPER2 (mx_inline_xmin, xmin)
 DEFMXMAPPER2 (mx_inline_xmax, xmax)
 
+// Specializations of the array-scalar and scalar-array min/max loops for
+// real element types.  DEFMINMAXSPEC (T, F, OP) defines two explicit
+// specializations of F for element type T; OP is the comparison under
+// which the array element is kept (<= for min, >= for max):
+//
+//   F (n, r, x, y), x array, y scalar: r[i] = (x[i] OP y) ? x[i] : y
+//   F (n, r, x, y), x scalar, y array: r[i] = (y[i] OP x) ? y[i] : x
+//
+// The NaN test on the scalar is hoisted out of the loop: a NaN scalar
+// makes the result a plain copy of the n array elements.  Otherwise a NaN
+// array element fails the comparison, so the scalar is stored for it.
+// Keep comments out of the macro body: a // comment ahead of a line
+// continuation would absorb the next line.
+#define DEFMINMAXSPEC(T, F, OP) \
+template <> \
+inline void F (size_t n, T *r, const T *x, T y) \
+{ \
+  if (! xisnan (y)) \
+    { \
+      for (size_t k = 0; k < n; k++) \
+        r[k] = (x[k] OP y) ? x[k] : y; \
+    } \
+  else \
+    std::memcpy (r, x, n * sizeof (T)); \
+} \
+template <> \
+inline void F (size_t n, T *r, T x, const T *y) \
+{ \
+  if (! xisnan (x)) \
+    { \
+      for (size_t k = 0; k < n; k++) \
+        r[k] = (y[k] OP x) ? y[k] : x; \
+    } \
+  else \
+    std::memcpy (r, y, n * sizeof (T)); \
+}
+
+DEFMINMAXSPEC (double, mx_inline_xmin, <=)
+DEFMINMAXSPEC (double, mx_inline_xmax, >=)
+DEFMINMAXSPEC (float, mx_inline_xmin, <=)
+DEFMINMAXSPEC (float, mx_inline_xmax, >=)
+
 // Pairwise power
 #define DEFMXMAPPER2X(F, FUN) \
 template \