# HG changeset patch
# User Jaroslav Hajek <highegg@gmail.com>
# Date 1258542187 -3600
# Node ID c15a5ed0da58c17fcbc362fa8d8d364e804b7dca
# Parent  b2a282c37217a7abfa7fbf534b9e51b97f75d3a9
optimize bsxfun (@power, ...)

diff --git a/liboctave/CNDArray.cc b/liboctave/CNDArray.cc
--- a/liboctave/CNDArray.cc
+++ b/liboctave/CNDArray.cc
@@ -1079,6 +1079,8 @@
 BSXFUN_STDOP_DEFS_MXLOOP (ComplexNDArray)
 BSXFUN_STDREL_DEFS_MXLOOP (ComplexNDArray)
 
+BSXFUN_OP_DEF_MXLOOP (pow, ComplexNDArray, mx_inline_pow)
+
 /*
 ;;; Local Variables: ***
 ;;; mode: C++ ***
diff --git a/liboctave/CNDArray.h b/liboctave/CNDArray.h
--- a/liboctave/CNDArray.h
+++ b/liboctave/CNDArray.h
@@ -181,6 +181,8 @@
 BSXFUN_STDOP_DECLS (ComplexNDArray, OCTAVE_API)
 BSXFUN_STDREL_DECLS (ComplexNDArray, OCTAVE_API)
 
+BSXFUN_OP_DECL (pow, ComplexNDArray, OCTAVE_API)
+
 #endif
 
 /*
diff --git a/liboctave/ChangeLog b/liboctave/ChangeLog
--- a/liboctave/ChangeLog
+++ b/liboctave/ChangeLog
@@ -1,3 +1,27 @@
+2009-11-18  Jaroslav Hajek  <highegg@gmail.com>
+
+	* mx-inlines.cc (mx_inline_any_negative, mx_inline_pow): New loops.
+	(DEFMXMAPPER2X): New macro.
+	* bsxfun-decl.h (BSXFUN_OP2_DECL): New macro.
+	* bsxfun-defs.cc (BSXFUN_OP2_DEF, BSXFUN_OP2_DEF_MXLOOP): New macros.
+	(do_bsxfun_op): Use OCTAVE_QUIT.
+	* dNDArray.cc (NDArray::all_integers (void)): New method.
+	(NDArray::any_element_is_negative): Use mx_inline_any_negative.
+	(bsxfun_pow (const NDArray&, const NDArray&),
+	bsxfun_pow (const ComplexNDArray&, const NDArray&)): New functions.
+	* dNDArray.h: Update.
+	* fNDArray.cc (FloatNDArray::all_integers (void)): New method.
+	(FloatNDArray::any_element_is_negative): Use mx_inline_any_negative.
+	(bsxfun_pow (const FloatNDArray&, const FloatNDArray&),
+	bsxfun_pow (const FloatComplexNDArray&, const FloatNDArray&)): New functions.
+	* fNDArray.h: Update.
+	* CNDArray.cc (bsxfun_pow (const ComplexNDArray&, const
+	ComplexNDArray&)): New function.
+	* CNDArray.h: Update.
+	* fCNDArray.cc (bsxfun_pow (const FloatComplexNDArray&, const
+	FloatComplexNDArray&)): New function.
+	* fCNDArray.h: Update.
+
 2009-11-13  Jaroslav Hajek  <highegg@gmail.com>
 
 	* CNDArray.cc (ComplexNDArray::all_elements_are_real): 
diff --git a/liboctave/bsxfun-decl.h b/liboctave/bsxfun-decl.h
--- a/liboctave/bsxfun-decl.h
+++ b/liboctave/bsxfun-decl.h
@@ -27,6 +27,9 @@
 #define BSXFUN_OP_DECL(OP, ARRAY, API) \
 extern API ARRAY bsxfun_ ## OP (const ARRAY&, const ARRAY&);
 
+#define BSXFUN_OP2_DECL(OP, ARRAY, ARRAY1, ARRAY2, API) \
+extern API ARRAY bsxfun_ ## OP (const ARRAY1&, const ARRAY2&);
+
 #define BSXFUN_REL_DECL(OP, ARRAY, API) \
 extern API boolNDArray bsxfun_ ## OP (const ARRAY&, const ARRAY&);
 
diff --git a/liboctave/bsxfun-defs.cc b/liboctave/bsxfun-defs.cc
--- a/liboctave/bsxfun-defs.cc
+++ b/liboctave/bsxfun-defs.cc
@@ -117,6 +117,8 @@
       OCTAVE_LOCAL_BUFFER_INIT (octave_idx_type, idx, nd, 0);
       for (octave_idx_type iter = 0; iter < niter; iter++)
         {
+          OCTAVE_QUIT;
+
           // Compute indices. 
           // FIXME: performance impact noticeable?
           octave_idx_type xidx = cdvx.cum_compute_index (idx);
@@ -141,6 +143,9 @@
 #define BSXFUN_OP_DEF(OP, ARRAY) \
 ARRAY bsxfun_ ## OP (const ARRAY& x, const ARRAY& y)
 
+#define BSXFUN_OP2_DEF(OP, ARRAY, ARRAY1, ARRAY2) \
+ARRAY bsxfun_ ## OP (const ARRAY1& x, const ARRAY2& y)
+
 #define BSXFUN_REL_DEF(OP, ARRAY) \
 boolNDArray bsxfun_ ## OP (const ARRAY& x, const ARRAY& y)
 
@@ -148,6 +153,10 @@
   BSXFUN_OP_DEF(OP, ARRAY) \
   { return do_bsxfun_op<ARRAY, ARRAY, ARRAY> (x, y, LOOP, LOOP, LOOP); }
 
+#define BSXFUN_OP2_DEF_MXLOOP(OP, ARRAY, ARRAY1, ARRAY2, LOOP) \
+  BSXFUN_OP2_DEF(OP, ARRAY, ARRAY1, ARRAY2) \
+  { return do_bsxfun_op<ARRAY, ARRAY1, ARRAY2> (x, y, LOOP, LOOP, LOOP); }
+
 #define BSXFUN_REL_DEF_MXLOOP(OP, ARRAY, LOOP) \
   BSXFUN_REL_DEF(OP, ARRAY) \
   { return do_bsxfun_op<boolNDArray, ARRAY, ARRAY> (x, y, LOOP, LOOP, LOOP); }
diff --git a/liboctave/dNDArray.cc b/liboctave/dNDArray.cc
--- a/liboctave/dNDArray.cc
+++ b/liboctave/dNDArray.cc
@@ -556,11 +556,7 @@
 	  return true;
     }
   else
-    {
-      for (octave_idx_type i = 0; i < nel; i++)
-	if (elem (i) < 0)
-	  return true;
-    }
+    return mx_inline_any_negative (numel (), data ());
 
   return false;
 }
@@ -673,6 +669,22 @@
 }
 
 bool
+NDArray::all_integers (void) const
+{
+  // An element counts as an integer when D_NINT leaves it unchanged;
+  // with no elements the loop never runs and the answer is true.
+  const octave_idx_type count = nelem ();
+
+  for (octave_idx_type pos = 0; pos < count; ++pos)
+    {
+      const double current = elem (pos);
+      if (D_NINT (current) != current)
+	return false;
+    }
+  return true;
+}
+
+bool
 NDArray::too_large_for_float (void) const
 {
   octave_idx_type nel = nelem ();
@@ -1116,6 +1128,10 @@
 BSXFUN_STDOP_DEFS_MXLOOP (NDArray)
 BSXFUN_STDREL_DEFS_MXLOOP (NDArray)
 
+BSXFUN_OP_DEF_MXLOOP (pow, NDArray, mx_inline_pow)
+BSXFUN_OP2_DEF_MXLOOP (pow, ComplexNDArray, ComplexNDArray, 
+                       NDArray, mx_inline_pow)
+
 /*
 ;;; Local Variables: ***
 ;;; mode: C++ ***
diff --git a/liboctave/dNDArray.h b/liboctave/dNDArray.h
--- a/liboctave/dNDArray.h
+++ b/liboctave/dNDArray.h
@@ -82,6 +82,7 @@
   bool all_elements_are_zero (void) const;
   bool all_elements_are_int_or_inf_or_nan (void) const;
   bool all_integers (double& max_val, double& min_val) const;
+  bool all_integers (void) const;
   bool too_large_for_float (void) const;
 
   // FIXME -- this is not quite the right thing.
@@ -191,6 +192,10 @@
 BSXFUN_STDOP_DECLS (NDArray, OCTAVE_API)
 BSXFUN_STDREL_DECLS (NDArray, OCTAVE_API)
 
+BSXFUN_OP_DECL (pow, NDArray, OCTAVE_API)
+BSXFUN_OP2_DECL (pow, ComplexNDArray, ComplexNDArray, 
+                 NDArray, OCTAVE_API)
+
 #endif
 
 /*
diff --git a/liboctave/fCNDArray.cc b/liboctave/fCNDArray.cc
--- a/liboctave/fCNDArray.cc
+++ b/liboctave/fCNDArray.cc
@@ -1074,6 +1074,8 @@
 BSXFUN_STDOP_DEFS_MXLOOP (FloatComplexNDArray)
 BSXFUN_STDREL_DEFS_MXLOOP (FloatComplexNDArray)
 
+BSXFUN_OP_DEF_MXLOOP (pow, FloatComplexNDArray, mx_inline_pow)
+
 /*
 ;;; Local Variables: ***
 ;;; mode: C++ ***
diff --git a/liboctave/fCNDArray.h b/liboctave/fCNDArray.h
--- a/liboctave/fCNDArray.h
+++ b/liboctave/fCNDArray.h
@@ -181,6 +181,8 @@
 BSXFUN_STDOP_DECLS (FloatComplexNDArray, OCTAVE_API)
 BSXFUN_STDREL_DECLS (FloatComplexNDArray, OCTAVE_API)
 
+BSXFUN_OP_DECL (pow, FloatComplexNDArray, OCTAVE_API)
+
 #endif
 
 /*
diff --git a/liboctave/fNDArray.cc b/liboctave/fNDArray.cc
--- a/liboctave/fNDArray.cc
+++ b/liboctave/fNDArray.cc
@@ -514,11 +514,7 @@
 	  return true;
     }
   else
-    {
-      for (octave_idx_type i = 0; i < nel; i++)
-	if (elem (i) < 0)
-	  return true;
-    }
+    return mx_inline_any_negative (numel (), data ());
 
   return false;
 }
@@ -631,6 +627,22 @@
 }
 
 bool
+FloatNDArray::all_integers (void) const
+{
+  // True when D_NINT leaves every element unchanged (vacuously true when
+  // there are no elements).
+  const octave_idx_type count = nelem ();
+  for (octave_idx_type pos = 0; pos < count; ++pos)
+    {
+      // Single-precision temporary, matching all_integers (float&, float&).
+      const float current = elem (pos);
+      if (D_NINT (current) != current)
+	return false;
+    }
+  return true;
+}
+
+bool
 FloatNDArray::too_large_for_float (void) const
 {
   octave_idx_type nel = nelem ();
@@ -1074,6 +1086,10 @@
 BSXFUN_STDOP_DEFS_MXLOOP (FloatNDArray)
 BSXFUN_STDREL_DEFS_MXLOOP (FloatNDArray)
 
+BSXFUN_OP_DEF_MXLOOP (pow, FloatNDArray, mx_inline_pow)
+BSXFUN_OP2_DEF_MXLOOP (pow, FloatComplexNDArray, FloatComplexNDArray, 
+                       FloatNDArray, mx_inline_pow)
+
 /*
 ;;; Local Variables: ***
 ;;; mode: C++ ***
diff --git a/liboctave/fNDArray.h b/liboctave/fNDArray.h
--- a/liboctave/fNDArray.h
+++ b/liboctave/fNDArray.h
@@ -79,6 +79,7 @@
   bool all_elements_are_zero (void) const;
   bool all_elements_are_int_or_inf_or_nan (void) const;
   bool all_integers (float& max_val, float& min_val) const;
+  bool all_integers (void) const;
   bool too_large_for_float (void) const;
 
   // FIXME -- this is not quite the right thing.
@@ -188,6 +189,10 @@
 BSXFUN_STDOP_DECLS (FloatNDArray, OCTAVE_API)
 BSXFUN_STDREL_DECLS (FloatNDArray, OCTAVE_API)
 
+BSXFUN_OP_DECL (pow, FloatNDArray, OCTAVE_API)
+BSXFUN_OP2_DECL (pow, FloatComplexNDArray, FloatComplexNDArray, 
+                 FloatNDArray, OCTAVE_API)
+
 #endif
 
 /*
diff --git a/liboctave/mx-inlines.cc b/liboctave/mx-inlines.cc
--- a/liboctave/mx-inlines.cc
+++ b/liboctave/mx-inlines.cc
@@ -192,6 +192,19 @@
   return false;
 }
 
+// Return true if any of the n values starting at x is less than zero.
+template <class T>
+inline bool
+mx_inline_any_negative (size_t n, const T* x)
+{
+  const T* const stop = x + n;
+  for (const T* p = x; p != stop; ++p)
+    if (*p < 0)
+      return true;
+
+  return false;
+}
+
 template<class T>
 inline bool 
 mx_inline_all_real (size_t n, const std::complex<T>* x)
@@ -232,6 +245,20 @@
 DEFMXMAPPER2 (mx_inline_xmin, xmin)
 DEFMXMAPPER2 (mx_inline_xmax, xmax)
 
+// Pairwise power.  DEFMXMAPPER2X emits array-array, array-scalar and scalar-array loops.
+#define DEFMXMAPPER2X(F, FUN) \
+template <class R, class X, class Y> \
+inline void F (size_t n, R *r, const X *x, const Y *y) \
+{ while (n--) *r++ = FUN (*x++, *y++); } \
+template <class R, class X, class Y> \
+inline void F (size_t n, R *r, const X *x, Y y) \
+{ while (n--) *r++ = FUN (*x++, y); } \
+template <class R, class X, class Y> \
+inline void F (size_t n, R *r, X x, const Y *y) \
+{ while (n--) *r++ = FUN (x, *y++); }
+
+DEFMXMAPPER2X (mx_inline_pow, std::pow)
+
 // Arbitrary function appliers. The function is a template parameter to enable
 // inlining.
 template <class R, class X, R fun (X x)>
diff --git a/src/ChangeLog b/src/ChangeLog
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,11 @@
+2009-11-18  Jaroslav Hajek  <highegg@gmail.com>
+
+	* DLD-FUNCTIONS/bsxfun.cc (bsxfun_builtin_op): New member:
+	bsxfun_builtin_power.
+	(bsxfun_builtin_names): Include "power".
+	(do_bsxfun_real_pow): New static function.
+	(maybe_fill_table): Register power handlers.
+
 2009-11-13  Jaroslav Hajek  <highegg@gmail.com>
 
 	* ov-complex.cc (octave_complex::try_narrowing_conversion): Don't
diff --git a/src/DLD-FUNCTIONS/bsxfun.cc b/src/DLD-FUNCTIONS/bsxfun.cc
--- a/src/DLD-FUNCTIONS/bsxfun.cc
+++ b/src/DLD-FUNCTIONS/bsxfun.cc
@@ -56,6 +56,7 @@
   bsxfun_builtin_ge,
   bsxfun_builtin_and,
   bsxfun_builtin_or,
+  bsxfun_builtin_power,
   bsxfun_builtin_unknown,
   bsxfun_num_builtin_ops = bsxfun_builtin_unknown
 };
@@ -75,7 +76,8 @@
   "gt",
   "ge",
   "and",
-  "or"
+  "or",
+  "power"
 };
 
 static bsxfun_builtin_op 
@@ -110,6 +112,19 @@
   return octave_value (bsxfun_rel (xa, ya));
 }
 
+// Power on real arrays needs its own handler because the result may be complex.
+template <class NDA, class CNDA>
+static octave_value
+do_bsxfun_real_pow (const octave_value& x, const octave_value& y)
+{
+  NDA base = octave_value_extract<NDA> (x);
+  NDA expo = octave_value_extract<NDA> (y);
+  // Go complex only if y has a non-integer element and x has a negative one.
+  const bool widen = ! expo.all_integers () && base.any_element_is_negative ();
+  return widen ? octave_value (bsxfun_pow (CNDA (base), expo))
+               : octave_value (bsxfun_pow (base, expo));
+}
+
 static void maybe_fill_table (void)
 {
   static bool filled = false;
@@ -151,6 +166,15 @@
   REGISTER_OP_HANDLER (bsxfun_builtin_and, btyp_bool, boolNDArray, bsxfun_and);
   REGISTER_OP_HANDLER (bsxfun_builtin_or, btyp_bool, boolNDArray, bsxfun_or);
 
+  // Register power handlers.
+  bsxfun_handler_table[bsxfun_builtin_power][btyp_double] = 
+    do_bsxfun_real_pow<NDArray, ComplexNDArray>;
+  bsxfun_handler_table[bsxfun_builtin_power][btyp_float] = 
+    do_bsxfun_real_pow<FloatNDArray, FloatComplexNDArray>;
+
+  REGISTER_OP_HANDLER (bsxfun_builtin_power, btyp_complex, ComplexNDArray, bsxfun_pow);
+  REGISTER_OP_HANDLER (bsxfun_builtin_power, btyp_float_complex, FloatComplexNDArray, bsxfun_pow);
+
   filled = true;
 }