Mercurial > hg > octave-nkf
changeset 13139:aa4a23337a0f
Enable BSX in-place for missing assignment operators
* bsxfun-defs.cc (do_inplace_bsxfun_op): New function.
* bsxfun.h (is_valid_bsxfun): Fix logic, had bug with empty
dimensions. (is_valid_inplace_bsxfun): New function.
* mx-inlines.cc (DEFMXBOOLOPEQ): Add missing function for
vector-by-scalar operation. (do_mm_inplace_op): Call new
inplace_bsxfun functions.
* MArray.cc (MArray::operator+=, MArray::operator-=, MArray::product_eq,
MArray::quotient_eq): Change calling form for do_mm_inplace_op.
* boolNDArray.cc (boolNDArray::mx_el_and_assign,
boolNDArray::mx_el_or_assign): Ditto.
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Thu, 15 Sep 2011 05:11:46 -0500 |
parents | 52c5799130c2 |
children | 98d23b0f16e1 |
files | liboctave/MArray.cc liboctave/boolNDArray.cc liboctave/bsxfun-defs.cc liboctave/bsxfun.h liboctave/mx-inlines.cc |
diffstat | 5 files changed, 115 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/liboctave/MArray.cc +++ b/liboctave/MArray.cc @@ -264,7 +264,7 @@ if (a.is_shared ()) a = a + b; else - do_mm_inplace_op<T, T> (a, b, mx_inline_add2, "+="); + do_mm_inplace_op<T, T> (a, b, mx_inline_add2, mx_inline_add2, "+="); return a; } @@ -275,7 +275,7 @@ if (a.is_shared ()) a = a - b; else - do_mm_inplace_op<T, T> (a, b, mx_inline_sub2, "-="); + do_mm_inplace_op<T, T> (a, b, mx_inline_sub2, mx_inline_sub2, "-="); return a; } @@ -287,7 +287,7 @@ if (a.is_shared ()) return a = product (a, b); else - do_mm_inplace_op<T, T> (a, b, mx_inline_mul2, ".*="); + do_mm_inplace_op<T, T> (a, b, mx_inline_mul2, mx_inline_mul2, ".*="); return a; } @@ -298,7 +298,7 @@ if (a.is_shared ()) return a = quotient (a, b); else - do_mm_inplace_op<T, T> (a, b, mx_inline_div2, "./="); + do_mm_inplace_op<T, T> (a, b, mx_inline_div2, mx_inline_div2, "./="); return a; }
--- a/liboctave/boolNDArray.cc +++ b/liboctave/boolNDArray.cc @@ -149,7 +149,8 @@ if (a.is_shared ()) a = mx_el_and (a, b); else - do_mm_inplace_op<bool, bool> (a, b, mx_inline_and2, "operator &="); + do_mm_inplace_op<bool, bool> (a, b, mx_inline_and2, mx_inline_and2, + "operator &="); return a; } @@ -160,7 +161,8 @@ if (a.is_shared ()) a = mx_el_or (a, b); else - do_mm_inplace_op<bool, bool> (a, b, mx_inline_or2, "operator |="); + do_mm_inplace_op<bool, bool> (a, b, mx_inline_or2, mx_inline_or2, + "operator |="); return a; }
--- a/liboctave/bsxfun-defs.cc +++ b/liboctave/bsxfun-defs.cc @@ -134,6 +134,77 @@ return retval; } +template <class R, class X> +void +do_inplace_bsxfun_op (Array<R>& r, const Array<X>& x, + void (*op_vv) (size_t, R *, const X *), + void (*op_vs) (size_t, R *, X)) +{ + dim_vector dvr = r.dims (), dvx = x.dims (); + octave_idx_type nd = r.ndims (); + dvx.redim (nd); + + const X* xvec = x.fortran_vec (); + R* rvec = r.fortran_vec (); + + // Fold the common leading dimensions. + octave_idx_type start, ldr = 1; + for (start = 0; start < nd; start++) + { + if (dvr(start) != dvx(start)) + break; + ldr *= dvr(start); + } + + if (r.is_empty ()) + ; // do nothing + else if (start == nd) + op_vv (r.numel (), rvec, xvec); + else + { + // Determine the type of the low-level loop. + bool xsing = false; + if (ldr == 1) + { + xsing = dvx(start) == 1; + if (xsing) + { + ldr *= dvr(start) * dvx(start); + start++; + } + } + + dim_vector cdvx = dvx.cumulative (); + // Nullify singleton dims to achieve a spread effect. + for (int i = std::max (start, 1); i < nd; i++) + { + if (dvx(i) == 1) + cdvx(i-1) = 0; + } + + octave_idx_type niter = dvr.numel (start); + // The index array. + OCTAVE_LOCAL_BUFFER_INIT (octave_idx_type, idx, nd, 0); + for (octave_idx_type iter = 0; iter < niter; iter++) + { + octave_quit (); + + // Compute indices. + // FIXME: performance impact noticeable? + octave_idx_type xidx = cdvx.cum_compute_index (idx); + octave_idx_type ridx = dvr.compute_index (idx); + + // Apply the low-level loop. + if (xsing) + op_vs (ldr, rvec + ridx, xvec[xidx]); + else + op_vv (ldr, rvec + ridx, xvec + xidx); + + dvr.increment_index (idx + start, start); + } + } +} + #define BSXFUN_OP_DEF(OP, ARRAY) \ ARRAY bsxfun_ ## OP (const ARRAY& x, const ARRAY& y)
--- a/liboctave/bsxfun.h +++ b/liboctave/bsxfun.h @@ -33,7 +33,31 @@ { for (int i = 0; i < std::min (dx.length (), dy.length ()); i++) { - if ( dx(i) > 1 && dy(i) > 1 && dx(i) != dy(i)) + octave_idx_type xk = dx(i), yk = dy(i); + // Check the three conditions for valid bsxfun dims + if (! ( (xk == yk) || (xk == 1 && yk > 1) || (xk > 1 && yk == 1))) + return false; + } + return true; +} + +// since we can't change the size of the assigned-to matrix, we cannot +// apply singleton expansion to it, so the conditions to check are +// different here. +inline +bool +is_valid_inplace_bsxfun (const dim_vector& dr, const dim_vector& dx) +{ + octave_idx_type drl = dr.length (), dxl = dx.length (); + if (drl < dxl) + return false; + + for (int i = 0; i < drl; i++) + { + octave_idx_type rk = dr(i), xk = dx(i); + + // Only two valid canditions to check; can't stretch rk + if (! ( (rk == xk) || (rk > 1 && xk == 1))) return false; } return true;
--- a/liboctave/mx-inlines.cc +++ b/liboctave/mx-inlines.cc @@ -169,6 +169,9 @@ for (size_t i = 0; i < n; i++) \ r[i] OP logical_value (x[i]); \ } \ +template <class X> \ +inline void F (size_t n, bool *r, X x) throw () \ +{ for (size_t i = 0; i < n; i++) r[i] OP x; } DEFMXBOOLOPEQ (mx_inline_and2, &=) DEFMXBOOLOPEQ (mx_inline_or2, |=) @@ -391,11 +394,18 @@ inline Array<R>& do_mm_inplace_op (Array<R>& r, const Array<X>& x, void (*op) (size_t, R *, const X *) throw (), + void (*op1) (size_t, R *, X) throw (), const char *opname) { dim_vector dr = r.dims (), dx = x.dims (); if (dr == dx) - op (r.length (), r.fortran_vec (), x.data ()); + { + op (r.length (), r.fortran_vec (), x.data ()); + } + else if (is_valid_inplace_bsxfun (dr, dx)) + { + do_inplace_bsxfun_op (r, x, op, op1); + } else gripe_nonconformant (opname, dr, dx); return r;