Mercurial > hg > octave-nkf
diff liboctave/bsxfun-defs.cc @ 13139:aa4a23337a0f
Enable BSX in-place for missing assignment operators
* bsxfun-defs.cc (do_inplace_bsxfun_op): New function.
* bsxfun.h (is_valid_bsxfun): Fix logic, had bug with empty
dimensions. (is_valid_inplace_bsxfun): New function.
* mx-inlines.cc (DEFMXBOOLOPEQ): Add missing function for
vector-by-scalar operation. (do_mm_inplace_op): Call new
inplace_bsxfun functions.
* MArray.cc (MArray::operator+, MArray::operator-, MArray::product_eq,
MArray::quotient_eq): Change calling form for do_mm_inplace_op.
* boolNDArray.cc (boolNDArray::mx_el_and_assign,
boolNDArray::mx_el_or_assign): Ditto.
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Thu, 15 Sep 2011 05:11:46 -0500 |
parents | 15eefbd9d4e8 |
children | 782dc237a02d |
line wrap: on
line diff
--- a/liboctave/bsxfun-defs.cc
+++ b/liboctave/bsxfun-defs.cc
@@ -134,6 +134,96 @@
   return retval;
 }
 
+// Apply a binary operation to R in place, broadcasting X across R.
+//
+// X is first redimensioned to the number of dimensions of R.  The
+// leading dimensions on which R and X agree are folded into a single
+// contiguous run of LDR elements that one low-level call handles.  In
+// the remaining dimensions, a singleton dimension of X is reused for
+// every index of R along that dimension.
+//
+// OP_VV receives an element count, a pointer into R and a pointer into
+// X; OP_VS receives an element count, a pointer into R and one element
+// of X.  Nothing is done when R is empty.
+//
+// NOTE(review): shapes are not validated here; presumably callers check
+// them first (e.g. with is_valid_inplace_bsxfun) -- confirm.
+template <class R, class X>
+void
+do_inplace_bsxfun_op (Array<R>& r, const Array<X>& x,
+                      void (*op_vv) (size_t, R *, const X *),
+                      void (*op_vs) (size_t, R *, X))
+{
+  dim_vector dvr = r.dims (), dvx = x.dims ();
+  octave_idx_type nd = r.ndims ();
+  dvx.redim (nd);
+
+  const X* xvec = x.fortran_vec ();
+  R* rvec = r.fortran_vec ();
+
+  // Fold the common leading dimensions.
+  octave_idx_type start, ldr = 1;
+  for (start = 0; start < nd; start++)
+    {
+      if (dvr(start) != dvx(start))
+        break;
+      ldr *= dvr(start);
+    }
+
+  if (r.is_empty ())
+    ; // do nothing
+  else if (start == nd)
+    op_vv (r.numel (), rvec, xvec);
+  else
+    {
+      // Determine the type of the low-level loop.
+      bool xsing = false;
+      // The folded run has a single element: if X is a singleton along
+      // the first differing dimension, fold that dimension of R instead
+      // and apply the vector-by-scalar form.
+      if (ldr == 1)
+        {
+          xsing = dvx(start) == 1;
+          if (xsing)
+            {
+              ldr *= dvr(start) * dvx(start);
+              start++;
+            }
+        }
+
+      dim_vector cdvx = dvx.cumulative ();
+      // Nullify singleton dims to achieve a spread effect.
+      // std::max needs both arguments to have the same type, so the
+      // literal is written as octave_idx_type.
+      for (int i = std::max (start, octave_idx_type (1)); i < nd; i++)
+        {
+          if (dvx(i) == 1)
+            cdvx(i-1) = 0;
+        }
+
+      octave_idx_type niter = dvr.numel (start);
+      // The index array.
+      OCTAVE_LOCAL_BUFFER_INIT (octave_idx_type, idx, nd, 0);
+      for (octave_idx_type iter = 0; iter < niter; iter++)
+        {
+          octave_quit ();
+
+          // Compute indices.
+          // FIXME: performance impact noticeable?
+          octave_idx_type xidx = cdvx.cum_compute_index (idx);
+          octave_idx_type ridx = dvr.compute_index (idx);
+
+          // Apply the low-level loop.
+          if (xsing)
+            op_vs (ldr, rvec + ridx, xvec[xidx]);
+          else
+            op_vv (ldr, rvec + ridx, xvec + xidx);
+
+          dvr.increment_index (idx + start, start);
+        }
+    }
+}
+
 #define BSXFUN_OP_DEF(OP, ARRAY) \
   ARRAY bsxfun_ ## OP (const ARRAY& x, const ARRAY& y)