Mercurial > hg > octave-max
changeset 10268:9a16a61ed43d
new optimizations for accumarray
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Fri, 05 Feb 2010 12:09:21 +0100 |
parents | 479c7df0cc96 |
children | 217d36560dfa |
files | liboctave/ChangeLog liboctave/MArrayN.cc liboctave/MArrayN.h liboctave/lo-mappers.h scripts/ChangeLog scripts/general/accumarray.m src/ChangeLog src/data.cc |
diffstat | 8 files changed, 366 insertions(+), 76 deletions(-) [+] |
line wrap: on
line diff
--- a/liboctave/ChangeLog +++ b/liboctave/ChangeLog @@ -1,3 +1,9 @@ +2010-02-05 Jaroslav Hajek <highegg@gmail.com> + + * MArrayN.cc (MArrayN::idx_min, MArrayN::idx_max): New methods. + * MArrayN.h: Declare them. + * lo-mappers.h (xmin, xmax): Define for general arguments. + 2010-02-04 Jaroslav Hajek <highegg@gmail.com> * chMatrix.h (charMatrix): Rebase directly on Array<char>.
--- a/liboctave/MArrayN.cc +++ b/liboctave/MArrayN.cc @@ -87,6 +87,52 @@ idx.loop (len, _idxadda_helper<T> (this->fortran_vec (), vals.data ())); } +template <class T, T op (typename ref_param<T>::type, typename ref_param<T>::type)> +struct _idxbinop_helper +{ + T *array; + const T *vals; + _idxbinop_helper (T *a, const T *v) : array (a), vals (v) { } + void operator () (octave_idx_type i) + { array[i] = op (array[i], *vals++); } +}; + +template <class T> +void +MArrayN<T>::idx_min (const idx_vector& idx, const MArrayN<T>& vals) +{ + octave_idx_type n = this->length (); + octave_idx_type ext = idx.extent (n); + if (ext > n) + { + this->resize (ext); + n = ext; + } + + octave_quit (); + + octave_idx_type len = std::min (idx.length (n), vals.length ()); + idx.loop (len, _idxbinop_helper<T, xmin> (this->fortran_vec (), vals.data ())); +} + +template <class T> +void +MArrayN<T>::idx_max (const idx_vector& idx, const MArrayN<T>& vals) +{ + octave_idx_type n = this->length (); + octave_idx_type ext = idx.extent (n); + if (ext > n) + { + this->resize (ext); + n = ext; + } + + octave_quit (); + + octave_idx_type len = std::min (idx.length (n), vals.length ()); + idx.loop (len, _idxbinop_helper<T, xmax> (this->fortran_vec (), vals.data ())); +} + // N-dimensional array with math ops. template <class T> void
--- a/liboctave/MArrayN.h +++ b/liboctave/MArrayN.h @@ -93,6 +93,10 @@ void idx_add (const idx_vector& idx, const MArrayN<T>& vals); + void idx_min (const idx_vector& idx, const MArrayN<T>& vals); + + void idx_max (const idx_vector& idx, const MArrayN<T>& vals); + void changesign (void); };
--- a/liboctave/lo-mappers.h +++ b/liboctave/lo-mappers.h @@ -69,6 +69,14 @@ extern OCTAVE_API bool octave_is_NA (double x); extern OCTAVE_API bool octave_is_NaN_or_NA (double x) GCC_ATTR_DEPRECATED; +// Generic xmin, xmax definitions +template <class T> +inline T xmin (T x, T y) +{ return x <= y ? x : y; } +template <class T> +inline T xmax (T x, T y) +{ return x >= y ? x : y; } + // This form is favorable. GCC will translate (x <= y ? x : y) without a jump, // hence the only conditional jump involved will be the first (xisnan), infrequent // and hence friendly to branch prediction.
--- a/scripts/ChangeLog +++ b/scripts/ChangeLog @@ -1,3 +1,9 @@ +2010-02-05 Jaroslav Hajek <highegg@gmail.com> + + * general/accumarray.m: Rewrite. Split sparse and dense case. Treat + cell-valued subs efficiently. Optimize dense case for @sum, @max and + @min. Optimize the @(x){x} reduction. Add tests. + 2010-02-04 Jaroslav Hajek <highegg@gmail.com> * miscellaneous/dir.m: Fix month passed to datenum.
--- a/scripts/general/accumarray.m +++ b/scripts/general/accumarray.m @@ -54,65 +54,107 @@ ## @end example ## @end deftypefn -function A = accumarray (subs, val, sz, func, fillval, isspar) +function A = accumarray (subs, val, sz = [], func = [], fillval = [], isspar = []) if (nargin < 2 || nargin > 6) print_usage (); endif if (iscell (subs)) - subs = cell2mat (cellfun (@(x) x(:), subs, "UniformOutput", false)); + subs = cellfun (@(x) x(:), subs, "UniformOutput", false); + ndims = numel (subs); + if (ndims == 1) + subs = subs{1}; + endif + else + ndims = columns (subs); endif - ndims = size (subs, 2); - if (nargin < 5 || isempty (fillval)) + if (isempty (fillval)) fillval = 0; endif - if (nargin < 6 || isempty (isspar)) + if (isempty (isspar)) isspar = false; endif - if (isspar && ndims > 2) - error ("accumarray: sparse matrices limited to 2 dimensions"); - endif + if (isspar) + + ## Sparse case. Avoid linearizing the subscripts, because it could overflow. + + if (fillval != 0) + error ("accumarray: fillval must be zero in the sparse case"); + endif + + ## Ensure subscripts are a two-column matrix. + if (iscell (subs)) + subs = [subs{:}]; + endif - if (nargin < 4 || isempty (func)) - func = @sum; - ## This is the fast summation case. Unlike the general case, - ## this case will be handled using an O(N) algorithm. + ## Validate dimensions. + if (ndims == 1) + subs(:,2) = 1; + elseif (ndims != 2) + error ("accumarray: in the sparse case, needs 1 or 2 subscripts"); + endif + + if (isnumeric (val) || islogical (val)) + vals = double (val); + else + error ("accumarray: in the sparse case, values must be numeric or logical"); + endif + + if (! (isempty (func) || func == @sum)) - if (isspar && fillval == 0) - ## The "sparse" function can handle this case. + ## Reduce values. This is not needed if we're about to sum them, because + ## "sparse" can do that. + + ## Sort indices. + [subs, idx] = sortrows (subs); + n = rows (subs); + ## Identify runs. + jdx = find (any (diff (subs, 1, 1), 2)); + jdx = [jdx; n]; + + val = cellfun (func, mat2cell (val(:)(idx), diff ([0; jdx]))); + subs = subs(jdx, :); + endif - if ((nargin < 3 || isempty (sz))) - A = sparse (subs(:,1), subs(:,2), val); - elseif (length (sz) == 2) - A = sparse (subs(:,1), subs(:,2), val, sz(1), sz(2)); - else + ## Form the sparse matrix. + if (isempty (sz)) + A = sparse (subs(:,1), subs(:,2), val); + elseif (length (sz) == 2) + A = sparse (subs(:,1), subs(:,2), val, sz(1), sz(2)); + else + error ("accumarray: dimensions mismatch") + endif + + else + + ## Linearize subscripts. + if (ndims > 1) + if (isempty (sz)) + if (iscell (subs)) + sz = cellfun (@max, subs); + else + sz = max (subs, [], 1); + endif + elseif (ndims != length (sz)) error ("accumarray: dimensions mismatch") endif - else - ## This case is handled by an internal function. - if (ndims > 1) - if ((nargin < 3 || isempty (sz))) - sz = max (subs); - elseif (ndims != length (sz)) - error ("accumarray: dimensions mismatch") - elseif (any (max (subs) > sz)) - error ("accumarray: index out of range") - endif + ## Convert multidimensional subscripts. + if (ismatrix (subs)) + subs = num2cell (subs, 1); + endif + subs = sub2ind (sz, subs{:}); + endif - ## Convert multidimensional subscripts. - subs = sub2ind (sz, num2cell (subs, 1){:}); - elseif (nargin < 3) - ## In case of linear indexing, the fast built-in accumulator - ## will determine the extent for us. - sz = []; - endif + + ## Some built-in reductions handled efficiently. - ## Call the built-in accumulator. + if (isempty (func) || func == @sum) + ## Fast summation. if (isempty (sz)) A = __accumarray_sum__ (subs, val); else @@ -127,47 +169,87 @@ mask(subs) = false; A(mask) = fillval; endif - endif + elseif (func == @max) + ## Fast maximization. - return - endif + if (isinteger (val)) + zero = intmin (class (val)); + elseif (fillval == 0 && all (val(:) >= 0)) + ## This is a common case - fillval is zero, all numbers nonegative. + zero = 0; + else + zero = NaN; # Neutral value. + endif + + if (isempty (sz)) + A = __accumarray_max__ (subs, val, zero); + else + A = __accumarray_max__ (subs, val, zero, prod (sz)); + A = reshape (A, sz); + endif - if (nargin < 3 || isempty (sz)) - sz = max (subs); - if (isscalar(sz)) - sz = [sz, 1]; - endif - elseif (length (sz) != ndims - && (ndims != 1 || length (sz) != 2 || sz(2) != 1)) - error ("accumarray: inconsistent dimensions"); - endif - - [subs, idx] = sortrows (subs); + if (fillval != zero && isnan (fillval) != isnan (zero)) + mask = true (size (A)); + mask(subs) = false; + A(mask) = fillval; + endif + elseif (func == @min) + ## Fast minimization. + + if (isinteger (val)) + zero = intmax (class (val)); + else + zero = NaN; # Neutral value. + endif + + if (isempty (sz)) + A = __accumarray_min__ (subs, val, zero); + else + A = __accumarray_min__ (subs, val, zero, prod (sz)); + A = reshape (A, sz); + endif - if (isscalar (val)) - val = repmat (size (idx)); - else - val = val(idx); - endif - cidx = find ([true; (sum (abs (diff (subs)), 2) != 0)]); - idx = cell (1, ndims); - for i = 1:ndims - idx{i} = subs (cidx, i); - endfor - x = cellfun (func, mat2cell (val(:), diff ([cidx; length(val) + 1]))); - if (isspar && fillval == 0) - A = sparse (idx{1}, idx{2}, x, sz(1), sz(2)); - else - if (iscell (x)) - ## Why did matlab choose to reverse the order of the elements - x = cellfun (@(x) flipud (x(:)), x, "UniformOutput", false); - A = cell (sz); - elseif (fillval == 0) - A = zeros (sz, class (x)); - else - A = fillval .* ones (sz); + if (fillval != zero && isnan (fillval) != isnan (zero)) + mask = true (size (A)); + mask(subs) = false; + A(mask) = fillval; + endif + else + + ## The general case. Reduce values. + n = rows (subs); + if (numel (val) == 1) + val = val(ones (1, n), 1); + else + val = val(:); + endif + + ## Sort indices. + [subs, idx] = sort (subs); + ## Identify runs. + jdx = find (diff (subs, 1, 1)); + jdx = [jdx; n]; + val = mat2cell (val(idx), diff ([0; jdx])); + ## Optimize the case when function is @(x) {x}, i.e. we just want to + ## collect the values to cells. + persistent simple_cell_str = func2str (@(x) {x}); + if (! strcmp (func2str (func), simple_cell_str)) + val = cellfun (func, val); + endif + subs = subs(jdx); + + ## Construct matrix of fillvals. + if (iscell (val)) + A = cell (sz); + elseif (fillval == 0) + A = zeros (sz, class (val)); + else + A = repmat (fillval, sz); + endif + + ## Set the reduced values. + A(subs) = val; endif - A(sub2ind (sz, idx{:})) = x; endif endfunction @@ -183,4 +265,24 @@ %!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2,4],@(x)length(x)>1),[false,false,false,false;true,false,true,false]) %!test %! A = accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2,4],@(x){x}); -%! assert (A{2},[104;102]) +%! assert (A{2},[102;104]) +%!test +%! subs = ceil (rand (2000, 3)*10); +%! val = rand (2000, 1); +%! assert (accumarray (subs, val, [], @max), accumarray (subs, val, [], @(x) max (x))); +%!test +%! subs = ceil (rand (2000, 1)*100); +%! val = rand (2000, 1); +%! assert (accumarray (subs, val, [100, 1], @min, NaN), accumarray (subs, val, [100, 1], @(x) min (x), NaN)); +%!test +%! subs = ceil (rand (2000, 2)*30); +%! subsc = num2cell (subs, 1); +%! val = rand (2000, 1); +%! assert (accumarray (subsc, val, [], [], 0, true), accumarray (subs, val, [], [], 0, true)); +%!test +%! subs = ceil (rand (2000, 3)*10); +%! subsc = num2cell (subs, 1); +%! val = rand (2000, 1); +%! assert (accumarray (subsc, val, [], @max), accumarray (subs, val, [], @max)); + +
--- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,9 @@ +2010-02-05 Jaroslav Hajek <highegg@gmail.com> + + * data.cc (F__accumarray_sum__): Allow bool and char inputs. + (do_accumarray_minmax, do_accumarray_minmax_fun): New helper funcs. + (F__accumarray_min__, F__accumarray_max__): New defuns. + 2010-02-04 John W. Eaton <jwe@octave.org> * sysdep.cc: Don't include <sys/utsname.h>.
--- a/src/data.cc +++ b/src/data.cc @@ -6347,7 +6347,7 @@ else retval = do_accumarray_sum (idx, vals.float_array_value (), n); } - else if (vals.is_numeric_type ()) + else if (vals.is_numeric_type () || vals.is_bool_type () || vals.is_string ()) { if (vals.is_complex_type ()) retval = do_accumarray_sum (idx, vals.complex_array_value (), n); @@ -6365,6 +6365,118 @@ } template <class NDT> +static NDT +do_accumarray_minmax (const idx_vector& idx, const NDT& vals, + octave_idx_type n, bool ismin, + const typename NDT::element_type& zero_val) +{ + typedef typename NDT::element_type T; + if (n < 0) + n = idx.extent (0); + else if (idx.extent (n) > n) + error ("accumarray: index out of range"); + + NDT retval (dim_vector (n, 1), zero_val); + + // Pick minimizer or maximizer. + void (MArrayN<T>::*op) (const idx_vector&, const MArrayN<T>&) = + ismin ? (&MArrayN<T>::idx_min) : (&MArrayN<T>::idx_max); + + octave_idx_type l = idx.length (n); + if (vals.numel () == 1) + (retval.*op) (idx, NDT (dim_vector (l, 1), vals(0))); + else if (vals.numel () == l) + (retval.*op) (idx, vals); + else + error ("accumarray: dimensions mismatch"); + + return retval; +} + +static octave_value_list +do_accumarray_minmax_fun (const octave_value_list& args, + bool ismin) +{ + octave_value retval; + int nargin = args.length (); + if (nargin >= 3 && nargin <= 4 && args(0).is_numeric_type ()) + { + idx_vector idx = args(0).index_vector (); + octave_idx_type n = -1; + if (nargin == 4) + n = args(3).idx_type_value (true); + + if (! error_state) + { + octave_value vals = args(1), zero = args (2); + + switch (vals.builtin_type ()) + { + case btyp_double: + retval = do_accumarray_minmax (idx, vals.array_value (), n, ismin, + zero.double_value ()); + break; + case btyp_float: + retval = do_accumarray_minmax (idx, vals.float_array_value (), n, ismin, + zero.float_value ()); + break; + case btyp_complex: + retval = do_accumarray_minmax (idx, vals.complex_array_value (), n, ismin, + zero.complex_value ()); + break; + case btyp_float_complex: + retval = do_accumarray_minmax (idx, vals.float_complex_array_value (), n, ismin, + zero.float_complex_value ()); + break; +#define MAKE_INT_BRANCH(X) \ + case btyp_ ## X: \ + retval = do_accumarray_minmax (idx, vals.X ## _array_value (), n, ismin, \ + zero.X ## _scalar_value ()); \ + break + + MAKE_INT_BRANCH (int8); + MAKE_INT_BRANCH (int16); + MAKE_INT_BRANCH (int32); + MAKE_INT_BRANCH (int64); + MAKE_INT_BRANCH (uint8); + MAKE_INT_BRANCH (uint16); + MAKE_INT_BRANCH (uint32); + MAKE_INT_BRANCH (uint64); +#undef MAKE_INT_BRANCH + case btyp_bool: + retval = do_accumarray_minmax (idx, vals.array_value (), n, ismin, + zero.bool_value ()); + break; + default: + gripe_wrong_type_arg ("accumarray", vals); + } + } + } + else + print_usage (); + + return retval; +} + +DEFUN (__accumarray_min__, args, , + "-*- texinfo -*-\n\ +@deftypefn {Built-in Function} {} __accumarray_min__ (@var{idx}, @var{vals}, @var{zero}, @var{n})\n\ +Undocumented internal function.\n\ +@end deftypefn") +{ + return do_accumarray_minmax_fun (args, true); +} + +DEFUN (__accumarray_max__, args, , + "-*- texinfo -*-\n\ +@deftypefn {Built-in Function} {} __accumarray_max__ (@var{idx}, @var{vals}, @var{zero}, @var{n})\n\ +Undocumented internal function.\n\ +@end deftypefn") +{ + return do_accumarray_minmax_fun (args, false); +} + +template <class NDT> static NDT do_merge (const Array<bool>& mask, const NDT& tval, const NDT& fval)