# HG changeset patch # User Jaroslav Hajek # Date 1229419778 -3600 # Node ID ba24ecd4c01966e23e96db382593fb16e3261512 # Parent 7f6efc2b75d93d0ffd38adb08ef27d501228c8b5 optimize permute diff --git a/liboctave/Array.cc b/liboctave/Array.cc --- a/liboctave/Array.cc +++ b/liboctave/Array.cc @@ -419,6 +419,88 @@ return retval; } +// Helper class for multi-d dimension permuting (generalized transpose). +class rec_permute_helper +{ + octave_idx_type *dim, *stride; + int top; + +public: + rec_permute_helper (const dim_vector& dv, const Array& perm) + { + int n = dv.length (); + assert (n == perm.length ()); + + dim = new octave_idx_type [2*n]; + // A hack to avoid double allocation + stride = dim + n; + top = 0; + + // Get cumulative dimensions. + OCTAVE_LOCAL_BUFFER (octave_idx_type, cdim, n+1); + cdim[0] = 1; + for (int i = 1; i < n+1; i++) cdim[i] = cdim[i-1] * dv(i-1); + + int k = 0; + // Reduce leading identity + for (; k < n && perm(k) == k; k++) ; + if (k > 0) + { + dim[0] = cdim[k]; + stride[0] = 1; + } + else + top = -1; + + // Setup the permuted strides. + for (; k < n; k++) + { + ++top; + int kk = perm(k); + dim[top] = dv(kk); + stride[top] = cdim[kk]; + } + + } + + ~rec_permute_helper (void) { delete [] dim; } + +private: + + // Recursive N-d generalized transpose + template + T *do_permute (const T *src, T *dest, int lev) const + { + if (lev == 0) + { + octave_idx_type step = stride[0], len = dim[0]; + if (step == 1) + dest = std::copy (src, src + len, dest); + else + { + for (octave_idx_type i = 0, j = 0; i < len; i++, j += step) + dest[i] = src[j]; + + dest += len; + } + + } + else + { + octave_idx_type step = stride[lev], len = dim[lev]; + for (octave_idx_type i = 0, j = 0; i < len; i++, j+= step) + dest = do_permute (src + i * step, dest, lev-1); + } + + return dest; + } + +public: + + template + void permute (const T *src, T *dest) const { do_permute (src, dest, top); } + +}; template @@ -432,7 +514,7 @@ dim_vector dv = dims (); dim_vector dv_new; - int perm_vec_len = perm_vec.length (); + int perm_vec_len = perm_vec_arg.length (); if (perm_vec_len < dv.length ()) (*current_liboctave_error_handler) @@ -444,13 +526,12 @@ dv.resize (perm_vec_len, 1); // Need this array to check for identical elements in permutation array. - Array checked (perm_vec_len, false); + OCTAVE_LOCAL_BUFFER_INIT (bool, checked, perm_vec_len, false); // Find dimension vector of permuted array. for (int i = 0; i < perm_vec_len; i++) { octave_idx_type perm_elt = perm_vec.elem (i); - if (perm_elt >= perm_vec_len || perm_elt < 0) { (*current_liboctave_error_handler) @@ -460,7 +541,7 @@ return retval; } - if (checked.elem(perm_elt)) + if (checked[perm_elt]) { (*current_liboctave_error_handler) ("%s: permutation vector cannot contain identical elements", @@ -469,86 +550,23 @@ return retval; } else - checked.elem(perm_elt) = true; + checked[perm_elt] = true; dv_new(i) = dv(perm_elt); } - int nd = dv.length (); - - // FIXME -- it would be nice to have a sort method in the - // Array class that also returns the sort indices. - if (inv) { - OCTAVE_LOCAL_BUFFER (permute_vector, pvec, nd); - - for (int i = 0; i < nd; i++) - { - pvec[i].pidx = perm_vec(i); - pvec[i].iidx = i; - } - - octave_qsort (pvec, static_cast (nd), - sizeof (permute_vector), permute_vector_compare); - - for (int i = 0; i < nd; i++) - { - perm_vec(i) = pvec[i].iidx; - dv_new(i) = dv(perm_vec(i)); - } + for (int i = 0; i < perm_vec_len; i++) + perm_vec(perm_vec_arg(i)) = i; } - retval.resize (dv_new); + retval = Array (dv_new); if (numel () > 0) { - Array cp (nd+1, 1); - for (octave_idx_type i = 1; i < nd+1; i++) - cp(i) = cp(i-1) * dv(i-1); - - octave_idx_type incr = cp(perm_vec(0)); - - Array base_delta (nd-1, 0); - Array base_delta_max (nd-1); - Array base_incr (nd-1); - for (octave_idx_type i = 0; i < nd-1; i++) - { - base_delta_max(i) = dv_new(i+1); - base_incr(i) = cp(perm_vec(i+1)); - } - - octave_idx_type nr_new = dv_new(0); - octave_idx_type nel_new = dv_new.numel (); - octave_idx_type n = nel_new / nr_new; - - octave_idx_type k = 0; - - for (octave_idx_type i = 0; i < n; i++) - { - octave_idx_type iidx = 0; - for (octave_idx_type kk = 0; kk < nd-1; kk++) - iidx += base_delta(kk) * base_incr(kk); - - for (octave_idx_type j = 0; j < nr_new; j++) - { - OCTAVE_QUIT; - - retval(k++) = elem(iidx); - iidx += incr; - } - - base_delta(0)++; - - for (octave_idx_type kk = 0; kk < nd-2; kk++) - { - if (base_delta(kk) == base_delta_max(kk)) - { - base_delta(kk) = 0; - base_delta(kk+1)++; - } - } - } + rec_permute_helper rh (dv, perm_vec); + rh.permute (data (), retval.fortran_vec ()); } retval.chop_trailing_singletons (); diff --git a/liboctave/ChangeLog b/liboctave/ChangeLog --- a/liboctave/ChangeLog +++ b/liboctave/ChangeLog @@ -1,3 +1,8 @@ +2008-12-16 Jaroslav Hajek + + * Array.cc (rec_permute_helper): New class. + (Array::permute): Rewrite using the recursive algorithm. + 2008-12-12 David Bateman * sparse-base-chol.cc (inverse): Fix inversion based on cholesky