Mercurial > hg > octave-nkf
changeset 9921:7c8392a034e6
fix & improve lookup API
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Sat, 05 Dec 2009 06:10:41 +0100 |
parents | 56fbe170d354 |
children | 3a8327d51ed4 |
files | liboctave/Array.cc liboctave/Array.h liboctave/ChangeLog liboctave/oct-sort.cc liboctave/oct-sort.h src/ChangeLog src/DLD-FUNCTIONS/lookup.cc |
diffstat | 7 files changed, 192 insertions(+), 310 deletions(-) [+] |
line wrap: on
line diff
--- a/liboctave/Array.cc +++ b/liboctave/Array.cc @@ -2242,9 +2242,7 @@ { Array<octave_idx_type> idx; - octave_sort<T> lsort; - - lsort.set_compare (safe_comparator (mode, *this, true)); + octave_sort<T> lsort (safe_comparator (mode, *this, true)); octave_idx_type r = rows (), c = cols (); @@ -2336,14 +2334,11 @@ return lsort.lookup (data (), n, value); } -// Ditto, but for an array of values, specializing on long runs. -// Adds optional offset to all indices. template <class T> Array<octave_idx_type> -Array<T>::lookup (const Array<T>& values, sortmode mode, - bool linf, bool rinf) const +Array<T>::lookup (const Array<T>& values, sortmode mode) const { - octave_idx_type n = numel (); + octave_idx_type n = numel (), nval = values.numel (); octave_sort<T> lsort; Array<octave_idx_type> idx (values.dims ()); @@ -2358,74 +2353,30 @@ lsort.set_compare (mode); - // set offset and shift size. - octave_idx_type offset = 0; - - if (linf && n > 0) + // This determines the split ratio between the O(M*log2(N)) and O(M+N) algorithms. + static const double ratio = 1.0; + sortmode vmode = UNSORTED; + + // Attempt the O(M+N) algorithm if M is large enough. + if (nval > ratio * n / xlog2 (n + 1.0)) { - offset++; - n--; + vmode = values.is_sorted (); + // The table must not contain a NaN. + if ((vmode == ASCENDING && sort_isnan<T> (values(nval-1))) + || (vmode == DESCENDING && sort_isnan<T> (values(0)))) + vmode = UNSORTED; } - if (rinf && n > 0) - n--; - - lsort.lookup (data () + offset, n, values.data (), values.numel (), - idx.fortran_vec (), offset); + + if (vmode != UNSORTED) + lsort.lookup_sorted (data (), n, values.data (), nval, + idx.fortran_vec (), vmode != mode); + else + lsort.lookup (data (), n, values.data (), nval, idx.fortran_vec ()); return idx; } template <class T> -Array<octave_idx_type> -Array<T>::lookupm (const Array<T>& values, sortmode mode) const -{ - octave_idx_type n = numel (); - octave_sort<T> lsort; - Array<octave_idx_type> idx (values.dims ()); - - if (mode == UNSORTED) - { - // auto-detect mode - if (n > 1 && lsort.descending_compare (elem (0), elem (n-1))) - mode = DESCENDING; - else - mode = ASCENDING; - } - - lsort.set_compare (mode); - - lsort.lookupm (data (), n, values.data (), values.numel (), - idx.fortran_vec ()); - - return idx; -} - -template <class T> -Array<bool> -Array<T>::lookupb (const Array<T>& values, sortmode mode) const -{ - octave_idx_type n = numel (); - octave_sort<T> lsort; - Array<bool> match (values.dims ()); - - if (mode == UNSORTED) - { - // auto-detect mode - if (n > 1 && lsort.descending_compare (elem (0), elem (n-1))) - mode = DESCENDING; - else - mode = ASCENDING; - } - - lsort.set_compare (mode); - - lsort.lookupb (data (), n, values.data (), values.numel (), - match.fortran_vec ()); - - return match; -} - -template <class T> octave_idx_type Array<T>::nnz (void) const { @@ -2700,14 +2651,8 @@ Array<T>::lookup (T const &, sortmode) const \ { return 0; } \ template <> Array<octave_idx_type> \ -Array<T>::lookup (const Array<T>&, sortmode, bool, bool) const \ +Array<T>::lookup (const Array<T>&, sortmode) const \ { return Array<octave_idx_type> (); } \ -template <> Array<octave_idx_type> \ -Array<T>::lookupm (const Array<T>&, sortmode) const \ -{ return Array<octave_idx_type> (); } \ -template <> Array<bool> \ -Array<T>::lookupb (const Array<T>&, sortmode) const \ -{ return Array<bool> (); } \ \ template <> octave_idx_type \ Array<T>::nnz (void) const\
--- a/liboctave/Array.h +++ b/liboctave/Array.h @@ -622,24 +622,13 @@ // Ordering is auto-detected or can be specified. sortmode is_sorted_rows (sortmode mode = UNSORTED) const; - // Do a binary lookup in a sorted array. + // Do a binary lookup in a sorted array. Must not contain NaNs. // Mode can be specified or is auto-detected by comparing 1st and last element. octave_idx_type lookup (const T& value, sortmode mode = UNSORTED) const; - // Ditto, but for an array of values, specializing on long runs. - // If linf is true, the leftmost interval is extended to infinity - // (indices will be >= 1). - // If rinf is true, the rightmost interval is extended to infinity - // (indices will be <= length ()-1). - Array<octave_idx_type> lookup (const Array<T>& values, sortmode mode = UNSORTED, - bool linf = false, bool rinf = false) const; - - // This looks up only exact matches, giving their indices. Non-exact matches get - // the value -1. - Array<octave_idx_type> lookupm (const Array<T>& values, sortmode mode = UNSORTED) const; - - // This looks up only exact matches, returning true/false if match. - Array<bool> lookupb (const Array<T>& values, sortmode mode = UNSORTED) const; + // Ditto, but for an array of values, specializing on the case when values + // are sorted. NaNs get the value N. + Array<octave_idx_type> lookup (const Array<T>& values, sortmode mode = UNSORTED) const; // Count nonzero elements. octave_idx_type nnz (void) const;
--- a/liboctave/ChangeLog +++ b/liboctave/ChangeLog @@ -1,3 +1,22 @@ +2009-12-05 Jaroslav Hajek <highegg@gmail.com> + + * oct-sort.cc (lookup_binary): Remove. + (octave_sort<T>::lookup (const T*, octave_idx_type, const T&, comp)): Move + code here. + (octave_sort<T>::lookup (const T*, octave_idx_type, const T*, + octave_idx_type, octave_idx_type *, comp)): Remove offset parameter. + Use a simple sequence of lookups. + (octave_sort<T>::lookup (const T*, octave_idx_type, const T*, + octave_idx_type, octave_idx_type *)): Update. + (octave_sort<T>::lookupm, octave_sort<T>::lookupb): Remove. + (octave_sort<T>::lookup_sorted): New overloaded method. + + * Array.cc (Array<T>::lookup (const Array<T>&, sortmode)): Remove + linf & rinf params. Rewrite using is_sorted to check for sortedness, + call octave_sort::lookup_sorted to do the sorted merge. + (Array<T>::lookupm, Array<T>::lookupb): Remove. + (NO_INSTANTIATE_ARRAY_SORT): Update. + 2009-12-05 Jaroslav Hajek <highegg@gmail.com> * Array.cc (sortrows_comparator): Rename to safe_comparator.
--- a/liboctave/oct-sort.cc +++ b/liboctave/oct-sort.cc @@ -1743,17 +1743,18 @@ } // The simple binary lookup. -template <class T, class Comp> -inline octave_idx_type -lookup_binary (const T *data, octave_idx_type hi, - const T& val, Comp comp) + +template <class T> template <class Comp> +octave_idx_type +octave_sort<T>::lookup (const T *data, octave_idx_type nel, + const T& value, Comp comp) { - octave_idx_type lo = 0; + octave_idx_type lo = 0, hi = nel; while (lo < hi) { octave_idx_type mid = lo + ((hi-lo) >> 1); - if (comp (val, data[mid])) + if (comp (value, data[mid])) hi = mid; else lo = mid + 1; @@ -1762,14 +1763,6 @@ return lo; } -template <class T> template <class Comp> -octave_idx_type -octave_sort<T>::lookup (const T *data, octave_idx_type nel, - const T& value, Comp comp) -{ - return lookup_binary (data, nel, value, comp); -} - template <class T> octave_idx_type octave_sort<T>::lookup (const T *data, octave_idx_type nel, @@ -1793,30 +1786,57 @@ return retval; } -// This determines the split ratio between the O(M*log2(N)) and O(M+N) algorithms. -static const double ratio = 1.0; - template <class T> template <class Comp> void octave_sort<T>::lookup (const T *data, octave_idx_type nel, const T *values, octave_idx_type nvalues, - octave_idx_type *idx, octave_idx_type offset, Comp comp) + octave_idx_type *idx, Comp comp) +{ + // Use a sequence of binary lookups. + // TODO: Can this be sped up generally? The sorted merge case is dealt with + // elsewhere. + for (octave_idx_type j = 0; j < nvalues; j++) + idx[j] = lookup (data, nel, values[j], comp); +} + +template <class T> +void +octave_sort<T>::lookup (const T *data, octave_idx_type nel, + const T* values, octave_idx_type nvalues, + octave_idx_type *idx) { - // Check whether we're comparing two sorted arrays, comparable in size. - if (nvalues >= ratio * nel / xlog2 (nel + 1.0) - && is_sorted (values, nvalues, comp)) +#ifdef INLINE_ASCENDING_SORT + if (compare == ascending_compare) + lookup (data, nel, values, nvalues, idx, std::less<T> ()); + else +#endif +#ifdef INLINE_DESCENDING_SORT + if (compare == descending_compare) + lookup (data, nel, values, nvalues, idx, std::greater<T> ()); + else +#endif + if (compare) + lookup (data, nel, values, nvalues, idx, std::ptr_fun (compare)); +} + +template <class T> template <class Comp> +void +octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel, + const T *values, octave_idx_type nvalues, + octave_idx_type *idx, bool rev, Comp comp) +{ + if (rev) { - // Use the linear algorithm. - octave_idx_type i = 0, j = 0; + octave_idx_type i = 0, j = nvalues - 1; - if (j != nvalues && i != nel) + if (nvalues > 0 && nel > 0) { while (true) { if (comp (values[j], data[i])) { - idx[j] = i + offset; - if (++j == nvalues) + idx[j] = i; + if (--j < 0) break; } else if (++i == nel) @@ -1824,58 +1844,20 @@ } } - for (; j != nvalues; j++) - idx[j] = i + offset; - + for (; j >= 0; j--) + idx[j] = i; } else { - // Use a sequence of binary lookups. - for (octave_idx_type j = 0; j < nvalues; j++) - idx[j] = lookup_binary (data, nel, values[j], comp) + offset; - } -} - -template <class T> -void -octave_sort<T>::lookup (const T *data, octave_idx_type nel, - const T* values, octave_idx_type nvalues, - octave_idx_type *idx, octave_idx_type offset) -{ -#ifdef INLINE_ASCENDING_SORT - if (compare == ascending_compare) - lookup (data, nel, values, nvalues, idx, offset, std::less<T> ()); - else -#endif -#ifdef INLINE_DESCENDING_SORT - if (compare == descending_compare) - lookup (data, nel, values, nvalues, idx, offset, std::greater<T> ()); - else -#endif - if (compare) - lookup (data, nel, values, nvalues, idx, offset, std::ptr_fun (compare)); -} - -template <class T> template <class Comp> -void -octave_sort<T>::lookupm (const T *data, octave_idx_type nel, - const T *values, octave_idx_type nvalues, - octave_idx_type *idx, Comp comp) -{ - // Check whether we're comparing two sorted arrays, comparable in size. - if (nvalues >= ratio * nel / xlog2 (nel + 1.0) - && is_sorted (values, nvalues, comp)) - { - // Use the linear algorithm. octave_idx_type i = 0, j = 0; - if (j != nvalues && i != nel) + if (nvalues > 0 && nel > 0) { while (true) { if (comp (values[j], data[i])) { - idx[j] = (i != 0 && comp (data[i-1], values[j])) ? -1 : i-1; + idx[j] = i; if (++j == nvalues) break; } @@ -1885,101 +1867,28 @@ } for (; j != nvalues; j++) - idx[j] = (i != 0 && comp (data[i-1], values[j])) ? -1 : i-1; - - } - else - { - // Use a sequence of binary lookups. - for (octave_idx_type j = 0; j < nvalues; j++) - { - octave_idx_type i = lookup_binary (data, nel, values[j], comp); - idx[j] = (i != 0 && comp (data[i-1], values[j])) ? -1 : i-1; - } + idx[j] = i; } } template <class T> void -octave_sort<T>::lookupm (const T *data, octave_idx_type nel, - const T* values, octave_idx_type nvalues, - octave_idx_type *idx) +octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel, + const T* values, octave_idx_type nvalues, + octave_idx_type *idx, bool rev) { #ifdef INLINE_ASCENDING_SORT if (compare == ascending_compare) - lookupm (data, nel, values, nvalues, idx, std::less<T> ()); + lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ()); else #endif #ifdef INLINE_DESCENDING_SORT if (compare == descending_compare) - lookupm (data, nel, values, nvalues, idx, std::greater<T> ()); + lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ()); else #endif if (compare) - lookupm (data, nel, values, nvalues, idx, std::ptr_fun (compare)); -} - -template <class T> template <class Comp> -void -octave_sort<T>::lookupb (const T *data, octave_idx_type nel, - const T *values, octave_idx_type nvalues, - bool *match, Comp comp) -{ - // Check whether we're comparing two sorted arrays, comparable in size. - if (nvalues >= ratio * nel / xlog2 (nel + 1.0) - && is_sorted (values, nvalues, comp)) - { - // Use the linear algorithm. - octave_idx_type i = 0, j = 0; - - if (j != nvalues && i != nel) - { - while (true) - { - if (comp (values[j], data[i])) - { - match[j] = (i != 0 && ! comp (data[i-1], values[j])); - if (++j == nvalues) - break; - } - else if (++i == nel) - break; - } - } - - for (; j != nvalues; j++) - match[j] = (i != 0 && ! comp (data[i-1], values[j])); - - } - else - { - // Use a sequence of binary lookups. - for (octave_idx_type j = 0; j < nvalues; j++) - { - octave_idx_type i = lookup_binary (data, nel, values[j], comp); - match[j] = (i != 0 && ! comp (data[i-1], values[j])); - } - } -} - -template <class T> -void -octave_sort<T>::lookupb (const T *data, octave_idx_type nel, - const T* values, octave_idx_type nvalues, - bool *match) -{ -#ifdef INLINE_ASCENDING_SORT - if (compare == ascending_compare) - lookupb (data, nel, values, nvalues, match, std::less<T> ()); - else -#endif -#ifdef INLINE_DESCENDING_SORT - if (compare == descending_compare) - lookupb (data, nel, values, nvalues, match, std::greater<T> ()); - else -#endif - if (compare) - lookupb (data, nel, values, nvalues, match, std::ptr_fun (compare)); + lookup_sorted (data, nel, values, nvalues, idx, rev, std::ptr_fun (compare)); } template <class T> template <class Comp>
--- a/liboctave/oct-sort.h +++ b/liboctave/oct-sort.h @@ -142,22 +142,16 @@ octave_idx_type lookup (const T *data, octave_idx_type nel, const T& value); - // Ditto, but for an array of values, specializing on long runs. - // Adds offset to all indices. + // Ditto, but for an array. void lookup (const T *data, octave_idx_type nel, const T* values, octave_idx_type nvalues, - octave_idx_type *idx, octave_idx_type offset = 0); + octave_idx_type *idx); - // Lookup an array of values, only returning indices of - // exact matches. Non-matches are returned as -1. - void lookupm (const T *data, octave_idx_type nel, - const T* values, octave_idx_type nvalues, - octave_idx_type *idx); - - // Lookup an array of values, only indicating exact matches. - void lookupb (const T *data, octave_idx_type nel, - const T* values, octave_idx_type nvalues, - bool *match); + // A linear merge of two sorted tables. rev indicates the second table is + // in reverse order. + void lookup_sorted (const T *data, octave_idx_type nel, + const T* values, octave_idx_type nvalues, + octave_idx_type *idx, bool rev = false); // Rearranges the array so that the elements with indices // lo..up-1 are in their correct place. @@ -316,17 +310,12 @@ template <class Comp> void lookup (const T *data, octave_idx_type nel, const T* values, octave_idx_type nvalues, - octave_idx_type *idx, octave_idx_type offset, Comp comp); + octave_idx_type *idx, Comp comp); template <class Comp> - void lookupm (const T *data, octave_idx_type nel, - const T* values, octave_idx_type nvalues, - octave_idx_type *idx, Comp comp); - - template <class Comp> - void lookupb (const T *data, octave_idx_type nel, - const T* values, octave_idx_type nvalues, - bool *match, Comp comp); + void lookup_sorted (const T *data, octave_idx_type nel, + const T* values, octave_idx_type nvalues, + octave_idx_type *idx, bool rev, Comp comp); template <class Comp> void nth_element (T *data, octave_idx_type nel,
--- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,8 @@ +2009-12-05 Jaroslav Hajek <highegg@gmail.com> + + * DLD-FUNCTIONS/lookup.cc (do_numeric_lookup): Rewrite. + (Flookup): Simplify string part. Use Array<std::string>::lookup. + 2009-12-04 John W. Eaton <jwe@octave.org> * DLD-FUNCTIONS/urlwrite.cc (curl_handle::init): Always use
--- a/src/DLD-FUNCTIONS/lookup.cc +++ b/src/DLD-FUNCTIONS/lookup.cc @@ -111,24 +111,61 @@ { octave_value retval; + Array<octave_idx_type> idx = array.lookup (values); + octave_idx_type n = array.numel (), nval = values.numel (); + + // Post-process. if (match_bool) { - boolNDArray match = Array<bool> (array.lookupb (values)); + boolNDArray match (idx.dims ()); + for (octave_idx_type i = 0; i < nval; i++) + { + octave_idx_type j = idx.xelem (i); + match.xelem (i) = j != 0 && values(i) == array(j-1); + } + retval = match; } + else if (match_idx || left_inf || right_inf) + { + NDArray ridx (idx.dims ()); + if (match_idx) + { + for (octave_idx_type i = 0; i < nval; i++) + { + octave_idx_type j = idx.xelem (i); + ridx.xelem (i) = (j != 0 && values(i) == array(j-1)) ? j : 0; + } + } + else if (left_inf && right_inf) + { + for (octave_idx_type i = 0; i < nval; i++) + { + octave_idx_type j = idx.xelem (i); + ridx.xelem (i) = std::min (std::max (1, j), n-1); + } + } + else if (left_inf) + { + for (octave_idx_type i = 0; i < nval; i++) + { + octave_idx_type j = idx.xelem (i); + ridx.xelem (i) = std::max (1, j); + } + } + else if (right_inf) + { + for (octave_idx_type i = 0; i < nval; i++) + { + octave_idx_type j = idx.xelem (i); + ridx.xelem (i) = std::min (j, n-1); + } + } + + retval = ridx; + } else - { - Array<octave_idx_type> idx; - - if (match_idx) - idx = array.lookupm (values); - else - idx = array.lookup (values, UNSORTED, left_inf, right_inf); - - // if left_inf is specified, the result is a valid 1-based index. - bool cache_index = left_inf && array.numel () > 1; - retval = octave_value (idx, match_idx, cache_index); - } + retval = idx; return retval; } @@ -262,30 +299,14 @@ } else if (str_case) { - Array<std::string> str_table = table.cellstr_value (); - - // Here we'll use octave_sort directly to avoid converting the array - // for case-insensitive comparison. - - - // check for case-insensitive option - if (nargin == 3) + // FIXME: this should be handled directly. + if (icase) { - std::string opt = args(2).string_value (); + table = table.xtoupper (); + y = y.xtoupper (); } - sortmode mode = (icase ? get_sort_mode (str_table, stri_comp_gt) - : get_sort_mode (str_table)); - - bool (*str_comp) (const std::string&, const std::string&); - - // pick the correct comparator - if (mode == DESCENDING) - str_comp = icase ? stri_comp_gt : octave_sort<std::string>::descending_compare; - else - str_comp = icase ? stri_comp_lt : octave_sort<std::string>::ascending_compare; - - octave_sort<std::string> lsort (str_comp); + Array<std::string> str_table = table.cellstr_value (); Array<std::string> str_y (1); if (y.is_cellstr ()) @@ -293,32 +314,37 @@ else str_y(0) = y.string_value (); + Array<octave_idx_type> idx = str_table.lookup (str_y); + octave_idx_type nval = str_y.numel (); + + // Post-process. if (match_bool) { - boolNDArray match (str_y.dims ()); - - lsort.lookupb (str_table.data (), str_table.nelem (), str_y.data (), - str_y.nelem (), match.fortran_vec ()); + boolNDArray match (idx.dims ()); + for (octave_idx_type i = 0; i < nval; i++) + { + octave_idx_type j = idx.xelem (i); + match.xelem (i) = j != 0 && str_y(i) == str_table(j-1); + } retval = match; } - else + else if (match_idx) { - Array<octave_idx_type> idx (str_y.dims ()); - + NDArray ridx (idx.dims ()); if (match_idx) { - lsort.lookupm (str_table.data (), str_table.nelem (), str_y.data (), - str_y.nelem (), idx.fortran_vec ()); - } - else - { - lsort.lookup (str_table.data (), str_table.nelem (), str_y.data (), - str_y.nelem (), idx.fortran_vec ()); + for (octave_idx_type i = 0; i < nval; i++) + { + octave_idx_type j = idx.xelem (i); + ridx.xelem (i) = (j != 0 && str_y(i) == str_table(j-1)) ? j : 0; + } } - retval = octave_value (idx, match_idx); + retval = ridx; } + else + retval = idx; } else print_usage ();