Mercurial > hg > octave-nkf

--- a/liboctave/Array.cc
+++ b/liboctave/Array.cc
@@ -2242,9 +2242,7 @@
 {
   Array<octave_idx_type> idx;

-  octave_sort<T> lsort;
-
-  lsort.set_compare (safe_comparator (mode, *this, true));
+  octave_sort<T> lsort (safe_comparator (mode, *this, true));

   octave_idx_type r = rows (), c = cols ();

@@ -2336,14 +2334,11 @@
   return lsort.lookup (data (), n, value);
 }

-// Ditto, but for an array of values, specializing on long runs.
-// Adds optional offset to all indices.
 template <class T>
 Array<octave_idx_type>
-Array<T>::lookup (const Array<T>& values, sortmode mode,
-                  bool linf, bool rinf) const
+Array<T>::lookup (const Array<T>& values, sortmode mode) const
 {
-  octave_idx_type n = numel ();
+  octave_idx_type n = numel (), nval = values.numel ();
   octave_sort<T> lsort;
   Array<octave_idx_type> idx (values.dims ());

@@ -2358,74 +2353,30 @@

   lsort.set_compare (mode);

-  // set offset and shift size.
-  octave_idx_type offset = 0;
-
-  if (linf && n > 0)
+  // This determines the split ratio between the O(M*log2(N)) and O(M+N) algorithms.
+  static const double ratio = 1.0;
+  sortmode vmode = UNSORTED;
+
+  // Attempt the O(M+N) algorithm if M is large enough.
+  if (nval > ratio * n / xlog2 (n + 1.0))
     {
-      offset++;
-      n--;
+      vmode = values.is_sorted ();
+      // The table must not contain a NaN.
+      if ((vmode == ASCENDING && sort_isnan<T> (values(nval-1)))
+          || (vmode == DESCENDING && sort_isnan<T> (values(0))))
+        vmode = UNSORTED;
     }
-  if (rinf && n > 0)
-    n--;
-
-  lsort.lookup (data () + offset, n, values.data (), values.numel (),
-                idx.fortran_vec (), offset);
+
+  if (vmode != UNSORTED)
+    lsort.lookup_sorted (data (), n, values.data (), nval,
+                         idx.fortran_vec (), vmode != mode);
+  else
+    lsort.lookup (data (), n, values.data (), nval, idx.fortran_vec ());

   return idx;
 }

 template <class T>
-Array<octave_idx_type>
-Array<T>::lookupm (const Array<T>& values, sortmode mode) const
-{
-  octave_idx_type n = numel ();
-  octave_sort<T> lsort;
-  Array<octave_idx_type> idx (values.dims ());
-
-  if (mode == UNSORTED)
-    {
-      // auto-detect mode
-      if (n > 1 && lsort.descending_compare (elem (0), elem (n-1)))
-        mode = DESCENDING;
-      else
-        mode = ASCENDING;
-    }
-
-  lsort.set_compare (mode);
-
-  lsort.lookupm (data (), n, values.data (), values.numel (),
-                 idx.fortran_vec ());
-
-  return idx;
-}
-
-template <class T>
-Array<bool>
-Array<T>::lookupb (const Array<T>& values, sortmode mode) const
-{
-  octave_idx_type n = numel ();
-  octave_sort<T> lsort;
-  Array<bool> match (values.dims ());
-
-  if (mode == UNSORTED)
-    {
-      // auto-detect mode
-      if (n > 1 && lsort.descending_compare (elem (0), elem (n-1)))
-        mode = DESCENDING;
-      else
-        mode = ASCENDING;
-    }
-
-  lsort.set_compare (mode);
-
-  lsort.lookupb (data (), n, values.data (), values.numel (),
-                 match.fortran_vec ());
-
-  return match;
-}
-
-template <class T>
 octave_idx_type
 Array<T>::nnz (void) const
 {
@@ -2700,14 +2651,8 @@
 Array<T>::lookup (T const &, sortmode) const \
 { return 0; } \
 template <> Array<octave_idx_type>  \
-Array<T>::lookup (const Array<T>&, sortmode, bool, bool) const \
+Array<T>::lookup (const Array<T>&, sortmode) const \
 { return Array<octave_idx_type> (); } \
-template <> Array<octave_idx_type>  \
-Array<T>::lookupm (const Array<T>&, sortmode) const \
-{ return Array<octave_idx_type> (); } \
-template <> Array<bool>  \
-Array<T>::lookupb (const Array<T>&, sortmode) const \
-{ return Array<bool> (); } \
  \
 template <> octave_idx_type \
 Array<T>::nnz (void) const\
--- a/liboctave/Array.h
+++ b/liboctave/Array.h
@@ -622,24 +622,13 @@
   // Ordering is auto-detected or can be specified.
   sortmode is_sorted_rows (sortmode mode = UNSORTED) const;

-  // Do a binary lookup in a sorted array.
+  // Do a binary lookup in a sorted array. Must not contain NaNs.
   // Mode can be specified or is auto-detected by comparing 1st and last element.
   octave_idx_type lookup (const T& value, sortmode mode = UNSORTED) const;

-  // Ditto, but for an array of values, specializing on long runs.
-  // If linf is true, the leftmost interval is extended to infinity
-  // (indices will be >= 1).
-  // If rinf is true, the rightmost interval is extended to infinity
-  // (indices will be <= length ()-1).
-  Array<octave_idx_type> lookup (const Array<T>& values, sortmode mode = UNSORTED,
-                                 bool linf = false, bool rinf = false) const;
-
-  // This looks up only exact matches, giving their indices. Non-exact matches get
-  // the value -1.
-  Array<octave_idx_type> lookupm (const Array<T>& values, sortmode mode = UNSORTED) const;
-
-  // This looks up only exact matches, returning true/false if match.
-  Array<bool> lookupb (const Array<T>& values, sortmode mode = UNSORTED) const;
+  // Ditto, but for an array of values, specializing on the case when values
+  // are sorted. NaNs get the value N.
+  Array<octave_idx_type> lookup (const Array<T>& values, sortmode mode = UNSORTED) const;

   // Count nonzero elements.
   octave_idx_type nnz (void) const;
--- a/liboctave/ChangeLog
+++ b/liboctave/ChangeLog
@@ -1,3 +1,22 @@
+2009-12-05  Jaroslav Hajek  <highegg@gmail.com>
+
+	* oct-sort.cc (lookup_binary): Remove.
+	(octave_sort<T>::lookup (const T*, octave_idx_type, const T&, comp)): Move
+	code here.
+	(octave_sort<T>::lookup (const T*, octave_idx_type, const T*,
+	octave_idx_type, octave_idx_type *, comp)): Remove offset parameter.
+	Use a simple sequence of lookups.
+	(octave_sort<T>::lookup (const T*, octave_idx_type, const T*,
+	octave_idx_type, octave_idx_type *)): Update.
+	(octave_sort<T>::lookupm, octave_sort<T>::lookupb): Remove.
+	(octave_sort<T>::lookup_sorted): New overloaded method.
+
+	* Array.cc (Array<T>::lookup (const Array<T>&, sortmode)): Remove
+	linf & rinf params. Rewrite using is_sorted to check for sortedness,
+	call octave_sort::lookup_sorted to do the sorted merge.
+	(Array<T>::lookupm, Array<T>::lookupb): Remove.
+	(NO_INSTANTIATE_ARRAY_SORT): Update.
+
 2009-12-05  Jaroslav Hajek  <highegg@gmail.com>

 	* Array.cc (sortrows_comparator): Rename to safe_comparator.
--- a/liboctave/oct-sort.cc
+++ b/liboctave/oct-sort.cc
@@ -1743,17 +1743,18 @@
 }

 // The simple binary lookup.
-template <class T, class Comp>
-inline octave_idx_type
-lookup_binary (const T *data, octave_idx_type hi,
-               const T& val, Comp comp)
+
+template <class T> template <class Comp>
+octave_idx_type
+octave_sort<T>::lookup (const T *data, octave_idx_type nel,
+                        const T& value, Comp comp)
 {
-  octave_idx_type lo = 0;
+  octave_idx_type lo = 0, hi = nel;

   while (lo < hi)
     {
       octave_idx_type mid = lo + ((hi-lo) >> 1);
-      if (comp (val, data[mid]))
+      if (comp (value, data[mid]))
         hi = mid;
       else
         lo = mid + 1;
@@ -1762,14 +1763,6 @@
   return lo;
 }

-template <class T> template <class Comp>
-octave_idx_type
-octave_sort<T>::lookup (const T *data, octave_idx_type nel,
-                        const T& value, Comp comp)
-{
-  return lookup_binary (data, nel, value, comp);
-}
-
 template <class T>
 octave_idx_type
 octave_sort<T>::lookup (const T *data, octave_idx_type nel,
@@ -1793,30 +1786,57 @@
   return retval;
 }

-// This determines the split ratio between the O(M*log2(N)) and O(M+N) algorithms.
-static const double ratio = 1.0;
-
 template <class T> template <class Comp>
 void
 octave_sort<T>::lookup (const T *data, octave_idx_type nel,
                         const T *values, octave_idx_type nvalues,
-                        octave_idx_type *idx, octave_idx_type offset, Comp comp)
+                        octave_idx_type *idx, Comp comp)
+{
+  // Use a sequence of binary lookups.
+  // TODO: Can this be sped up generally? The sorted merge case is dealt with
+  // elsewhere.
+  for (octave_idx_type j = 0; j < nvalues; j++)
+    idx[j] = lookup (data, nel, values[j], comp);
+}
+
+template <class T>
+void
+octave_sort<T>::lookup (const T *data, octave_idx_type nel,
+                        const T* values, octave_idx_type nvalues,
+                        octave_idx_type *idx)
 {
-  // Check whether we're comparing two sorted arrays, comparable in size.
-  if (nvalues >= ratio * nel / xlog2 (nel + 1.0)
-      && is_sorted (values, nvalues, comp))
+#ifdef INLINE_ASCENDING_SORT
+  if (compare == ascending_compare)
+    lookup (data, nel, values, nvalues, idx, std::less<T> ());
+  else
+#endif
+#ifdef INLINE_DESCENDING_SORT
+    if (compare == descending_compare)
+      lookup (data, nel, values, nvalues, idx, std::greater<T> ());
+  else
+#endif
+    if (compare)
+      lookup (data, nel, values, nvalues, idx, std::ptr_fun (compare));
+}
+
+template <class T> template <class Comp>
+void
+octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel,
+                               const T *values, octave_idx_type nvalues,
+                               octave_idx_type *idx, bool rev, Comp comp)
+{
+  if (rev)
     {
-      // Use the linear algorithm.
-      octave_idx_type i = 0, j = 0;
+      octave_idx_type i = 0, j = nvalues - 1;

-      if (j != nvalues && i != nel)
+      if (nvalues > 0 && nel > 0)
         {
           while (true)
             {
               if (comp (values[j], data[i]))
                 {
-                  idx[j] = i + offset;
-                  if (++j == nvalues)
+                  idx[j] = i;
+                  if (--j < 0)
                     break;
                 }
               else if (++i == nel)
@@ -1824,58 +1844,20 @@
             }
         }

-      for (; j != nvalues; j++)
-        idx[j] = i + offset;
-
+      for (; j >= 0; j--)
+        idx[j] = i;
     }
   else
     {
-      // Use a sequence of binary lookups.
-      for (octave_idx_type j = 0; j < nvalues; j++)
-        idx[j] = lookup_binary (data, nel, values[j], comp) + offset;
-    }
-}
-
-template <class T>
-void
-octave_sort<T>::lookup (const T *data, octave_idx_type nel,
-                        const T* values, octave_idx_type nvalues,
-                        octave_idx_type *idx, octave_idx_type offset)
-{
-#ifdef INLINE_ASCENDING_SORT
-  if (compare == ascending_compare)
-    lookup (data, nel, values, nvalues, idx, offset, std::less<T> ());
-  else
-#endif
-#ifdef INLINE_DESCENDING_SORT
-    if (compare == descending_compare)
-      lookup (data, nel, values, nvalues, idx, offset, std::greater<T> ());
-  else
-#endif
-    if (compare)
-      lookup (data, nel, values, nvalues, idx, offset, std::ptr_fun (compare));
-}
-
-template <class T> template <class Comp>
-void
-octave_sort<T>::lookupm (const T *data, octave_idx_type nel,
-                         const T *values, octave_idx_type nvalues,
-                         octave_idx_type *idx, Comp comp)
-{
-  // Check whether we're comparing two sorted arrays, comparable in size.
-  if (nvalues >= ratio * nel / xlog2 (nel + 1.0)
-      && is_sorted (values, nvalues, comp))
-    {
-      // Use the linear algorithm.
       octave_idx_type i = 0, j = 0;

-      if (j != nvalues && i != nel)
+      if (nvalues > 0 && nel > 0)
         {
           while (true)
             {
               if (comp (values[j], data[i]))
                 {
-                  idx[j] = (i != 0 && comp (data[i-1], values[j])) ? -1 : i-1;
+                  idx[j] = i;
                   if (++j == nvalues)
                     break;
                 }
@@ -1885,101 +1867,28 @@
         }

       for (; j != nvalues; j++)
-        idx[j] = (i != 0 && comp (data[i-1], values[j])) ? -1 : i-1;
-
-    }
-  else
-    {
-      // Use a sequence of binary lookups.
-      for (octave_idx_type j = 0; j < nvalues; j++)
-        {
-          octave_idx_type i = lookup_binary (data, nel, values[j], comp);
-          idx[j] = (i != 0 && comp (data[i-1], values[j])) ? -1 : i-1;
-        }
+        idx[j] = i;
     }
 }

 template <class T>
 void
-octave_sort<T>::lookupm (const T *data, octave_idx_type nel,
-                         const T* values, octave_idx_type nvalues,
-                         octave_idx_type *idx)
+octave_sort<T>::lookup_sorted (const T *data, octave_idx_type nel,
+                               const T* values, octave_idx_type nvalues,
+                               octave_idx_type *idx, bool rev)
 {
 #ifdef INLINE_ASCENDING_SORT
   if (compare == ascending_compare)
-    lookupm (data, nel, values, nvalues, idx, std::less<T> ());
+    lookup_sorted (data, nel, values, nvalues, idx, rev, std::less<T> ());
   else
 #endif
 #ifdef INLINE_DESCENDING_SORT
     if (compare == descending_compare)
-      lookupm (data, nel, values, nvalues, idx, std::greater<T> ());
+      lookup_sorted (data, nel, values, nvalues, idx, rev, std::greater<T> ());
   else
 #endif
     if (compare)
-      lookupm (data, nel, values, nvalues, idx, std::ptr_fun (compare));
-}
-
-template <class T> template <class Comp>
-void
-octave_sort<T>::lookupb (const T *data, octave_idx_type nel,
-                         const T *values, octave_idx_type nvalues,
-                         bool *match, Comp comp)
-{
-  // Check whether we're comparing two sorted arrays, comparable in size.
-  if (nvalues >= ratio * nel / xlog2 (nel + 1.0)
-      && is_sorted (values, nvalues, comp))
-    {
-      // Use the linear algorithm.
-      octave_idx_type i = 0, j = 0;
-
-      if (j != nvalues && i != nel)
-        {
-          while (true)
-            {
-              if (comp (values[j], data[i]))
-                {
-                  match[j] = (i != 0 && ! comp (data[i-1], values[j]));
-                  if (++j == nvalues)
-                    break;
-                }
-              else if (++i == nel)
-                break;
-            }
-        }
-
-      for (; j != nvalues; j++)
-        match[j] = (i != 0 && ! comp (data[i-1], values[j]));
-
-    }
-  else
-    {
-      // Use a sequence of binary lookups.
-      for (octave_idx_type j = 0; j < nvalues; j++)
-        {
-          octave_idx_type i = lookup_binary (data, nel, values[j], comp);
-          match[j] = (i != 0 && ! comp (data[i-1], values[j]));
-        }
-    }
-}
-
-template <class T>
-void
-octave_sort<T>::lookupb (const T *data, octave_idx_type nel,
-                         const T* values, octave_idx_type nvalues,
-                         bool *match)
-{
-#ifdef INLINE_ASCENDING_SORT
-  if (compare == ascending_compare)
-    lookupb (data, nel, values, nvalues, match, std::less<T> ());
-  else
-#endif
-#ifdef INLINE_DESCENDING_SORT
-    if (compare == descending_compare)
-      lookupb (data, nel, values, nvalues, match, std::greater<T> ());
-  else
-#endif
-    if (compare)
-      lookupb (data, nel, values, nvalues, match, std::ptr_fun (compare));
+      lookup_sorted (data, nel, values, nvalues, idx, rev, std::ptr_fun (compare));
 }

 template <class T> template <class Comp>
--- a/liboctave/oct-sort.h
+++ b/liboctave/oct-sort.h
@@ -142,22 +142,16 @@
   octave_idx_type lookup (const T *data, octave_idx_type nel,
                           const T& value);

-  // Ditto, but for an array of values, specializing on long runs.
-  // Adds offset to all indices.
+  // Ditto, but for an array.
   void lookup (const T *data, octave_idx_type nel,
                const T* values, octave_idx_type nvalues,
-               octave_idx_type *idx, octave_idx_type offset = 0);
+               octave_idx_type *idx);

-  // Lookup an array of values, only returning indices of
-  // exact matches. Non-matches are returned as -1.
-  void lookupm (const T *data, octave_idx_type nel,
-                const T* values, octave_idx_type nvalues,
-                octave_idx_type *idx);
-
-  // Lookup an array of values, only indicating exact matches.
-  void lookupb (const T *data, octave_idx_type nel,
-                const T* values, octave_idx_type nvalues,
-                bool *match);
+  // A linear merge of two sorted tables. rev indicates the second table is
+  // in reverse order.
+  void lookup_sorted (const T *data, octave_idx_type nel,
+                      const T* values, octave_idx_type nvalues,
+                      octave_idx_type *idx, bool rev = false);

   // Rearranges the array so that the elements with indices
   // lo..up-1 are in their correct place.
@@ -316,17 +310,12 @@
   template <class Comp>
   void lookup (const T *data, octave_idx_type nel,
                const T* values, octave_idx_type nvalues,
-               octave_idx_type *idx, octave_idx_type offset, Comp comp);
+               octave_idx_type *idx, Comp comp);

   template <class Comp>
-  void lookupm (const T *data, octave_idx_type nel,
-                const T* values, octave_idx_type nvalues,
-                octave_idx_type *idx, Comp comp);
-
-  template <class Comp>
-  void lookupb (const T *data, octave_idx_type nel,
-                const T* values, octave_idx_type nvalues,
-                bool *match, Comp comp);
+  void lookup_sorted (const T *data, octave_idx_type nel,
+                      const T* values, octave_idx_type nvalues,
+                      octave_idx_type *idx, bool rev, Comp comp);

   template <class Comp>
   void nth_element (T *data, octave_idx_type nel,
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2009-12-05  Jaroslav Hajek  <highegg@gmail.com>
+
+	* DLD-FUNCTIONS/lookup.cc (do_numeric_lookup): Rewrite.
+	(Flookup): Simplify string part. Use Array<std::string>::lookup.
+
 2009-12-04  John W. Eaton  <jwe@octave.org>

 	* DLD-FUNCTIONS/urlwrite.cc (curl_handle::init): Always use
--- a/src/DLD-FUNCTIONS/lookup.cc
+++ b/src/DLD-FUNCTIONS/lookup.cc
@@ -111,24 +111,61 @@
 {
   octave_value retval;

+  Array<octave_idx_type> idx = array.lookup (values);
+  octave_idx_type n = array.numel (), nval = values.numel ();
+
+  // Post-process.
   if (match_bool)
     {
-      boolNDArray match = Array<bool> (array.lookupb (values));
+      boolNDArray match (idx.dims ());
+      for (octave_idx_type i = 0; i < nval; i++)
+        {
+          octave_idx_type j = idx.xelem (i);
+          match.xelem (i) = j != 0 && values(i) == array(j-1);
+        }
+
       retval = match;
     }
+  else if (match_idx || left_inf || right_inf)
+    {
+      NDArray ridx (idx.dims ());
+      if (match_idx)
+        {
+          for (octave_idx_type i = 0; i < nval; i++)
+            {
+              octave_idx_type j = idx.xelem (i);
+              ridx.xelem (i) = (j != 0 && values(i) == array(j-1)) ? j : 0;
+            }
+        }
+      else if (left_inf && right_inf)
+        {
+          for (octave_idx_type i = 0; i < nval; i++)
+            {
+              octave_idx_type j = idx.xelem (i);
+              ridx.xelem (i) = std::min (std::max (1, j), n-1);
+            }
+        }
+      else if (left_inf)
+        {
+          for (octave_idx_type i = 0; i < nval; i++)
+            {
+              octave_idx_type j = idx.xelem (i);
+              ridx.xelem (i) = std::max (1, j);
+            }
+        }
+      else if (right_inf)
+        {
+          for (octave_idx_type i = 0; i < nval; i++)
+            {
+              octave_idx_type j = idx.xelem (i);
+              ridx.xelem (i) = std::min (j, n-1);
+            }
+        }
+
+      retval = ridx;
+    }
   else
-    {
-      Array<octave_idx_type> idx;
-
-      if (match_idx)
-        idx = array.lookupm (values);
-      else
-        idx = array.lookup (values, UNSORTED, left_inf, right_inf);
-
-      // if left_inf is specified, the result is a valid 1-based index.
-      bool cache_index = left_inf && array.numel () > 1;
-      retval = octave_value (idx, match_idx, cache_index);
-    }
+    retval = idx;

   return retval;
 }
@@ -262,30 +299,14 @@
     }
   else if (str_case)
     {
-      Array<std::string> str_table = table.cellstr_value ();
-
-      // Here we'll use octave_sort directly to avoid converting the array
-      // for case-insensitive comparison.
-
-
-      // check for case-insensitive option
-      if (nargin == 3)
+      // FIXME: this should be handled directly.
+      if (icase)
         {
-          std::string opt = args(2).string_value ();
+          table = table.xtoupper ();
+          y = y.xtoupper ();
         }

-      sortmode mode = (icase ? get_sort_mode (str_table, stri_comp_gt)
-                       : get_sort_mode (str_table));
-
-      bool (*str_comp) (const std::string&, const std::string&);
-
-      // pick the correct comparator
-      if (mode == DESCENDING)
-        str_comp = icase ? stri_comp_gt : octave_sort<std::string>::descending_compare;
-      else
-        str_comp = icase ? stri_comp_lt : octave_sort<std::string>::ascending_compare;
-
-      octave_sort<std::string> lsort (str_comp);
+      Array<std::string> str_table = table.cellstr_value ();
       Array<std::string> str_y (1);

       if (y.is_cellstr ())
@@ -293,32 +314,37 @@
       else
         str_y(0) = y.string_value ();

+      Array<octave_idx_type> idx = str_table.lookup (str_y);
+      octave_idx_type nval = str_y.numel ();
+
+      // Post-process.
       if (match_bool)
         {
-          boolNDArray match (str_y.dims ());
-
-          lsort.lookupb (str_table.data (), str_table.nelem (), str_y.data (),
-                         str_y.nelem (), match.fortran_vec ());
+          boolNDArray match (idx.dims ());
+          for (octave_idx_type i = 0; i < nval; i++)
+            {
+              octave_idx_type j = idx.xelem (i);
+              match.xelem (i) = j != 0 && str_y(i) == str_table(j-1);
+            }

           retval = match;
         }
-      else
+      else if (match_idx)
         {
-          Array<octave_idx_type> idx (str_y.dims ());
-
+          NDArray ridx (idx.dims ());
           if (match_idx)
             {
-              lsort.lookupm (str_table.data (), str_table.nelem (), str_y.data (),
-                             str_y.nelem (), idx.fortran_vec ());
-            }
-          else
-            {
-              lsort.lookup (str_table.data (), str_table.nelem (), str_y.data (),
-                            str_y.nelem (), idx.fortran_vec ());
+              for (octave_idx_type i = 0; i < nval; i++)
+                {
+                  octave_idx_type j = idx.xelem (i);
+                  ridx.xelem (i) = (j != 0 && str_y(i) == str_table(j-1)) ? j : 0;
+                }
             }

-          retval = octave_value (idx, match_idx);
+          retval = ridx;
         }
+      else
+        retval = idx;
     }
   else
     print_usage ();