Mercurial > hg > octave-kai

--- a/libinterp/dldfcn/ichol0jp.cc
+++ b/libinterp/dldfcn/ichol0jp.cc
@@ -21,8 +21,7 @@
  * This file implements the Incomplete Cholesky Factorization according to
  * the Jones and Plassmann strategy.
  *
- * TODO: - remove Fortran indexing
- *       - implement MIC(0)
+ * TODO: - implement MIC(0)
  *       - complex input handling
  */

@@ -34,77 +33,38 @@
 #include "defun-dld.h"

 /**
- * Sorts value_vec indirectly addressed by index_vec using heap sort. index_vec
- * is arranged such that index_vec[1] addresses the largest element in
- * value_vec, index_vec[2] the next largest, and so on.
+ * Helper class for sorting indirectly addressed values in a sparse matrix.
  *
  * @tparam data_t The type of the data operated on. In the real case it will
  *   be "double".
- *
- * @param len Length of the vectors index_vec and value_vec to be sorted.
- * @param value_vec Data to be sorted.
- * @param index_vec Index vector of the data.
  */
-template < typename data_t > void
-ihsort (octave_idx_type & len, octave_idx_type * index_vec,
-	data_t * value_vec)
+template < typename data_t > class indexed_value_comperator
 {
-  // return if too small
-  if (len <= 1)
-    return;
+private:
+  data_t * m_value_vec;
+public:
+  /**
+   * Constructor.
+   *
+   * @param value_vec Vector with the indirectly addressed values.
+   */
+  indexed_value_comperator (data_t * value_vec)
+  {
+    m_value_vec = value_vec;
+  }

-  // build the heap
-  octave_idx_type mid = len / 2;
-  for (octave_idx_type k = mid; k >= 1; k--)
-    {
-      octave_idx_type x = index_vec[k];
-      octave_idx_type lheap = k;
-      octave_idx_type rheap = len;
-      octave_idx_type m = lheap * 2;
-      while (m <= rheap)
-	{
-	  if (m < rheap)
-	    if (std::abs (value_vec[index_vec[m]]) >
-		std::abs (value_vec[index_vec[m + 1]]))
-	      m++;
-	  if (std::abs (value_vec[x]) <= std::abs (value_vec[index_vec[m]]))
-	    m = rheap + 1;
-	  else
-	    {
-	      index_vec[lheap] = index_vec[m];
-	      lheap = m;
-	      m = 2 * lheap;
-	    }
-	}
-      index_vec[lheap] = x;
-    }
-
-  // sort the heap
-  for (octave_idx_type k = len; k >= 2; k--)
-    {
-      octave_idx_type x = index_vec[k];
-      index_vec[k] = index_vec[1];
-      octave_idx_type lheap = 1;
-      octave_idx_type rheap = k - 1;
-      octave_idx_type m = 2;
-      while (m <= rheap)
-	{
-	  if (m < rheap)
-	    if (std::abs (value_vec[index_vec[m]]) >
-		std::abs (value_vec[index_vec[m + 1]]))
-	      m++;
-	  if (std::abs (value_vec[x]) <= std::abs (value_vec[index_vec[m]]))
-	    m = rheap + 1;
-	  else
-	    {
-	      index_vec[lheap] = index_vec[m];
-	      lheap = m;
-	      m = 2 * lheap;
-	    }
-	}
-      index_vec[lheap] = x;
-    }
-}
+  /**
+   * Comparison function.
+   *
+   * @param a First indirect address to a value in value_vec.
+   * @param b Second indirect address to a value in value_vec.
+   * @return true, if abs(value_vec[a]) < abs (value_vec[b]), otherwise false.
+   */
+  bool operator              () (octave_idx_type a, octave_idx_type b)
+  {
+    return std::abs (m_value_vec[a]) < std::abs (m_value_vec[b]);
+  }
+};

 /**
  * Get the k largest non-zeros in value_vec indirectly addressed by index_vec.
@@ -127,47 +87,41 @@
   if ((n <= 1) || (k <= 0))
     return;

-  // heap sort the first k elements of the vector
-  ihsort (k, index_vec, value_vec);
+  // create comperator for indirectly addressed values
+  indexed_value_comperator < data_t > cmp (value_vec);
+
+  // sort the first k elements of the index vector
+  std::sort (index_vec, index_vec + k, cmp);

   // loop through the rest of the vector and find any elements that are larger
   // than any of the first k elements
-  data_t cur_min = std::abs (value_vec[index_vec[k]]);
-  for (octave_idx_type i = k + 1; i <= n; i++)
+  for (octave_idx_type i = k; i < n; i++)
     {
-      octave_idx_type itemp = index_vec[i];
-      data_t new_value = std::abs (value_vec[itemp]);
-      if (new_value > cur_min)
+      // if there is a larger element
+      if (cmp (index_vec[k - 1], index_vec[i]))
 	{
-	  // find position for new value
-	  octave_idx_type left = 1;
-	  data_t lval = std::abs (value_vec[index_vec[1]]);
-	  octave_idx_type cur_ptr = 1;
-	  if (new_value <= lval)
+	  // find position for new value (insert_idx) using bisection
+	  octave_idx_type left_idx = 0;	// left index of sorted k elements
+	  octave_idx_type right_idx = k - 1;	// right index of sorted k elements
+	  octave_idx_type insert_idx = (left_idx + right_idx) / 2;	// set to middle (not exact)
+	  if (!cmp (index_vec[left_idx], index_vec[i]))
 	    {
-	      octave_idx_type right = k;
-	      cur_ptr = (k + 1) / 2;
-	      while (right > left + 1)
+	      while (left_idx + 1 < right_idx)
 		{
-		  data_t cur_val = std::abs (value_vec[index_vec[cur_ptr]]);
-		  if (cur_val < new_value)
-		    right = cur_ptr;
+		  if (cmp (index_vec[insert_idx], index_vec[i]))
+		    right_idx = insert_idx;
 		  else
-		    {
-		      left = cur_ptr;
-		      lval = cur_val;
-		    }
-		  cur_ptr = (right + left) / 2;
+		    left_idx = insert_idx;
+		  insert_idx = (left_idx + right_idx) / 2;
 		}
-	      cur_ptr = right;
+	      insert_idx = right_idx;
 	    }

 	  // shift sorted values and insert new value
-	  index_vec[i] = index_vec[k];
-	  for (octave_idx_type j = k; j >= cur_ptr + 1; j--)
+	  index_vec[i] = index_vec[k - 1];
+	  for (octave_idx_type j = k - 1; j > insert_idx; j--)
 	    index_vec[j] = index_vec[j - 1];
-	  index_vec[cur_ptr] = itemp;
-	  cur_min = std::abs (value_vec[index_vec[k]]);
+	  index_vec[insert_idx] = index_vec[i];
 	}
     }
 }
@@ -210,42 +164,43 @@
   octave_column_vector_t result_diag (n);
   octave_sparse_matrix_t result_A (n, n, nnz);

-  // Allocate working memory (Fortran indexing)
-  OCTAVE_LOCAL_BUFFER (data_t, diag, n + 1);
-  OCTAVE_LOCAL_BUFFER (data_t, off_data, nnz + 1);
-  OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx, n + 2);
-  OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx, nnz + 1);
-  OCTAVE_LOCAL_BUFFER (data_t, ta, n + 1);
-  OCTAVE_LOCAL_BUFFER (octave_idx_type, itcol, n + 1);
-  OCTAVE_LOCAL_BUFFER (octave_idx_type, ifirst, n + 1);
-  OCTAVE_LOCAL_BUFFER (octave_idx_type, list, n + 1);
+  // Define convenience pointers to the data structures
+  data_t *diag = result_diag.fortran_vec ();
+  data_t *off_data = result_A.data ();
+  octave_idx_type *cidx = result_A.cidx ();
+  octave_idx_type *ridx = result_A.ridx ();
+
+  // Allocate working memory
+  OCTAVE_LOCAL_BUFFER (data_t, ta, n);
+  OCTAVE_LOCAL_BUFFER (octave_idx_type, itcol, n);
+  OCTAVE_LOCAL_BUFFER (octave_idx_type, ifirst, n);
+  OCTAVE_LOCAL_BUFFER (octave_idx_type, list, n);

-  // Copy input data to work and result data structures
-  for (octave_idx_type i = 1; i <= n; i++)
-    diag[i] = in_diag (i - 1);
-  for (octave_idx_type i = 1; i <= nnz; i++)
-    off_data[i] = in_A.data (i - 1);
-  for (octave_idx_type i = 1; i <= (n + 1); i++)
+  // Copy input data to result data structures
+  for (octave_idx_type i = 0; i < n; i++)
+    diag[i] = in_diag (i);
+  for (octave_idx_type i = 0; i < nnz; i++)
+    off_data[i] = in_A.data (i);
+  for (octave_idx_type i = 0; i < (n + 1); i++)
     {
-      cidx[i] = in_A.cidx (i - 1) + 1;
-      result_A.cidx (i - 1) = in_A.cidx (i - 1);
+      cidx[i] = in_A.cidx (i);
     }
-  for (octave_idx_type i = 1; i <= nnz; i++)
-    ridx[i] = in_A.ridx (i - 1) + 1;
+  for (octave_idx_type i = 0; i < nnz; i++)
+    ridx[i] = in_A.ridx (i);


   // Initialize data structures
-  for (octave_idx_type j = 1; j <= n; j++)
+  for (octave_idx_type j = 0; j < n; j++)
     {
-      ifirst[j] = 0;
-      list[j] = 0;
+      ifirst[j] = -1;
+      list[j] = -1;
     }

   // loop over all columns
-  for (octave_idx_type k = 1; k <= n; k++)
+  for (octave_idx_type k = 0; k < n; k++)
     {
       // load column k into working vector ta
-      octave_idx_type ta_len = 0;
+      octave_idx_type ta_len = -1;
       octave_idx_type col_k_start = cidx[k];
       octave_idx_type col_k_end = cidx[k + 1] - 1;
       for (octave_idx_type j = col_k_start; j <= col_k_end; j++)
@@ -254,7 +209,7 @@
 	  ta[row] = off_data[j];
 	  ta_len++;
 	  itcol[ta_len] = row;
-	  ifirst[row] = 1;
+	  ifirst[row] = 0;
 	}

       // make sure the diagonal of k is okay and then take the sqrt
@@ -268,7 +223,7 @@

       // update column k using the previous columns
       octave_idx_type j = list[k];
-      while (j != 0)
+      while (j != -1)
 	{
 	  octave_idx_type col_j_start = ifirst[j];
 	  octave_idx_type col_j_end = cidx[j + 1] - 1;
@@ -287,11 +242,13 @@
 	  for (octave_idx_type i = col_j_start; i <= col_j_end; i++)
 	    {
 	      octave_idx_type row = ridx[i];
-	      if (ifirst[row] != 0)
-		ta[row] -= lval * off_data[i];
+	      if (ifirst[row] != -1)
+		{
+		  ta[row] -= lval * off_data[i];
+		}
 	      else
 		{
-		  ifirst[row] = 1;
+		  ifirst[row] = 0;
 		  ta_len++;
 		  itcol[ta_len] = row;
 		  ta[row] = -lval * off_data[i];
@@ -300,7 +257,7 @@
 	}

       // update remaining diagonals using column k
-      for (j = 1; j <= ta_len; j++)
+      for (j = 0; j <= ta_len; j++)
 	{
 	  octave_idx_type row = itcol[j];
 	  ta[row] /= diag[k];
@@ -308,12 +265,13 @@
 	}

       // find the largest elements in column k now
-      octave_idx_type count = std::min (col_k_end - col_k_start + 1, ta_len);
+      octave_idx_type count =
+	std::max (std::min (col_k_end - col_k_start + 1, ta_len), 0);
       ibsort (ta_len, count, ta, itcol);
-      std::sort (itcol + 1, itcol + 1 + count);
+      std::sort (itcol, itcol + count);

       // put the largest elements back into the sparse data structure
-      count = 1;
+      count = 0;
       for (j = col_k_start; j <= col_k_end; j++)
 	{
 	  off_data[j] = ta[itcol[count]];
@@ -330,18 +288,10 @@
 	  ifirst[k] = col_k_start;
 	}

-      for (j = 1; j <= ta_len; j++)
-	ifirst[itcol[j]] = 0;
+      for (j = 0; j < ta_len; j++)
+	ifirst[itcol[j]] = -1;
     }

-  // Copy new data to result data structures
-  for (octave_idx_type i = 1; i <= n; i++)
-    result_diag (i - 1) = diag[i];
-  for (octave_idx_type i = 1; i <= nnz; i++)
-    result_A.ridx (i - 1) = ridx[i] - 1;
-  for (octave_idx_type i = 1; i <= nnz; i++)
-    result_A.data (i - 1) = off_data[i];
-
   retval (1) = octave_value (result_diag);
   retval (0) = octave_value (result_A);