Mercurial > hg > octave-kai

--- a/libinterp/dldfcn/icholt.cc
+++ b/libinterp/dldfcn/icholt.cc
@@ -21,7 +21,7 @@
  * This file implements the Incomplete Cholesky Factorization with threshold.
  *
  * TODO: - Remove Fortran indexing
- *       - Implement MIC
+ *       - Improve MIC
  */

 #ifdef HAVE_CONFIG_H
@@ -57,50 +57,51 @@
  */
 template < typename octave_sparse_matrix_t,
   typename data_t > octave_value_list
-icholt (octave_sparse_matrix_t in_A, data_t droptol, octave_idx_type nl,
-	octave_idx_type maxk, bool michol)
+icholt (octave_sparse_matrix_t in_A, data_t droptol,
+	octave_idx_type nnz_guess, octave_idx_type maxk, bool michol)
 {
   octave_value_list retval;

   octave_idx_type n = in_A.cols ();

   // Allocate workig arrays for output matrix
-  OCTAVE_LOCAL_BUFFER (data_t, data, nl + 1);
-  OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx, nl + 1);
+  OCTAVE_LOCAL_BUFFER (data_t, data, nnz_guess + 1);
+  OCTAVE_LOCAL_BUFFER (octave_idx_type, ridx, nnz_guess + 1);
   OCTAVE_LOCAL_BUFFER (octave_idx_type, cidx, n + 2);
-  OCTAVE_LOCAL_BUFFER (octave_idx_type, link, nl + 1);
+  OCTAVE_LOCAL_BUFFER (octave_idx_type, link, nnz_guess + 1);

   // Allocate workig arrays
   OCTAVE_LOCAL_BUFFER (octave_idx_type, icol, maxk + 1);
   OCTAVE_LOCAL_BUFFER (octave_idx_type, kpoint, maxk + 1);
+  OCTAVE_LOCAL_BUFFER (data_t, diag, n + 1);

   // Copy diagonal into data and set link
   for (octave_idx_type i = 1; i <= n; i++)
     {
-      data[nl - n + i] = in_A.data (in_A.cidx (i - 1));
-      link[nl - n + i] = 0;
+      diag[i] = in_A.data (in_A.cidx (i - 1));
+      link[nnz_guess - n + i] = 0;
     }

-  octave_idx_type pl = 1;
-  octave_idx_type rejected_elements = 0;
-  // Start of column i
+  octave_idx_type diag_idx = 1;
+  // start of column i
   for (octave_idx_type i = 1; i <= n; i++)
     {
-      octave_idx_type col_start = in_A.cidx (i - 1) + 1;
-      octave_idx_type kk = 0;
-      octave_idx_type py = pl;
-      cidx[i] = pl;
-      octave_idx_type lpoint = link[nl - n + i];
+      octave_idx_type column_start_idx = in_A.cidx (i - 1) + 1;
+      octave_idx_type extra_elements = 0;
+      octave_idx_type insert_idx = diag_idx;
+      cidx[i] = diag_idx;
+      octave_idx_type lpoint = link[nnz_guess - n + i];

+      // iterate through extra elements
       while (lpoint != 0)
 	{
-	  kk++;
-	  icol[kk] = lpoint;
-	  if ((ridx[lpoint + 1] - i) == 0)
-	    kpoint[kk] = 1;
+	  extra_elements++;
+	  icol[extra_elements] = lpoint;
+	  if (i == ridx[lpoint + 1])
+	    kpoint[extra_elements] = 1;
 	  else
-	    kpoint[kk] = lpoint + 1;
-	  if ((kk - maxk) >= 0)
+	    kpoint[extra_elements] = lpoint + 1;
+	  if (extra_elements >= maxk)
 	    {
 	      error ("icholt: maxk was not set large enough.");
 	      return retval;
@@ -108,100 +109,94 @@
 	  lpoint = link[lpoint];
 	}

-      octave_idx_type j = i;
-      data_t dataii = 0.0;
-      do
+      // check for correct diagonal in column i
+      if (diag[i] <= 0.0)
 	{
-	  octave_idx_type nextj =
-	    std::numeric_limits < octave_idx_type >::max ();
-	  octave_idx_type row = in_A.ridx (col_start - 1) + 1;
+	  error
+	    ("icholt: Pivot error. Check if matrix is symmetric positive definite.");
+	  retval (0) = octave_value (i);
+	  return retval;
+	}

-	  data_t x = in_A.data (col_start - 1);
-	  if ((j - row) != 0)
-	    x = 0.0;
-	  else if ((j - i) == 0)
-	    x = data[nl - n + i];
+      octave_idx_type nextj = std::numeric_limits < octave_idx_type >::max ();
+      // for each row j
+      for (octave_idx_type j = i;
+	   j < std::numeric_limits < octave_idx_type >::max (); j = nextj)
+	{
+	  octave_idx_type row_start_idx =
+	    in_A.ridx (column_start_idx - 1) + 1;
+	  data_t x = in_A.data (column_start_idx - 1);

-	  if (j < row)
-	    nextj = row;
+	  nextj = std::numeric_limits < octave_idx_type >::max ();

-	  if ((j - row) == 0)
-	    if ((col_start - in_A.cidx (i)) < 0)	// with Fortran: cidx[i + 1] - 1
-	      {
-		col_start++;
-		nextj = in_A.ridx (col_start - 1) + 1;
-	      }
-
-	  if (kk > 0)
+	  // if missaligned advance j to starting row index of current column
+	  if (j < row_start_idx)
+	    nextj = row_start_idx;
+	  if (j == row_start_idx)
 	    {
-	      for (octave_idx_type k = 1; k <= kk; k++)
+	      // if current column contains more elements
+	      if ((column_start_idx - in_A.cidx (i)) < 0)	// with Fortran: cidx[i + 1] - 1
 		{
-		  octave_idx_type kpk = kpoint[k];
-		  if ((ridx[kpoint[k]] - j) == 0)
-		    {
-		      x = x - data[kpk] * data[icol[k]];
-		      kpk++;
-		      kpoint[k] = kpk;
-		    }
-		  if ((j < ridx[kpk]) && (ridx[kpk] < nextj))
-		    nextj = ridx[kpk];
+		  column_start_idx++;
+		  nextj = in_A.ridx (column_start_idx - 1) + 1;
 		}
 	    }
+	  else
+	    x = 0.0;
+
+	  // treat extra elements
+	  for (octave_idx_type k = 1; k <= extra_elements; k++)
+	    {
+	      if (j == ridx[kpoint[k]])
+		{
+		  x -= data[kpoint[k]] * data[icol[k]];
+		  kpoint[k]++;
+		}
+	      if ((j < ridx[kpoint[k]]) && (ridx[kpoint[k]] < nextj))
+		nextj = ridx[kpoint[k]];
+	    }

-	  if ((j == i) && (x <= 0.0))
+	  // insert element, if rejection criterion is not met
+	  if ((x * x - droptol * droptol * diag[i] * diag[j]) < 0.0)
 	    {
-	      error
-		("icholt: Pivot error. Check if matrix is symmetric positive definite.");
-	      retval (0) = octave_value (i);
-	      return retval;
-	    }
-	  dataii = data[nl - n + i];
-	  if (x != 0.0)
-	    {
-	      data_t datajj = data[nl - n + j];
-
-	      // rejection criterion
-	      if ((x * x - droptol * droptol * dataii * datajj) < 0.0)
+	      if (michol)
 		{
-		  x = std::abs (x);
-		  data_t y = std::sqrt (datajj / dataii);
-		  dataii += x * y;
-		  data[nl - n + j] = datajj + x / y;
-		  rejected_elements++;
-		}
-	      else
-		{
-		  data[py] = x;
-		  ridx[py] = j;
-		  link[py] = link[nl - n + j];
-		  link[nl - n + j] = py;
-		  py++;
-		  if (py > (nl - n + i))
-		    {
-		      error
-			("icholt: Not sufficient storage for output matrix.");
-		      return retval;
-		    }
+		  data_t y = std::sqrt (diag[j] / diag[i]);
+		  diag[i] += std::abs (x) * y;
+		  diag[j] += std::abs (x) / y;
 		}
 	    }
-	  j = nextj;
+	  else
+	    {
+	      data[insert_idx] = x;
+	      ridx[insert_idx] = j;
+	      link[insert_idx] = link[nnz_guess - n + j];
+	      link[nnz_guess - n + j] = insert_idx;
+	      insert_idx++;
+	      if (insert_idx > (nnz_guess - n + i))
+		{
+		  error ("icholt: Not sufficient storage for output matrix.");
+		  return retval;
+		}
+	    }
 	}
-      while (j < std::numeric_limits < octave_idx_type >::max ());
-
-      dataii = std::sqrt (dataii);
-      data[pl] = dataii;

-      if ((pl + 1) <= (py - 1))
-	for (octave_idx_type px = pl + 1; px <= py - 1; px++)
+      diag[i] = std::sqrt (diag[i]);
+      data[diag_idx] = diag[i];
+
+      if ((diag_idx + 1) <= (insert_idx - 1))
+	for (octave_idx_type j = diag_idx + 1; j <= insert_idx - 1; j++)
 	  {
-	    data[px] /= dataii;
-	    data[nl - n + ridx[px]] -= data[px] * data[px];
+	    // divide whole column by diagonal
+	    data[j] /= diag[i];
+	    // update lower diagonals
+	    diag[ridx[j]] -= data[j] * data[j];
 	  }
-      pl = py;
-      cidx[n + 1] = py;
+      diag_idx = insert_idx;
+      cidx[n + 1] = insert_idx;	// write new nnz
     }

-  octave_idx_type nnz = link[nl];
+  octave_idx_type nnz = link[nnz_guess];
   SparseMatrix L (n, n, nnz);
   for (octave_idx_type i = 1; i <= nnz; i++)
     L.data (i - 1) = data[i];
@@ -359,7 +354,7 @@
 %! [L] = icholt (A_2_in, 1e-4, false);
 %! assert (norm (A_2 - L*L', 'fro') / norm (A_2, 'fro'), 1e-4, 1e-4)
 %! [L] = icholt (A_2_in, 1e-4, true);
-%! assert (norm (A_2 - L*L', 'fro') / norm (A_2, 'fro'), 1e-4, 1e-4)
+%! assert (norm (A_2 - L*L', 'fro') / norm (A_2, 'fro'), 2e-4, 1e-4)
 %!
 %!test
 %! [L] = icholt (A_3_in, 1e-4, false);
@@ -371,7 +366,7 @@
 %! [L] = icholt (A_4_in, 1e-4, false);
 %! assert (norm (A_4 - L*L', 'fro') / norm (A_4, 'fro'), 1e-4, 1e-4)
 %! [L] = icholt (A_4_in, 1e-4, true);
-%! assert (norm (A_4 - L*L', 'fro') / norm (A_4, 'fro'), 1e-4, 1e-4)
+%! assert (norm (A_4 - L*L', 'fro') / norm (A_4, 'fro'), 3e-4, 1e-4)
 %!
 %!test
 %!error [L] = icholt (A_5_in, 1e-4, false);