Mercurial > hg > octave-lyh
diff liboctave/Sparse-op-defs.h @ 5587:b4cb3f93c1e1
[project @ 2006-01-04 22:03:38 by dbateman]
author | dbateman |
---|---|
date | Wed, 04 Jan 2006 22:03:38 +0000 |
parents | d37b96139376 |
children | 2857357f9d3c |
line wrap: on
line diff
--- a/liboctave/Sparse-op-defs.h +++ b/liboctave/Sparse-op-defs.h @@ -1560,12 +1560,12 @@ octave_idx_type col = a.ridx(j); \ for (octave_idx_type k = m.cidx(col) ; k < m.cidx(col+1); k++) \ { \ - OCTAVE_QUIT; \ if (w[m.ridx(k)] < i + 1) \ { \ w[m.ridx(k)] = i + 1; \ nel++; \ } \ + OCTAVE_QUIT; \ } \ } \ } \ @@ -1580,35 +1580,85 @@ OCTAVE_LOCAL_BUFFER (EL_TYPE, Xcol, nr); \ \ RET_TYPE retval (nr, a_nc, nel); \ - \ octave_idx_type ii = 0; \ - \ - retval.xcidx(0) = 0; \ - for (octave_idx_type i = 0; i < a_nc ; i++) \ + /* The optimal break-point as estimated from simulations */ \ + /* Note that Mergesort is O(nz log(nz)) while searching all */ \ + /* values is O(nr), where nz here is non-zero per row of */ \ + /* length nr. The test itself was then derived from the */ \ + /* simulation with random square matrices and the observation */ \ + /* of the number of non-zero elements in the output matrix */ \ + /* it was found that the breakpoints were */ \ + /* nr: 500 1000 2000 5000 10000 */ \ + /* nz: 6 25 97 585 2202 */ \ + /* The below is a simplication of the 'polyfit'-ed parameters */ \ + /* to these breakpoints */ \ + if (nr > 43000 || ((nr * nr) * double(a_nc)) / 43000 > nel) \ { \ - for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ + octave_idx_type *ri = retval.xridx(); \ + octave_sort<octave_idx_type> sort; \ + \ + retval.xcidx(0) = 0; \ + for (octave_idx_type i = 0; i < a_nc ; i++) \ { \ - octave_idx_type col = a.ridx(j); \ - EL_TYPE tmpval = a.data(j); \ - for (octave_idx_type k = m.cidx(col) ; k < m.cidx(col+1); k++) \ + for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ { \ - OCTAVE_QUIT; \ - octave_idx_type row = m.ridx(k); \ - if (w[row] < i + 1) \ - { \ - w[row] = i + 1; \ - retval.xridx(ii++) = row; \ - Xcol[row] = tmpval * m.data(k); \ - } \ - else \ - Xcol[row] += tmpval * m.data(k); \ + octave_idx_type col = a.ridx(j); \ + EL_TYPE tmpval = a.data(j); \ + for (octave_idx_type k = m.cidx(col) ; \ + k < m.cidx(col+1); k++) \ + { \ + OCTAVE_QUIT; \ + octave_idx_type row = m.ridx(k); \ + if (w[row] < i + 1) \ + { \ + w[row] = i + 1; \ + retval.xridx(ii++) = row;\ + Xcol[row] = tmpval * m.data(k); \ + } \ + else \ + Xcol[row] += tmpval * m.data(k); \ + } \ } \ - } \ - retval.xcidx(i+1) = ii; \ - for (octave_idx_type k = retval.xcidx(i); k < retval.xcidx(i+1); k++) \ - retval.xdata(k) = Xcol[retval.xridx(k)]; \ + sort.sort (ri + retval.xcidx(i), ii - retval.xcidx(i)); \ + for (octave_idx_type k = retval.xcidx(i); k < ii; k++) \ + retval.xdata(k) = Xcol[retval.xridx(k)]; \ + retval.xcidx(i+1) = ii; \ + } \ + retval.maybe_compress (true);\ + } \ + else \ + { \ + retval.xcidx(0) = 0; \ + for (octave_idx_type i = 0; i < a_nc ; i++) \ + { \ + for (octave_idx_type j = a.cidx(i); j < a.cidx(i+1); j++) \ + { \ + octave_idx_type col = a.ridx(j); \ + EL_TYPE tmpval = a.data(j); \ + for (octave_idx_type k = m.cidx(col) ; \ + k < m.cidx(col+1); k++) \ + { \ + OCTAVE_QUIT; \ + octave_idx_type row = m.ridx(k); \ + if (w[row] < i + 1) \ + { \ + w[row] = i + 1; \ + Xcol[row] = tmpval * m.data(k); \ + } \ + else \ + Xcol[row] += tmpval * m.data(k); \ + } \ + } \ + for (octave_idx_type k = 0; k < nr; k++) \ + if (w[k] == i + 1 && Xcol[k] != 0.) \ + { \ + retval.xdata(ii) = Xcol[k]; \ + retval.xridx(ii++) = k; \ + } \ + retval.xcidx(i+1) = ii; \ + } \ + retval.maybe_compress ();\ } \ - retval.maybe_compress (true); \ return retval; \ } \ }