Mercurial > hg > octave-nkf
changeset 10367:173e10268080
avoid indexing nonexistent elements in sparse diag
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Sat, 27 Feb 2010 16:05:16 -0500 |
parents | e5ae13b8b2c2 |
children | d2849dbcc858 |
files | liboctave/ChangeLog liboctave/Sparse.cc src/DLD-FUNCTIONS/conv2.cc |
diffstat | 3 files changed, 89 insertions(+), 57 deletions(-) [+] |
line wrap: on
line diff
--- a/liboctave/ChangeLog +++ b/liboctave/ChangeLog @@ -1,3 +1,8 @@ +2010-02-27 John W. Eaton <jwe@octave.org> + + * Sparse.cc (Sparse<T>::diag): Handle case of diag (szv) when szv + is a sparse vector with nnz = 0. + 2010-02-27 Jaroslav Hajek <highegg@gmail.com> * Array-util.cc (gripe_index_out_of_range): New function.
--- a/liboctave/Sparse.cc +++ b/liboctave/Sparse.cc @@ -2344,43 +2344,59 @@ { octave_idx_type n = nnc + std::abs (k); octave_idx_type nz = nzmax (); + d = Sparse<T> (n, n, nz); - for (octave_idx_type i = 0; i < coff+1; i++) - d.xcidx (i) = 0; - for (octave_idx_type j = 0; j < nnc; j++) + + if (nnz () > 0) { - for (octave_idx_type i = cidx(j); i < cidx(j+1); i++) + for (octave_idx_type i = 0; i < coff+1; i++) + d.xcidx (i) = 0; + + for (octave_idx_type j = 0; j < nnc; j++) { - d.xdata (i) = data (i); - d.xridx (i) = j + roff; + for (octave_idx_type i = cidx(j); i < cidx(j+1); i++) + { + d.xdata (i) = data (i); + d.xridx (i) = j + roff; + } + d.xcidx (j + coff + 1) = cidx(j+1); } - d.xcidx (j + coff + 1) = cidx(j+1); + + for (octave_idx_type i = nnc + coff + 1; i < n + 1; i++) + d.xcidx (i) = nz; } - for (octave_idx_type i = nnc + coff + 1; i < n + 1; i++) - d.xcidx (i) = nz; } else { octave_idx_type n = nnr + std::abs (k); octave_idx_type nz = nzmax (); - octave_idx_type ii = 0; - octave_idx_type ir = ridx(0); + d = Sparse<T> (n, n, nz); - for (octave_idx_type i = 0; i < coff+1; i++) - d.xcidx (i) = 0; - for (octave_idx_type i = 0; i < nnr; i++) + + if (nnz () > 0) { - if (ir == i) + octave_idx_type ii = 0; + octave_idx_type ir = ridx(0); + + for (octave_idx_type i = 0; i < coff+1; i++) + d.xcidx (i) = 0; + + for (octave_idx_type i = 0; i < nnr; i++) { - d.xdata (ii) = data (ii); - d.xridx (ii++) = ir + roff; - if (ii != nz) - ir = ridx (ii); + if (ir == i) + { + d.xdata (ii) = data (ii); + d.xridx (ii++) = ir + roff; + + if (ii != nz) + ir = ridx (ii); + } + d.xcidx (i + coff + 1) = ii; } - d.xcidx (i + coff + 1) = ii; + + for (octave_idx_type i = nnr + coff + 1; i < n+1; i++) + d.xcidx (i) = nz; } - for (octave_idx_type i = nnr + coff + 1; i < n+1; i++) - d.xcidx (i) = nz; } }
--- a/src/DLD-FUNCTIONS/conv2.cc +++ b/src/DLD-FUNCTIONS/conv2.cc @@ -30,30 +30,32 @@ #include "oct-obj.h" #include "utils.h" -#define MAX(a,b) ((a) > (b) ? (a) : (b)) - enum Shape { SHAPE_FULL, SHAPE_SAME, SHAPE_VALID }; #if !defined (CXX_NEW_FRIEND_TEMPLATE_DECL) extern MArray<double> -conv2 (MArray<double>&, MArray<double>&, MArray<double>&, Shape); +conv2 (const MArray<double>&, const MArray<double>&, const MArray<double>&, + Shape); extern MArray<Complex> -conv2 (MArray<Complex>&, MArray<Complex>&, MArray<Complex>&, Shape); +conv2 (const MArray<Complex>&, const MArray<Complex>&, + const MArray<Complex>&, Shape); extern MArray<float> -conv2 (MArray<float>&, MArray<float>&, MArray<float>&, Shape); +conv2 (const MArray<float>&, const MArray<float>&, const MArray<float>&, + Shape); extern MArray<FloatComplex> -conv2 (MArray<FloatComplex>&, MArray<FloatComplex>&, MArray<FloatComplex>&, Shape); +conv2 (const MArray<FloatComplex>&, const MArray<FloatComplex>&, + const MArray<FloatComplex>&, Shape); #endif template <class T> MArray<T> -conv2 (MArray<T>& R, MArray<T>& C, MArray<T>& A, Shape ishape) +conv2 (const MArray<T>& R, const MArray<T>& C, const MArray<T>& A, Shape ishape) { - octave_idx_type Rn = R.length (); - octave_idx_type Cm = C.length (); + octave_idx_type Rn = R.length (); + octave_idx_type Cm = C.length (); octave_idx_type Am = A.rows (); octave_idx_type An = A.columns (); @@ -113,8 +115,8 @@ { T sum = 0; - octave_idx_type ci = Cm - 1 - MAX(0, edgM-oi); - octave_idx_type ai = MAX(0, oi-edgM); + octave_idx_type ci = Cm - 1 - std::max (0, edgM-oi); + octave_idx_type ai = std::max (0, oi-edgM); const T* Ad = A.data() + ai + Am*oj; const T* Cd = C.data() + ci; for ( ; ci >= 0 && ai < Am; ci--, Cd--, ai++, Ad++) @@ -127,15 +129,15 @@ { T sum = 0; - octave_idx_type rj = Rn - 1 - MAX(0, edgN-oj); - octave_idx_type aj = MAX(0, oj-edgN) ; + octave_idx_type rj = Rn - 1 - std::max (0, edgN-oj); + octave_idx_type aj = std::max (0, oj-edgN); const T* Xd = X.data() + aj; const T* Rd = R.data() + rj; for ( ; rj >= 0 && aj < An; rj--, Rd--, aj++, Xd++) sum += (*Xd) * (*Rd); - O(oi,oj)= sum; + O(oi,oj) = sum; } } @@ -158,7 +160,7 @@ template <class T> MArray<T> -conv2 (MArray<T>&A, MArray<T>&B, Shape ishape) +conv2 (const MArray<T>& A, const MArray<T>& B, Shape ishape) { // Convolution works fastest if we choose the A matrix to be // the largest. @@ -210,29 +212,36 @@ MArray<T> O (outM, outN); - for (octave_idx_type oi = 0; oi < outM; oi++) + T *Od = O.fortran_vec (); + + for (octave_idx_type oj = 0; oj < outN; oj++) { - for (octave_idx_type oj = 0; oj < outN; oj++) + octave_idx_type aj0 = std::max (0, oj-edgN); + octave_idx_type bj0 = Bn - 1 - std::max (0, edgN-oj); + + for (octave_idx_type oi = 0; oi < outM; oi++) { T sum = 0; - for (octave_idx_type bj = Bn - 1 - MAX (0, edgN-oj), aj= MAX (0, oj-edgN); - bj >= 0 && aj < An; bj--, aj++) + octave_idx_type bi0 = Bm - 1 - std::max (0, edgM-oi); + octave_idx_type ai0 = std::max (0, oi-edgM); + + for (octave_idx_type aj = aj0, bj = bj0; bj >= 0 && aj < An; + bj--, aj++) { - octave_idx_type bi = Bm - 1 - MAX (0, edgM-oi); - octave_idx_type ai = MAX (0, oi-edgM); - const T* Ad = A.data () + ai + Am*aj; - const T* Bd = B.data () + bi + Bm*bj; + const T* Ad = A.data () + ai0 + Am*aj; + const T* Bd = B.data () + bi0 + Bm*bj; - for ( ; bi >= 0 && ai < Am; bi--, Bd--, ai++, Ad++) + for (octave_idx_type ai = ai0, bi = bi0; bi >= 0 && ai < Am; + bi--, ai++) { - sum += (*Ad) * (*Bd); + sum += (*Ad++) * (*Bd--); // Comment: it seems to be 2.5 x faster than this: // sum+= A(ai,aj) * B(bi,bj); } } - O(oi,oj) = sum; + *Od++ = sum; } } @@ -258,11 +267,11 @@ of @var{c} is given by\n\ \n\ @table @asis\n\ -@item @var{shape}= 'full'\n\ +@item @var{shape} = 'full'\n\ returns full 2-D convolution\n\ -@item @var{shape}= 'same'\n\ +@item @var{shape} = 'same'\n\ same size as a. 'central' part of convolution\n\ -@item @var{shape}= 'valid'\n\ +@item @var{shape} = 'valid'\n\ only parts which do not include zero-padded edges\n\ @end table\n\ \n\ @@ -274,8 +283,8 @@ octave_value retval; octave_value tmp; int nargin = args.length (); - std::string shape= "full"; //default - bool separable= false; + std::string shape = "full"; //default + bool separable = false; Shape ishape; if (nargin < 2) @@ -419,13 +428,15 @@ } template MArray<double> -conv2 (MArray<double>&, MArray<double>&, MArray<double>&, Shape); +conv2 (const MArray<double>&, const MArray<double>&, const MArray<double>&, + Shape); template MArray<double> -conv2 (MArray<double>&, MArray<double>&, Shape); +conv2 (const MArray<double>&, const MArray<double>&, Shape); template MArray<Complex> -conv2 (MArray<Complex>&, MArray<Complex>&, MArray<Complex>&, Shape); +conv2 (const MArray<Complex>&, const MArray<Complex>&, + const MArray<Complex>&, Shape); template MArray<Complex> -conv2 (MArray<Complex>&, MArray<Complex>&, Shape); +conv2 (const MArray<Complex>&, const MArray<Complex>&, Shape);