Mercurial > hg > octave-nkf

--- a/src/DLD-FUNCTIONS/conv2.cc
+++ b/src/DLD-FUNCTIONS/conv2.cc
@@ -30,32 +30,30 @@
 #include "oct-obj.h"
 #include "utils.h"

+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
 enum Shape { SHAPE_FULL, SHAPE_SAME, SHAPE_VALID };

 #if !defined (CXX_NEW_FRIEND_TEMPLATE_DECL)
 extern MArray<double>
-conv2 (const MArray<double>&, const MArray<double>&, const MArray<double>&,
-       Shape);
+conv2 (MArray<double>&, MArray<double>&, MArray<double>&, Shape);

 extern MArray<Complex>
-conv2 (const MArray<Complex>&, const MArray<Complex>&,
-       const MArray<Complex>&, Shape);
+conv2 (MArray<Complex>&, MArray<Complex>&, MArray<Complex>&, Shape);

 extern MArray<float>
-conv2 (const MArray<float>&, const MArray<float>&, const MArray<float>&,
-       Shape);
+conv2 (MArray<float>&, MArray<float>&, MArray<float>&, Shape);

 extern MArray<FloatComplex>
-conv2 (const MArray<FloatComplex>&, const MArray<FloatComplex>&,
-       const MArray<FloatComplex>&, Shape);
+conv2 (MArray<FloatComplex>&, MArray<FloatComplex>&, MArray<FloatComplex>&, Shape);
 #endif

 template <class T>
 MArray<T>
-conv2 (const MArray<T>& R, const MArray<T>& C, const MArray<T>& A, Shape ishape)
+conv2 (MArray<T>& R, MArray<T>& C, MArray<T>& A, Shape ishape)
 {
-  octave_idx_type  Rn = R.length ();
-  octave_idx_type  Cm = C.length ();
+  octave_idx_type  Rn =  R.length ();
+  octave_idx_type  Cm =  C.length ();
   octave_idx_type  Am = A.rows ();
   octave_idx_type  An = A.columns ();

@@ -115,8 +113,8 @@
         {
           T sum = 0;

-          octave_idx_type ci = Cm - 1 - std::max (0, edgM-oi);
-          octave_idx_type ai = std::max (0, oi-edgM);
+          octave_idx_type ci = Cm - 1 - MAX(0, edgM-oi);
+          octave_idx_type ai = MAX(0, oi-edgM);
           const T* Ad = A.data() + ai + Am*oj;
           const T* Cd = C.data() + ci;
           for ( ; ci >= 0 && ai < Am; ci--, Cd--, ai++, Ad++)
@@ -129,15 +127,15 @@
         {
           T sum = 0;

-          octave_idx_type rj = Rn - 1 - std::max (0, edgN-oj);
-          octave_idx_type aj = std::max (0, oj-edgN);
+          octave_idx_type rj = Rn - 1 - MAX(0, edgN-oj);
+          octave_idx_type aj = MAX(0, oj-edgN) ;
           const T* Xd = X.data() + aj;
           const T* Rd = R.data() + rj;

           for ( ; rj >= 0 && aj < An; rj--, Rd--, aj++, Xd++)
             sum += (*Xd) * (*Rd);

-          O(oi,oj) = sum;
+          O(oi,oj)=  sum;
         }
     }

@@ -160,7 +158,7 @@

 template <class T>
 MArray<T>
-conv2 (const MArray<T>& A, const MArray<T>& B, Shape ishape)
+conv2 (MArray<T>&A, MArray<T>&B, Shape ishape)
 {
   // Convolution works fastest if we choose the A matrix to be
   // the largest.
@@ -212,36 +210,29 @@

   MArray<T> O (outM, outN);

-  T *Od = O.fortran_vec ();
-
-  for (octave_idx_type oj = 0; oj < outN; oj++)
+  for (octave_idx_type oi = 0; oi < outM; oi++)
     {
-      octave_idx_type aj0 = std::max (0, oj-edgN);
-      octave_idx_type bj0 = Bn - 1 - std::max (0, edgN-oj);
-
-      for (octave_idx_type oi = 0; oi < outM; oi++)
+      for (octave_idx_type oj = 0; oj < outN; oj++)
         {
           T sum = 0;

-          octave_idx_type bi0 = Bm - 1 - std::max (0, edgM-oi);
-          octave_idx_type ai0 = std::max (0, oi-edgM);
-
-          for (octave_idx_type aj = aj0, bj = bj0; bj >= 0 && aj < An;
-               bj--, aj++)
+          for (octave_idx_type bj = Bn - 1 - MAX (0, edgN-oj), aj= MAX (0, oj-edgN);
+               bj >= 0 && aj < An; bj--, aj++)
             {
-              const T* Ad = A.data () + ai0 + Am*aj;
-              const T* Bd = B.data () + bi0 + Bm*bj;
+              octave_idx_type bi = Bm - 1 - MAX (0, edgM-oi);
+              octave_idx_type ai = MAX (0, oi-edgM);
+              const T* Ad = A.data () + ai + Am*aj;
+              const T* Bd = B.data () + bi + Bm*bj;

-              for (octave_idx_type ai = ai0, bi = bi0; bi >= 0 && ai < Am;
-                   bi--, ai++)
+              for ( ; bi >= 0 && ai < Am; bi--, Bd--, ai++, Ad++)
                 {
-                  sum += (*Ad++) * (*Bd--);
+                  sum += (*Ad) * (*Bd);
                   // Comment: it seems to be 2.5 x faster than this:
                   //        sum+= A(ai,aj) * B(bi,bj);
                 }
             }

-          *Od++ = sum;
+          O(oi,oj) = sum;
         }
     }

@@ -267,11 +258,11 @@
 of @var{c} is given by\n\
 \n\
 @table @asis\n\
-@item @var{shape} = 'full'\n\
+@item @var{shape}= 'full'\n\
 returns full 2-D convolution\n\
-@item @var{shape} = 'same'\n\
+@item @var{shape}= 'same'\n\
 same size as a. 'central' part of convolution\n\
-@item @var{shape} = 'valid'\n\
+@item @var{shape}= 'valid'\n\
 only parts which do not include zero-padded edges\n\
 @end table\n\
 \n\
@@ -283,8 +274,8 @@
   octave_value retval;
   octave_value tmp;
   int nargin = args.length ();
-  std::string shape = "full"; //default
-  bool separable = false;
+  std::string shape= "full"; //default
+  bool separable= false;
   Shape ishape;

   if (nargin < 2)
@@ -428,15 +419,13 @@
 }

 template MArray<double>
-conv2 (const MArray<double>&, const MArray<double>&, const MArray<double>&,
-       Shape);
+conv2 (MArray<double>&, MArray<double>&, MArray<double>&, Shape);

 template MArray<double>
-conv2 (const MArray<double>&, const MArray<double>&, Shape);
+conv2 (MArray<double>&, MArray<double>&, Shape);

 template MArray<Complex>
-conv2 (const MArray<Complex>&, const MArray<Complex>&,
-       const MArray<Complex>&, Shape);
+conv2 (MArray<Complex>&, MArray<Complex>&, MArray<Complex>&, Shape);

 template MArray<Complex>
-conv2 (const MArray<Complex>&, const MArray<Complex>&, Shape);
+conv2 (MArray<Complex>&, MArray<Complex>&, Shape);