octave-nkf: liboctave/fMatrix.cc comparison

comparison liboctave/fMatrix.cc @ 7804:a0c550b22e61

compound ops for float matrices

author	Jaroslav Hajek <highegg@gmail.com>
date	Wed, 21 May 2008 19:25:08 +0200
parents	f42c6f8d6d8e
children	935be827eaf8

comparison

equal deleted inserted replaced

-:9bcb31cc56be
+:a0c550b22e61
 // Matrix manipulations.
 /*
 Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
 2003, 2004, 2005, 2006, 2007 John W. Eaton
+Copyright (C) 2008 Jaroslav Hajek
 This file is part of Octave.
 Octave is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
 			   F77_CHAR_ARG_LEN_DECL);
 // Dot-product helper.  xgemm calls it for the 1x1 result case (row vector
 // times column vector) and receives the value through the trailing float&
 // argument.
 F77_RET_T
 F77_FUNC (xsdot, XSDOT) (const octave_idx_type&, const float*, const octave_idx_type&,
 			   const float*, const octave_idx_type&, float&);
// BLAS SSYRK (single-precision symmetric rank-k update), called by xgemm
// when both operands share the same data and exactly one is transposed.
// Argument order follows the reference BLAS interface:
// UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, C, LDC, followed by one
// F77_CHAR_ARG_LEN_DECL per character argument.
+F77_RET_T
+F77_FUNC (ssyrk, SSYRK) (F77_CONST_CHAR_ARG_DECL,
+			   F77_CONST_CHAR_ARG_DECL,
+			   const octave_idx_type&, const octave_idx_type&,
+			   const float&, const float*, const octave_idx_type&,
+			   const float&, float*, const octave_idx_type&
+			   F77_CHAR_ARG_LEN_DECL
+			   F77_CHAR_ARG_LEN_DECL);
 // NOTE(review): presumably LAPACK SGETRF (LU factorization with partial
 // pivoting); no caller is visible in this excerpt -- confirm against the
 // full file.
 F77_RET_T
 F77_FUNC (sgetrf, SGETRF) (const octave_idx_type&, const octave_idx_type&, float*, const octave_idx_type&,
 		      octave_idx_type*, octave_idx_type&);
 %!assert([M*cv,M*cv],M*[cv,cv],1e-14)
 %!assert([rv*M;rv*M],[rv;rv]*M,1e-14)
 %!assert(2*rv*cv,[rv,rv]*[cv;cv],1e-14)
 */
// Map a transpose flag to the one-character BLAS TRANS argument: returns a
// pointer to 'T' when trans is true and to 'N' otherwise.  The pointer
// refers to a single function-local static char, not a NUL-terminated
// string; xgemm passes it to the F77 macros with an explicit length of 1.
+static const char *
-FloatMatrix
+get_blas_trans_arg (bool trans)
-operator * (const FloatMatrix& m, const FloatMatrix& a)
+{
+static char blas_notrans = 'N', blas_trans = 'T';
+return (trans) ? &blas_trans : &blas_notrans;
+}
+// The general GEMM operation: returns op(a) * op(b), where op(x) is the
+// transpose of x when the matching trans flag is true and x otherwise.
+//
+// If the inner dimensions of op(a) and op(b) disagree, gripe_nonconformant
+// is called and the default-constructed result is returned.  If a_nr, a_nc
+// or b_nc is zero, the result is an a_nr x b_nc matrix filled with 0.0.
+// Otherwise the product is dispatched to:
+//   SSYRK  - a and b share the same data, the result is square and exactly
+//            one operand is transposed (symmetric product);
+//   XSDOT  - 1x1 result (row vector times column vector);
+//   SGEMV  - the result is a column or a row vector;
+//   SGEMM  - everything else.
+FloatMatrix
+xgemm (bool transa, const FloatMatrix& a, bool transb, const FloatMatrix& b)
 {
 FloatMatrix retval;
-octave_idx_type nr = m.rows ();
+octave_idx_type a_nr = transa ? a.cols () : a.rows ();
-octave_idx_type nc = m.cols ();
+octave_idx_type a_nc = transa ? a.rows () : a.cols ();
-octave_idx_type a_nr = a.rows ();
+octave_idx_type b_nr = transb ? b.cols () : b.rows ();
-octave_idx_type a_nc = a.cols ();
+octave_idx_type b_nc = transb ? b.rows () : b.cols ();
-if (nc != a_nr)
+if (a_nc != b_nr)
-gripe_nonconformant ("operator *", nr, nc, a_nr, a_nc);
+gripe_nonconformant ("operator *", a_nr, a_nc, b_nr, b_nc);
 else
 {
-if (nr == 0 || nc == 0 || a_nc == 0)
+if (a_nr == 0 || a_nc == 0 || b_nc == 0)
-	retval.resize (nr, a_nc, 0.0);
+	retval.resize (a_nr, b_nc, 0.0);
+else if (a.data () == b.data () && a_nr == b_nc && transa != transb)
+{
+// Same data with exactly one operand transposed: the product is
+// symmetric, so SSYRK computes only the upper triangle ("U").
+	  octave_idx_type lda = a.rows ();
+retval.resize (a_nr, b_nc);
+	  float *c = retval.fortran_vec ();
+const char *ctransa = get_blas_trans_arg (transa);
+F77_XFCN (ssyrk, SSYRK, (F77_CONST_CHAR_ARG2 ("U", 1),
+F77_CONST_CHAR_ARG2 (ctransa, 1),
+a_nr, a_nc, 1.0,
+a.data (), lda, 0.0, c, a_nr
+F77_CHAR_ARG_LEN (1)
+F77_CHAR_ARG_LEN (1)));
+// Mirror the upper triangle into the lower one.  The indices are
+// octave_idx_type (not int) so they match a_nr and cannot overflow
+// when octave_idx_type is wider than int.
+for (octave_idx_type j = 0; j < a_nr; j++)
+for (octave_idx_type i = 0; i < j; i++)
+retval.xelem (j,i) = retval.xelem (i,j);
+}
 else
 	{
-	  octave_idx_type ld  = nr;
+	  octave_idx_type lda = a.rows (), tda = a.cols ();
-	  octave_idx_type lda = a_nr;
+	  octave_idx_type ldb = b.rows (), tdb = b.cols ();
-	  retval.resize (nr, a_nc);
+	  retval.resize (a_nr, b_nc);
 	  float *c = retval.fortran_vec ();
-	  if (a_nc == 1)
+	  if (b_nc == 1)
 	    {
-	      if (nr == 1)
+	      if (a_nr == 1)
-		F77_FUNC (xsdot, XSDOT) (nc, m.data (), 1, a.data (), 1, *c);
+		F77_FUNC (xsdot, XSDOT) (a_nc, a.data (), 1, b.data (), 1, *c);
 	      else
 		{
-		  F77_XFCN (sgemv, SGEMV, (F77_CONST_CHAR_ARG2 ("N", 1),
+const char *ctransa = get_blas_trans_arg (transa);
-					   nr, nc, 1.0,  m.data (), ld,
+		  F77_XFCN (sgemv, SGEMV, (F77_CONST_CHAR_ARG2 (ctransa, 1),
-					   a.data (), 1, 0.0, c, 1
+					   lda, tda, 1.0,  a.data (), lda,
+					   b.data (), 1, 0.0, c, 1
 					   F77_CHAR_ARG_LEN (1)));
 		}
}
+else if (a_nr == 1)
+{
+// Row vector times matrix: evaluated as the transpose of op(b) times
+// the vector, hence the reversed transpose flag for b.
+const char *crevtransb = get_blas_trans_arg (! transb);
+F77_XFCN (sgemv, SGEMV, (F77_CONST_CHAR_ARG2 (crevtransb, 1),
+ldb, tdb, 1.0,  b.data (), ldb,
+a.data (), 1, 0.0, c, 1
+F77_CHAR_ARG_LEN (1)));
+}
 	  else
 	    {
-	      F77_XFCN (sgemm, SGEMM, (F77_CONST_CHAR_ARG2 ("N", 1),
+const char *ctransa = get_blas_trans_arg (transa);
-				       F77_CONST_CHAR_ARG2 ("N", 1),
+const char *ctransb = get_blas_trans_arg (transb);
-				       nr, a_nc, nc, 1.0, m.data (),
+	      F77_XFCN (sgemm, SGEMM, (F77_CONST_CHAR_ARG2 (ctransa, 1),
-				       ld, a.data (), lda, 0.0, c, nr
+				       F77_CONST_CHAR_ARG2 (ctransb, 1),
+				       a_nr, b_nc, a_nc, 1.0, a.data (),
+				       lda, b.data (), ldb, 0.0, c, a_nr
 				       F77_CHAR_ARG_LEN (1)
 				       F77_CHAR_ARG_LEN (1)));
 	    }
 	}
 }
 return retval;
+}
// Plain matrix product a * b: forwards to xgemm with both transpose flags
// set to false.
+FloatMatrix
+operator * (const FloatMatrix& a, const FloatMatrix& b)
+{
+return xgemm (false, a, false, b);
 }
 // FIXME -- it would be nice to share code among the min/max
 // functions below.

Mercurial > hg > octave-nkf

comparison liboctave/fMatrix.cc @ 7804:a0c550b22e61