diff liboctave/oct-norm.cc @ 8303:b11c31849b44

improve norm computation capabilities
author Jaroslav Hajek <highegg@gmail.com>
date Fri, 31 Oct 2008 08:05:32 +0100
parents
children 5fe0f4dfdbec
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/liboctave/oct-norm.cc
@@ -0,0 +1,562 @@
+/*
+
+Copyright (C) 2008 VZLU Prague, a.s.
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+// author: Jaroslav Hajek <highegg@gmail.com>
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <cassert>
+#include <cfloat>
+#include <cmath>
+
+#include <iostream>
+
+#include "oct-cmplx.h"
+#include "lo-error.h"
+#include "lo-ieee.h"
+#include "Array.h"
+#include "Array.cc"
+#include "Array-util.h"
+#include "CMatrix.h"
+#include "dMatrix.h"
+#include "fCMatrix.h"
+#include "fMatrix.h"
+#include "CColVector.h"
+#include "dColVector.h"
+#include "CRowVector.h"
+#include "dRowVector.h"
+#include "fCColVector.h"
+#include "fColVector.h"
+#include "fCRowVector.h"
+#include "fRowVector.h"
+#include "CSparse.h"
+#include "dSparse.h"
+#include "dbleSVD.h"
+#include "CmplxSVD.h"
+#include "floatSVD.h"
+#include "fCmplxSVD.h"
+
+// Theory: norm accumulator is an object that has an accum method able to handle
+// both real and complex element, and a cast operator returning the intermediate
+// norm.
+
+// norm accumulator for the p-norm
+template <class R>
+class norm_accumulator_p
+{
+  R p,scl,sum;
+public:
+  norm_accumulator_p () {} // we need this one for Array
+  norm_accumulator_p (R pp) : p(pp), scl(0), sum(1) {}
+
+  template<class U> 
+  void accum (U val)
+    {
+      R t = std::abs (val);
+      if (scl == t) // we need this to handle Infs properly
+        sum += 1;
+      else if (scl < t)
+        {
+          sum *= std::pow (scl/t, p);
+          sum += 1;
+          scl = t;
+        }
+      else if (t != 0)
+        sum += std::pow (t/scl, p);
+    }
+  operator R () { return scl * std::pow (sum, 1/p); }
+};
+
+// norm accumulator for the minus p-pseudonorm
+template <class R>
+class norm_accumulator_mp
+{
+  R p,scl,sum;
+public:
+  norm_accumulator_mp () {} // we need this one for Array
+  norm_accumulator_mp (R pp) : p(pp), scl(0), sum(1) {}
+
+  template<class U> 
+  void accum (U val)
+    {
+      R t = 1 / std::abs (val);
+      if (scl == t)
+        sum += 1;
+      else if (scl < t)
+        {
+          sum *= std::pow (scl/t, p);
+          sum += 1;
+          scl = t;
+        }
+      else if (t != 0)
+        sum += std::pow (t/scl, p);
+    }
+  operator R () { return scl * std::pow (sum, -1/p); }
+};
+
+// norm accumulator for the 2-norm (euclidean)
+template <class R>
+class norm_accumulator_2
+{
+  R scl,sum;
+  static R pow2 (R x) { return x*x; }
+public:
+  norm_accumulator_2 () : scl(0), sum(1) {}
+
+  void accum (R val)
+    {
+      R t = std::abs (val);
+      if (scl == t)
+        sum += 1;
+      else if (scl < t)
+        {
+          sum *= pow2 (scl/t);
+          sum += 1;
+          scl = t;
+        }
+      else if (t != 0)
+        sum += pow2 (t/scl);
+    }
+
+  void accum (std::complex<R> val)
+    {
+      accum (val.real ());
+      accum (val.imag ());
+    }
+
+  operator R () { return scl * std::sqrt (sum); }
+};
+
+// norm accumulator for the 1-norm (city metric)
+template <class R>
+class norm_accumulator_1
+{
+  R sum;
+public:
+  norm_accumulator_1 () : sum (0) {}
+  template<class U> 
+  void accum (U val)
+    {
+      sum += std::abs (val); 
+    }
+  operator R () { return sum; }
+};
+
+// norm accumulator for the inf-norm (max metric)
+template <class R>
+class norm_accumulator_inf
+{
+  R max;
+public:
+  norm_accumulator_inf () : max (0) {}
+  template<class U> 
+  void accum (U val)
+    {
+      max = std::max (max, std::abs (val));
+    }
+  operator R () { return max; }
+};
+
+// norm accumulator for the -inf pseudonorm (min abs value)
+template <class R>
+class norm_accumulator_minf
+{
+  R min;
+public:
+  norm_accumulator_minf () : min (octave_Inf) {}
+  template<class U> 
+  void accum (U val)
+    {
+      min = std::min (min, std::abs (val));
+    }
+  operator R () { return min; }
+};
+
+// norm accumulator for the 0-pseudonorm (hamming distance)
+template <class R>
+class norm_accumulator_0
+{
+  unsigned int num;
+public:
+  norm_accumulator_0 () : num (0) {}
+  template<class U> 
+  void accum (U val)
+    {
+      if (val != static_cast<U> (0)) ++num;
+    }
+  operator R () { return num; }
+};
+
+
+// OK, we're armed :) Now let's go for the fun
+
+template <class T, class R, class ACC>
+inline void vector_norm (const Array<T>& v, R& res, ACC acc)
+{
+  for (octave_idx_type i = 0; i < v.numel (); i++)
+    acc.accum (v(i));
+
+  res = acc;
+}
+
+// dense versions
+template <class T, class R, class ACC>
+void column_norms (const MArray2<T>& m, MArray<R>& res, ACC acc)
+{
+  res.resize (m.columns ());
+  for (octave_idx_type j = 0; j < m.columns (); j++)
+    {
+      ACC accj = acc;
+      for (octave_idx_type i = 0; i < m.rows (); i++)
+        accj.accum (m(i, j));
+
+      res.xelem (j) = accj;
+    }
+}
+
+template <class T, class R, class ACC>
+void row_norms (const MArray2<T>& m, MArray<R>& res, ACC acc)
+{
+  res.resize (m.rows ());
+  Array<ACC> acci (m.rows (), acc); 
+  for (octave_idx_type j = 0; j < m.columns (); j++)
+    {
+      for (octave_idx_type i = 0; i < m.rows (); i++)
+        acci.xelem (i).accum (m(i, j));
+    }
+
+  for (octave_idx_type i = 0; i < m.rows (); i++)
+    res.xelem (i) = acci(i);
+}
+
+// sparse versions
+template <class T, class R, class ACC>
+void column_norms (const MSparse<T>& m, MArray<R>& res, ACC acc)
+{
+  res.resize (m.columns ());
+  for (octave_idx_type j = 0; j < m.columns (); j++)
+    {
+      ACC accj = acc;
+      for (octave_idx_type k = m.cidx (j); k < m.cidx (j+1); k++)
+        accj.accum (m.data (k));
+
+      res.xelem (j) = accj;
+    }
+}
+
+template <class T, class R, class ACC>
+void row_norms (const MSparse<T>& m, MArray<R>& res, ACC acc)
+{
+  res.resize (m.rows ());
+  Array<ACC> acci (m.rows (), acc); 
+  for (octave_idx_type j = 0; j < m.columns (); j++)
+    {
+      for (octave_idx_type k = m.cidx (j); k < m.cidx (j+1); k++)
+        acci(m.ridx (k)).accum (m.data (k));
+    }
+
+  for (octave_idx_type i = 0; i < m.rows (); i++)
+    res.xelem (i) = acci(i);
+}
+
+// now the dispatchers
+#define DEFINE_DISPATCHER(FUNC_NAME, ARG_TYPE, RES_TYPE) \
+template <class T, class R> \
+RES_TYPE FUNC_NAME (const ARG_TYPE& v, R p) \
+{ \
+  RES_TYPE res; \
+  if (p == 2) \
+    FUNC_NAME (v, res, norm_accumulator_2<R> ()); \
+  else if (p == 1) \
+    FUNC_NAME (v, res, norm_accumulator_1<R> ()); \
+  else if (lo_ieee_isinf (p)) \
+    { \
+      if (p > 0) \
+        FUNC_NAME (v, res, norm_accumulator_inf<R> ()); \
+      else \
+        FUNC_NAME (v, res, norm_accumulator_minf<R> ()); \
+    } \
+  else if (p == 0) \
+    FUNC_NAME (v, res, norm_accumulator_0<R> ()); \
+  else if (p > 0) \
+    FUNC_NAME (v, res, norm_accumulator_p<R> (p)); \
+  else \
+    FUNC_NAME (v, res, norm_accumulator_mp<R> (p)); \
+  return res; \
+}
+
+DEFINE_DISPATCHER (vector_norm, MArray<T>, R)
+DEFINE_DISPATCHER (vector_norm, MArray2<T>, R)
+DEFINE_DISPATCHER (column_norms, MArray2<T>, MArray<R>)
+DEFINE_DISPATCHER (row_norms, MArray2<T>, MArray<R>)
+DEFINE_DISPATCHER (column_norms, MSparse<T>, MArray<R>)
+DEFINE_DISPATCHER (row_norms, MSparse<T>, MArray<R>)
+
+// The approximate subproblem in Higham's method. Find lambda and mu such that
+// norm ([lambda, mu], p) == 1 and norm (y*lambda + col*mu, p) is maximized.
+// Real version. As in Higham's paper.
+template <class ColVectorT, class R>
+static void 
+higham_subp (const ColVectorT& y, const ColVectorT& col, 
+             octave_idx_type nsamp, R p, R& lambda, R& mu)
+{
+  R nrm = 0;
+  for (octave_idx_type i = 0; i < nsamp; i++)
+    {
+      R fi = i*M_PI/nsamp, lambda1 = cos (fi), mu1 = sin (fi);
+      R lmnr = std::pow (std::pow (std::abs (lambda1), p) + 
+                         std::pow (std::abs (mu1), p), 1/p);
+      lambda1 /= lmnr; mu1 /= lmnr;
+      R nrm1 = vector_norm (lambda1 * y + mu1 * col, p);
+      if (nrm1 > nrm)
+        {
+          lambda = lambda1;
+          mu = mu1;
+          nrm = nrm1;
+        }
+    }
+}
+
+// Complex version. Higham's paper does not deal with complex case, so we use a simple
+// extension. First, guess the magnitudes as in real version, then try to rotate lambda
+// to improve further.
+template <class ColVectorT, class R>
+static void 
+higham_subp (const ColVectorT& y, const ColVectorT& col, 
+             octave_idx_type nsamp, R p, 
+             std::complex<R>& lambda, std::complex<R>& mu)
+{
+  typedef std::complex<R> CR;
+  R nrm = 0;
+  lambda = 1.0;
+  CR lamcu = lambda / std::abs (lambda);
+  // Probe magnitudes
+  for (octave_idx_type i = 0; i < nsamp; i++)
+    {
+      R fi = i*M_PI/nsamp, lambda1 = cos (fi), mu1 = sin (fi);
+      R lmnr = std::pow (std::pow (std::abs (lambda1), p) + 
+                         std::pow (std::abs (mu1), p), 1/p);
+      lambda1 /= lmnr; mu1 /= lmnr;
+      R nrm1 = vector_norm (lambda1 * lamcu * y + mu1 * col, p);
+      if (nrm1 > nrm)
+        {
+          lambda = lambda1 * lamcu;
+          mu = mu1;
+          nrm = nrm1;
+        }
+    }
+  R lama = std::abs (lambda);
+  // Probe orientation
+  for (octave_idx_type i = 0; i < nsamp; i++)
+    {
+      R fi = i*M_PI/nsamp;
+      lamcu = CR (cos (fi), sin (fi));
+      R nrm1 = vector_norm (lama * lamcu * y + mu * col, p);
+      if (nrm1 > nrm)
+        {
+          lambda = lama * lamcu;
+          nrm = nrm1;
+        }
+    }
+}
+
+// the p-dual element (should work for both real and complex)
+template <class T, class R>
+inline T elem_dual_p (T x, R p)
+{
+  return signum (x) * std::pow (std::abs (x), p-1);
+}
+
+// the VectorT is used for vectors, but actually it has to be
+// a Matrix type to allow all the operations. For instance SparseMatrix
+// does not support multiplication with column/row vectors.
+// the dual vector
+template <class VectorT, class R>
+VectorT dual_p (const VectorT& x, R p, R q)
+{
+  VectorT res (x.dims ());
+  for (octave_idx_type i = 0; i < x.numel (); i++)
+    res.xelem (i) = elem_dual_p (x(i), p);
+  return res / vector_norm (res, q);
+}
+
+// Higham's hybrid method
+template <class MatrixT, class VectorT, class R>
+R higham (const MatrixT& m, R p, R tol, int maxiter,
+          VectorT& x)
+{
+  x.resize (m.columns (), 1);
+  // the OSE part
+  VectorT y(m.rows (), 1, 0), z(m.rows (), 1);
+  typedef typename VectorT::element_type RR;
+  RR lambda = 0, mu = 0;
+  for (octave_idx_type k = 0; k < m.columns (); k++)
+    {
+      VectorT col (m.column (k));
+      if (k > 0)
+        higham_subp (y, col, 4*k, p, lambda, mu);
+      for (octave_idx_type i = 0; i < k; i++)
+        x(i) *= lambda;
+      x(k) = mu;
+      y = lambda * y + mu * col;
+    }
+  
+  // the PM part
+  x = x / vector_norm (x, p);
+  R q = p/(p-1);
+
+  R gamma = 0, gamma1;
+  int iter = 0;
+  while (iter < maxiter)
+    {
+      y = m*x;
+      gamma1 = gamma;
+      gamma = vector_norm (y, p);
+      z = dual_p (y, p, q);
+      z = z.hermitian ();
+      z = z * m;
+
+      if (iter > 0 && (vector_norm (z, q) <= gamma
+                       || (gamma - gamma1) <= tol*gamma))
+        break;
+
+      z = z.hermitian ();
+      x = dual_p (z, q, p);
+      iter ++;
+    }
+
+  return gamma;
+}
+
+// TODO: is there a better way?
+inline float get_eps (float) { return FLT_EPSILON; }
+inline double get_eps (double) { return DBL_EPSILON; }
+// derive column vector and SVD types 
+
+static const char *p_less1_gripe = "xnorm: p must be at least 1";
+
+// Static constant to control the maximum number of iterations. 100 seems to be a good value.
+// Eventually, we can provide a means to change this constant from Octave.
+static int max_norm_iter = 100;
+
+// version with SVD for dense matrices
+template <class MatrixT, class VectorT, class SVDT, class R>
+R matrix_norm (const MatrixT& m, R p, VectorT, SVDT)
+{
+  R res = 0;
+  if (p == 2)
+    {
+      SVDT svd (m, SVD::sigma_only);
+      res = svd.singular_values () (0,0);
+    }
+  else if (p == 1)
+    res = xcolnorms (m, 1).max ();
+  else if (lo_ieee_isinf (p))
+    res = xrownorms (m, 1).max ();
+  else if (p > 1)
+    {
+      VectorT x;
+      res = higham (m, p, std::sqrt (get_eps (p)), max_norm_iter, x);
+    }
+  else
+    (*current_liboctave_error_handler) (p_less1_gripe); 
+
+  return res;
+}
+
+// SVD-free version for sparse matrices
+template <class MatrixT, class VectorT, class R>
+R matrix_norm (const MatrixT& m, R p, VectorT)
+{
+  R res = 0;
+  if (p == 1)
+    res = xcolnorms (m, 1).max ();
+  else if (lo_ieee_isinf (p))
+    res = xrownorms (m, 1).max ();
+  else if (p > 1)
+    {
+      VectorT x;
+      res = higham (m, p, std::sqrt (get_eps (p)), max_norm_iter, x);
+    }
+  else
+    (*current_liboctave_error_handler) (p_less1_gripe); 
+
+  return res;
+}
+
+// and finally, here's what we've promised in the header file
+
+#define DEFINE_XNORM_FUNCS(PREFIX, RTYPE) \
+  RTYPE xnorm (const PREFIX##ColumnVector& x, RTYPE p) \
+  { return vector_norm (x, p); } \
+  RTYPE xnorm (const PREFIX##RowVector& x, RTYPE p) \
+  { return vector_norm (x, p); } \
+  RTYPE xnorm (const PREFIX##Matrix& x, RTYPE p) \
+  { return matrix_norm (x, p, PREFIX##Matrix (), PREFIX##SVD ()); } \
+  RTYPE xfrobnorm (const PREFIX##Matrix& x) \
+  { return vector_norm (x, static_cast<RTYPE> (2)); }
+
+DEFINE_XNORM_FUNCS(, double)
+DEFINE_XNORM_FUNCS(Complex, double)
+DEFINE_XNORM_FUNCS(Float, float)
+DEFINE_XNORM_FUNCS(FloatComplex, float)
+
+// this is needed to avoid copying the sparse matrix for xfrobnorm
+template <class T, class R>
+inline void array_norm_2 (const T* v, octave_idx_type n, R& res)
+{
+  norm_accumulator_2<R> acc;
+  for (octave_idx_type i = 0; i < n; i++)
+    acc.accum (v[i]);
+
+  res = acc;
+}
+
+#define DEFINE_XNORM_SPARSE_FUNCS(PREFIX, RTYPE) \
+  RTYPE xnorm (const Sparse##PREFIX##Matrix& x, RTYPE p) \
+  { return matrix_norm (x, p, PREFIX##Matrix ()); } \
+  RTYPE xfrobnorm (const Sparse##PREFIX##Matrix& x) \
+  { \
+    RTYPE res; \
+    array_norm_2 (x.data (), x.nnz (), res); \
+    return res; \
+  }
+
+DEFINE_XNORM_SPARSE_FUNCS(, double)
+DEFINE_XNORM_SPARSE_FUNCS(Complex, double)
+
+#define DEFINE_COLROW_NORM_FUNCS(PREFIX, RPREFIX, RTYPE) \
+  extern RPREFIX##RowVector xcolnorms (const PREFIX##Matrix& m, RTYPE p) \
+  { return column_norms (m, p); } \
+  extern RPREFIX##ColumnVector xrownorms (const PREFIX##Matrix& m, RTYPE p) \
+  { return row_norms (m, p); } \
+
+DEFINE_COLROW_NORM_FUNCS(, , double)
+DEFINE_COLROW_NORM_FUNCS(Complex, , double)
+DEFINE_COLROW_NORM_FUNCS(Float, Float, float)
+DEFINE_COLROW_NORM_FUNCS(FloatComplex, Float, float)
+
+DEFINE_COLROW_NORM_FUNCS(Sparse, , double)
+DEFINE_COLROW_NORM_FUNCS(SparseComplex, , double)
+