diff jmedcouple.c++ @ 25:518ef922033e

Add C++ translation of Python code
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Sun, 18 Jan 2015 19:43:51 -0500
parents
children 40cbfb64e952
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/jmedcouple.c++
@@ -0,0 +1,253 @@
+#include <vector>
+#include <limits>
+#include <iostream>
+#include <string>
+#include <fstream>
+#include <iomanip>
+#include <sstream>
+#include <algorithm>
+#include <utility>
+
+using namespace std;
+
+template <typename T> 
+int signum(T val) {
+    return (T(0) < val) - (val < T(0));
+}
+
+template <typename Container, typename Out = typename Container::value_type>
+Out sum(const Container& C)  {
+  return accumulate(C.begin(), C.end(), Out());
+} 
+
+/*
+  This computes the weighted median of array A with corresponding
+  weights W.
+*/
+double wmedian(const vector<double>& A,
+               const vector<int>& W)
+{
+  typedef pair<double,int> aw_t;
+
+  int n = A.size();
+  vector<aw_t> AW(n);
+  for (int i = 0; i < n; i ++) 
+    AW[i] = make_pair(A[i], W[i]);
+
+  long wtot = sum(W);
+
+  int beg = 0;
+  int end = n - 1;
+
+  while (true) {
+    int mid = (beg + end)/2;
+    nth_element(AW.begin(), AW.begin() + mid, AW.end(),
+                [](const aw_t& l, const aw_t& r){return l.first > r.first;});
+    double trial = AW[mid].first;
+
+    long wleft = 0, wright = 0;
+
+    for (aw_t& aw: AW ) {
+      if (aw.first > trial)
+        wleft += aw.second;
+      else
+        // This also includes a == trial, i.e. the "middle" weight.
+        wright += aw.second;
+    }
+
+    if (2*wleft > wtot)
+      end = mid;
+    else if (2*wright < wtot)
+      beg = mid;
+    else
+      return trial;
+  }
+  
+  return 0;
+}
+
+double medcouple(const std::vector<double>& X,
+                 double eps1 = numeric_limits<double>::epsilon(), 
+                 double eps2 = numeric_limits<double>::min(),
+                 int trace_lev = 0)
+{
+  size_t n = X.size(), n2 = (n - 1)/2;
+  
+  if (n < 3)
+    return 0;
+
+  vector<double> Z = X;
+  sort(Z.begin(), Z.end(), std::greater<double>());
+
+  double Zmed;
+  if (n % 2 == 1) 
+    Zmed = Z[n2];
+  else
+    Zmed = (Z[n2] + Z[n2 + 1])/2;
+
+  // Check if the median is at the edges up to relative epsilon
+  if (abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)))
+    return -1.0;
+  if (abs(Z[n - 1] - Zmed) < eps1*(eps1 + abs(Zmed)))
+    return 1.0;
+  
+  // Centre Z wrt median, so that median(Z) = 0.
+  for_each(Z.begin(), Z.end(), [&](double& z){ z -= Zmed;});
+
+  // Scale inside [-0.5, 0.5], for greater numerical stability.
+  double Zden = 2*max(Z[0], -Z[n - 1]);
+  for_each(Z.begin(), Z.end(), [&](double& z){z /= Zden;});
+  Zmed /= Zden;
+
+  double Zeps = eps1*(eps1 + abs(Zmed));
+
+  // These overlap on the entries that are tied with the median
+  vector<double> Zplus, Zminus;
+  copy_if(Z.begin(), Z.end(), back_inserter(Zplus), 
+          [=](double z){return z >= -Zeps;});
+  copy_if(Z.begin(), Z.end(), back_inserter(Zminus), 
+          [=](double z){return Zeps >= z;});
+
+  size_t 
+    n_plus = Zplus.size(), 
+    n_minus = Zminus.size();
+
+  /*
+    Kernel function h for the medcouple, closing over the values of
+    Zplus and Zminus just defined above.
+
+    In case a and be are within epsilon of the median, the kernel
+    is the signum of their position.
+  */
+  auto h_kern = [&](int i, int j) {
+    double a = Zplus[i];
+    double b = Zminus[j];
+
+    double h;
+    if (abs(a - b) <= 2*eps2)
+      h = signum(n_plus - 1 - i - j);
+    else
+      h = (a + b)/(a - b);
+
+    return h;
+  };
+ 
+  // Init left and right borders
+  vector<int> L(n_plus, 0);
+  vector<int> R(n_plus, n_minus - 1);
+
+  long Ltot = 0;
+  long Rtot = n_minus*n_plus;
+  int medc_idx = Rtot/2;
+  
+  // kth pair algorithm (Johnson & Mizoguchi)
+  while (Rtot - Ltot > n_plus) {
+    // First, compute the median inside the given bounds
+    vector<double> A;
+    vector<int> W;
+    for (int i = 0; i < n_plus; i++) {
+      if (L[i] <= R[i]) {
+        A.push_back(h_kern(i, (L[i] + R[i])/2));
+        W.push_back(R[i] - L[i] + 1);
+      }
+    }
+    double Am = wmedian(A, W);
+    
+    double Am_eps = eps1*(eps1 + abs(Am));
+
+    //Compute new left and right boundaries, based on the weighted
+    //median
+    vector<int> P(n_plus), Q(n_plus);
+
+    {
+      int j = 0;
+      for (int i = n_plus - 1; i >= 0; i--) {
+        while (j < n_minus and h_kern(i, j) - Am > Am_eps)
+          j++;
+        P[i] = j - 1;
+      }
+    }
+
+    {
+      int j = n_minus - 1;
+      for (int i = 0; i < n_plus; i++) {
+        while (j >= 0 and h_kern(i, j) - Am < -Am_eps)
+          j--;
+        Q[i] = j + 1;
+      }
+    }
+
+    long 
+      sumP = sum(P) + n_plus,
+      sumQ = sum(Q);
+    
+    if (medc_idx <= sumP - 1) {
+      R = P;
+      Rtot = sumP;
+    }
+    else {
+      if (medc_idx > sumQ - 1) {
+        L = Q;
+        Ltot = sumQ;
+      }
+      else
+        return Am;
+    }
+  }
+
+  // Didn't find the median, but now we have a very small search space
+  // to find it in, just between the left and right boundaries. This
+  // space is of size Rtot - Ltot which is <= n_plus
+  vector<double> A;
+  for (int i = 0; i < n_plus; i++) {
+    for (int j = L[i]; j <= R[i]; j++)
+      A.push_back(h_kern(i, j));
+  }
+  nth_element(A.begin(), A.begin() + medc_idx - Ltot, A.end(),
+              [](double x, double y) {return x > y;});
+  
+  double Am = A[medc_idx - Ltot];
+  return Am;
+}
+
+
+
+int main(int argc, char** argv) {
+  double 
+    eps1 = numeric_limits<double>::epsilon(), 
+    eps2 = numeric_limits<double>::min();
+
+  string fname;
+  if (argc > 1) {
+    fname = argv[1];
+  }
+  else {
+    cerr << "Please provide input file." << endl;
+    return 1;
+  }
+
+  double trace_lev = 0;
+  if(argc > 2) {
+    stringstream ss;
+    ss << argv[2];
+    ss >> trace_lev;
+  }
+
+  ifstream ifs(fname);
+  if (not ifs) {
+    cerr << "File " << fname << " not found." << endl;
+    return 1;
+  }
+
+  vector<double> data;  
+  double datum;
+  while (ifs >> datum) {
+    data.push_back(datum);
+  }
+  
+  cout << setprecision(16) 
+       << medcouple(data, eps1, eps2, trace_lev) << endl;
+
+  return 0;
+}
+