changeset 38:45fb52ba3057

Add D implementation of the medcouple Currently slow, gotta figure out why.
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Mon, 09 May 2016 19:12:28 -0400
parents 4fb3b87b8610
children 4c8b4c1e851e
files medcouple.d
diffstat 1 files changed, 229 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/medcouple.d
@@ -0,0 +1,229 @@
+// medcouple.d ---
+
+// Copyright  © 2016 Jordi Gutiérrez Hermoso <jordigh@octave.org>
+
+// Author: Jordi Gutiérrez Hermoso
+
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 3
+// of the License, or (at your option) any later version.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import std.stdio;
+import std.array;
+import std.conv: to;
+import std.range: iota, zip;
+import std.string: strip;
+import std.typecons: Tuple;
+import std.algorithm: filter, map, max, reduce, sort, topN;
+import std.math: abs;
+
+// Could also just be reduce!"a+b", they call this "string lambda"
+alias sum = reduce!((a,b) => a + b);
+
+int signum(T)(T val) {
+  return (cast(T)(0) < val ) - (val < cast(T)(0));
+}
+
+double wmedian(double[] A, long[] W) {
+  auto AW = zip(A,W).array;
+  long n = AW.length;
+  auto wtot = sum(W);
+
+  typeof(n)
+    beg = 0,
+    end = n - 1;
+
+  while(true) {
+    auto mid = (beg+end)/2;
+    topN!((x, y) => x[0] > y[0])(AW, mid);
+
+    auto trial = AW[mid][0];
+
+    typeof(n) wleft = 0, wright = 0;
+
+    foreach(aw; AW) {
+      auto a = aw[0];
+      auto w = aw[1];
+      if (a > trial)
+        wleft += w;
+      else
+        wright += w;
+    }
+
+    if (2*wleft > wtot)
+      end = mid;
+    else if (2*wright < wtot)
+      beg = mid;
+    else
+      return trial;
+  }
+}
+
+double medcouple(const double[] X,
+                 double eps1 = double.epsilon,
+                 double eps2 = double.min)
+{
+  long n = X.length, n2 = (n-1)/2;
+  if (n < 3)
+    return 0;
+
+  auto Z = X.dup;
+  sort!((a,b) => a > b)(Z);
+
+  auto Zmed = n % 2 == 1 ? Z[n2] : (Z[n2] + Z[n2 + 1])/2;
+
+  // Check if the median is at the edges up to relative epsilon
+  if (abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)))
+    return -1.0;
+  if (abs(Z[n - 1] - Zmed) < eps1*(eps1 + abs(Zmed)))
+    return 1.0;
+
+  // Centre Z wrt median, so that median(Z) = 0.
+  Z[] -= Zmed;
+
+  // Scale inside [-0.5, 0.5], for greater numerical stability.
+  auto Zden = 2*max(Z[0], -Z[n - 1]);
+  Z[] /= Zden;
+  Zmed /= Zden;
+
+  
+  auto
+    Zeps = eps1*(eps1 + abs(Zmed)),
+
+    // These overlap on the entries that are tied with the median
+    Zplus = filter!(z => z >= -Zeps)(Z).array,
+    Zminus = filter!(z => Zeps >= z)(Z).array;
+
+  long
+    n_plus = Zplus.length,
+    n_minus = Zminus.length;
+
+
+  /*
+    Kernel function h for the medcouple, closing over the values of
+    Zplus and Zminus just defined above.
+
+    In case a and be are within epsilon of the median, the kernel
+    is the signum of their position.
+  */
+  auto h_kern(long i, long j){
+    auto
+      a = Zplus[i],
+      b = Zminus[j];
+
+    if (abs(a - b) <= 2*eps2)
+      return signum(n_plus - 1 - i - j);
+    else
+      return (a+b)/(a-b);
+  }
+
+
+  auto
+    // Init left and right borders
+    L = replicate([0L], n_plus),
+    R = replicate([n_minus - 1], n_plus),
+    
+    Ltot = 0L,
+    Rtot = n_minus*n_plus,
+    medc_idx = Rtot/2;
+
+  // kth pair algorithm (Johnson & Mizoguchi)
+  while (Rtot - Ltot > n_plus) {
+
+    // First, compute the weighted median of row medians inside the
+    // given bounds
+    auto
+      I = filter!(i => L[i] <= R[i])(iota(0, n_plus-1)),
+      A = map!(i => h_kern(i, (L[i] + R[i])/2))(I).array,
+      W = map!(i => R[i] - L[i] + 1)(I).array,
+
+      Am = wmedian(A, W),
+      Am_eps = eps1*(eps1 + abs(Am));
+
+    // Compute new left and right boundaries, based on the weighted
+    // median
+    auto P = new long[n_plus], Q = new long[n_plus];
+    {
+      auto j = 0L;
+      for (auto i = n_plus - 1; i >= 0; i--) {
+        while (j < n_minus && h_kern(i, j) - Am > Am_eps)
+          j++;
+        P[i] = j - 1;
+      }
+    }
+
+    {
+      auto j = n_minus - 1;
+      for (auto i = 0; i < n_plus; i++) {
+        while (j >= 0 && h_kern(i, j) - Am < -Am_eps)
+          j--;
+        Q[i] = j + 1;
+      }
+    }
+
+    // Discard left or right entries, depending on which side the
+    // medcouple is.
+    auto
+      sumP = sum(P) + n_plus,
+      sumQ = sum(Q);
+
+    if (medc_idx <= sumP - 1) {
+      R = P;
+      Rtot = sumP;
+    }
+    else {
+      if (medc_idx > sumQ - 1) {
+        L = Q;
+        Ltot = sumQ;
+      }
+      else
+        return Am;
+    }
+  }
+
+  // Didn't find the median, but now we have a very small search space
+  // to find it in, just between the left and right boundaries. This
+  // space is of size Rtot - Ltot which is <= n_plus
+  auto
+    A = new double[Rtot - Ltot],
+    idx = 0L;
+
+  for (auto i = 0L; i < n_plus; i++) {
+    for (auto j = L[i]; j <= R[i]; j++){
+      A[idx] = h_kern(i, j);
+      ++idx;
+    }
+  }
+  
+  topN!((x,y) => x > y)(A, medc_idx - Ltot);
+  return A[medc_idx - Ltot];
+ 
+}
+
+int main(string[] argv) {
+  if (argv.length < 2) {
+    stderr.writeln("Please provide input file.");
+    return 1;
+  }
+
+  auto fname = argv[1];
+  double[] data;
+
+  foreach(line; File(fname).byLine) {
+    if (! line.empty)
+      data ~= to!(double)(line.strip);
+  }
+
+  writefln("%0.16f", medcouple(data));
+    
+  return 0;
+}