view jmedcouple.c++ @ 74:305b7361a5bddefaulttip@

showalgo: save a snapshot instead of waiting for keyboard input
author Jordi Gutiérrez Hermoso Sun, 29 May 2016 19:05:01 -0400 4c8b4c1e851e
line wrap: on
line source
```
// jmedcouple.c++ ---

// Author: Jordi Gutiérrez Hermoso

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#include <vector>
#include <limits>
#include <iostream>
#include <string>
#include <fstream>
#include <iomanip>
#include <algorithm>
#include <utility>
#include <tuple>

using namespace std;

template <typename T>
int signum(T val) {
return (T(0) < val) - (val < T(0));
}

template <typename Container, typename Out = typename Container::value_type>
Out sum(const Container& C)  {
return accumulate(C.begin(), C.end(), Out());
}

/*
This computes the weighted median of array A with corresponding
weights W.
*/
double wmedian(const vector<double>& A,
const vector<long>& W)
{
typedef pair<double, long> aw_t;

long n = A.size();
vector<aw_t> AW(n);
for (long i = 0; i < n; i ++)
AW[i] = make_pair(A[i], W[i]);

long wtot = sum(W);

long beg = 0;
long end = n - 1;

while (true) {
long mid = (beg + end)/2;
nth_element(AW.begin(), AW.begin() + mid, AW.end(),
[](const aw_t& l, const aw_t& r){return l.first > r.first;});
double trial = AW[mid].first;
long wleft = 0, wright = 0;

for (aw_t& aw: AW ) {
double a;
long w;
tie(a, w) = move(aw);
if (a > trial)
wleft += w;
else
// This also includes a == trial, i.e. the "middle" weight.
wright += w;
}

if (2*wleft > wtot)
end = mid;
else if (2*wright < wtot)
beg = mid;
else
return trial;
}

return 0;
}

double medcouple(const std::vector<double>& X,
double eps1 = numeric_limits<double>::epsilon(),
double eps2 = numeric_limits<double>::min())
{
long n = X.size(), n2 = (n - 1)/2;

if (n < 3)
return 0;

vector<double> Z = X;
sort(Z.begin(), Z.end(), std::greater<double>());

double Zmed;
if (n % 2 == 1)
Zmed = Z[n2];
else
Zmed = (Z[n2] + Z[n2 + 1])/2;

// Check if the median is at the edges up to relative epsilon
if (abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)))
return -1.0;
if (abs(Z[n - 1] - Zmed) < eps1*(eps1 + abs(Zmed)))
return 1.0;

// Centre Z wrt median, so that median(Z) = 0.
for_each(Z.begin(), Z.end(), [&](double& z){ z -= Zmed;});

// Scale inside [-0.5, 0.5], for greater numerical stability.
double Zden = 2*max(Z[0], -Z[n - 1]);
for_each(Z.begin(), Z.end(), [&](double& z){z /= Zden;});
Zmed /= Zden;

double Zeps = eps1*(eps1 + abs(Zmed));

// These overlap on the entries that are tied with the median
vector<double> Zplus, Zminus;
copy_if(Z.begin(), Z.end(), back_inserter(Zplus),
[=](double z){return z >= -Zeps;});
copy_if(Z.begin(), Z.end(), back_inserter(Zminus),
[=](double z){return Zeps >= z;});

long
n_plus = Zplus.size(),
n_minus = Zminus.size();

/*
Kernel function h for the medcouple, closing over the values of
Zplus and Zminus just defined above.

In case a and be are within epsilon of the median, the kernel
is the signum of their position.
*/
auto h_kern = [&](long i, long j) {
double a = Zplus[i];
double b = Zminus[j];

double h;
if (abs(a - b) <= 2*eps2)
h = signum(n_plus - 1 - i - j);
else
h = (a + b)/(a - b);

return h;
};

// Init left and right borders
vector<long> L(n_plus, 0);
vector<long> R(n_plus, n_minus - 1);

long Ltot = 0;
long Rtot = n_minus*n_plus;
long medc_idx = Rtot/2;

// kth pair algorithm (Johnson & Mizoguchi)
while (Rtot - Ltot > n_plus) {

// First, compute the median inside the given bounds
vector<double> A;
vector<long> W;
for (long i = 0; i < n_plus; i++) {
if (L[i] <= R[i]) {
A.push_back(h_kern(i, (L[i] + R[i])/2));
W.push_back(R[i] - L[i] + 1);
}
}
double Am = wmedian(A, W);

double Am_eps = eps1*(eps1 + abs(Am));

// Compute new left and right boundaries, based on the weighted
// median
vector<long> P(n_plus), Q(n_plus);

{
long j = 0;
for (long i = n_plus - 1; i >= 0; i--) {
while (j < n_minus and h_kern(i, j) - Am > Am_eps)
j++;
P[i] = j - 1;
}
}

{
long j = n_minus - 1;
for (long i = 0; i < n_plus; i++) {
while (j >= 0 and h_kern(i, j) - Am < -Am_eps)
j--;
Q[i] = j + 1;
}
}

long
sumP = sum(P) + n_plus,
sumQ = sum(Q);

if (medc_idx <= sumP - 1) {
R = P;
Rtot = sumP;
}
else {
if (medc_idx > sumQ - 1) {
L = Q;
Ltot = sumQ;
}
else
return Am;
}
}

// Didn't find the median, but now we have a very small search space
// to find it in, just between the left and right boundaries. This
// space is of size Rtot - Ltot which is <= n_plus
vector<double> A;
for (long i = 0; i < n_plus; i++) {
for (long j = L[i]; j <= R[i]; j++)
A.push_back(h_kern(i, j));
}
nth_element(A.begin(), A.begin() + medc_idx - Ltot, A.end(),
[](double x, double y) {return x > y;});

double Am = A[medc_idx - Ltot];
return Am;
}

int main(int argc, char** argv) {
double
eps1 = numeric_limits<double>::epsilon(),
eps2 = numeric_limits<double>::min();

string fname;
if (argc > 1) {
fname = argv[1];
}
else {
cerr << "Please provide input file." << endl;
return 1;
}

ifstream ifs(fname);
if (not ifs) {
cerr << "File " << fname << " not found." << endl;
return 1;
}

vector<double> data;
double datum;
while (ifs >> datum) {
data.push_back(datum);
}

cout << setprecision(16)
<< medcouple(data, eps1, eps2) << endl;

return 0;
}
```