comparison jmedcouple.c++ @ 25:518ef922033e

Add C++ translation of Python code
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Sun, 18 Jan 2015 19:43:51 -0500
parents
children 40cbfb64e952
comparison
equal deleted inserted replaced
24:cd3094a08e03 25:518ef922033e
1 #include <vector>
2 #include <limits>
3 #include <iostream>
4 #include <string>
5 #include <fstream>
6 #include <iomanip>
7 #include <sstream>
8 #include <algorithm>
9 #include <utility>
10
11 using namespace std;
12
13 template <typename T>
14 int signum(T val) {
15 return (T(0) < val) - (val < T(0));
16 }
17
18 template <typename Container, typename Out = typename Container::value_type>
19 Out sum(const Container& C) {
20 return accumulate(C.begin(), C.end(), Out());
21 }
22
23 /*
24 This computes the weighted median of array A with corresponding
25 weights W.
26 */
27 double wmedian(const vector<double>& A,
28 const vector<int>& W)
29 {
30 typedef pair<double,int> aw_t;
31
32 int n = A.size();
33 vector<aw_t> AW(n);
34 for (int i = 0; i < n; i ++)
35 AW[i] = make_pair(A[i], W[i]);
36
37 long wtot = sum(W);
38
39 int beg = 0;
40 int end = n - 1;
41
42 while (true) {
43 int mid = (beg + end)/2;
44 nth_element(AW.begin(), AW.begin() + mid, AW.end(),
45 [](const aw_t& l, const aw_t& r){return l.first > r.first;});
46 double trial = AW[mid].first;
47
48 long wleft = 0, wright = 0;
49
50 for (aw_t& aw: AW ) {
51 if (aw.first > trial)
52 wleft += aw.second;
53 else
54 // This also includes a == trial, i.e. the "middle" weight.
55 wright += aw.second;
56 }
57
58 if (2*wleft > wtot)
59 end = mid;
60 else if (2*wright < wtot)
61 beg = mid;
62 else
63 return trial;
64 }
65
66 return 0;
67 }
68
69 double medcouple(const std::vector<double>& X,
70 double eps1 = numeric_limits<double>::epsilon(),
71 double eps2 = numeric_limits<double>::min(),
72 int trace_lev = 0)
73 {
74 size_t n = X.size(), n2 = (n - 1)/2;
75
76 if (n < 3)
77 return 0;
78
79 vector<double> Z = X;
80 sort(Z.begin(), Z.end(), std::greater<double>());
81
82 double Zmed;
83 if (n % 2 == 1)
84 Zmed = Z[n2];
85 else
86 Zmed = (Z[n2] + Z[n2 + 1])/2;
87
88 // Check if the median is at the edges up to relative epsilon
89 if (abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)))
90 return -1.0;
91 if (abs(Z[n - 1] - Zmed) < eps1*(eps1 + abs(Zmed)))
92 return 1.0;
93
94 // Centre Z wrt median, so that median(Z) = 0.
95 for_each(Z.begin(), Z.end(), [&](double& z){ z -= Zmed;});
96
97 // Scale inside [-0.5, 0.5], for greater numerical stability.
98 double Zden = 2*max(Z[0], -Z[n - 1]);
99 for_each(Z.begin(), Z.end(), [&](double& z){z /= Zden;});
100 Zmed /= Zden;
101
102 double Zeps = eps1*(eps1 + abs(Zmed));
103
104 // These overlap on the entries that are tied with the median
105 vector<double> Zplus, Zminus;
106 copy_if(Z.begin(), Z.end(), back_inserter(Zplus),
107 [=](double z){return z >= -Zeps;});
108 copy_if(Z.begin(), Z.end(), back_inserter(Zminus),
109 [=](double z){return Zeps >= z;});
110
111 size_t
112 n_plus = Zplus.size(),
113 n_minus = Zminus.size();
114
115 /*
116 Kernel function h for the medcouple, closing over the values of
117 Zplus and Zminus just defined above.
118
119 In case a and be are within epsilon of the median, the kernel
120 is the signum of their position.
121 */
122 auto h_kern = [&](int i, int j) {
123 double a = Zplus[i];
124 double b = Zminus[j];
125
126 double h;
127 if (abs(a - b) <= 2*eps2)
128 h = signum(n_plus - 1 - i - j);
129 else
130 h = (a + b)/(a - b);
131
132 return h;
133 };
134
135 // Init left and right borders
136 vector<int> L(n_plus, 0);
137 vector<int> R(n_plus, n_minus - 1);
138
139 long Ltot = 0;
140 long Rtot = n_minus*n_plus;
141 int medc_idx = Rtot/2;
142
143 // kth pair algorithm (Johnson & Mizoguchi)
144 while (Rtot - Ltot > n_plus) {
145 // First, compute the median inside the given bounds
146 vector<double> A;
147 vector<int> W;
148 for (int i = 0; i < n_plus; i++) {
149 if (L[i] <= R[i]) {
150 A.push_back(h_kern(i, (L[i] + R[i])/2));
151 W.push_back(R[i] - L[i] + 1);
152 }
153 }
154 double Am = wmedian(A, W);
155
156 double Am_eps = eps1*(eps1 + abs(Am));
157
158 //Compute new left and right boundaries, based on the weighted
159 //median
160 vector<int> P(n_plus), Q(n_plus);
161
162 {
163 int j = 0;
164 for (int i = n_plus - 1; i >= 0; i--) {
165 while (j < n_minus and h_kern(i, j) - Am > Am_eps)
166 j++;
167 P[i] = j - 1;
168 }
169 }
170
171 {
172 int j = n_minus - 1;
173 for (int i = 0; i < n_plus; i++) {
174 while (j >= 0 and h_kern(i, j) - Am < -Am_eps)
175 j--;
176 Q[i] = j + 1;
177 }
178 }
179
180 long
181 sumP = sum(P) + n_plus,
182 sumQ = sum(Q);
183
184 if (medc_idx <= sumP - 1) {
185 R = P;
186 Rtot = sumP;
187 }
188 else {
189 if (medc_idx > sumQ - 1) {
190 L = Q;
191 Ltot = sumQ;
192 }
193 else
194 return Am;
195 }
196 }
197
198 // Didn't find the median, but now we have a very small search space
199 // to find it in, just between the left and right boundaries. This
200 // space is of size Rtot - Ltot which is <= n_plus
201 vector<double> A;
202 for (int i = 0; i < n_plus; i++) {
203 for (int j = L[i]; j <= R[i]; j++)
204 A.push_back(h_kern(i, j));
205 }
206 nth_element(A.begin(), A.begin() + medc_idx - Ltot, A.end(),
207 [](double x, double y) {return x > y;});
208
209 double Am = A[medc_idx - Ltot];
210 return Am;
211 }
212
213
214
215 int main(int argc, char** argv) {
216 double
217 eps1 = numeric_limits<double>::epsilon(),
218 eps2 = numeric_limits<double>::min();
219
220 string fname;
221 if (argc > 1) {
222 fname = argv[1];
223 }
224 else {
225 cerr << "Please provide input file." << endl;
226 return 1;
227 }
228
229 double trace_lev = 0;
230 if(argc > 2) {
231 stringstream ss;
232 ss << argv[2];
233 ss >> trace_lev;
234 }
235
236 ifstream ifs(fname);
237 if (not ifs) {
238 cerr << "File " << fname << " not found." << endl;
239 return 1;
240 }
241
242 vector<double> data;
243 double datum;
244 while (ifs >> datum) {
245 data.push_back(datum);
246 }
247
248 cout << setprecision(16)
249 << medcouple(data, eps1, eps2, trace_lev) << endl;
250
251 return 0;
252 }
253