Mercurial > hg > medcouple
comparison jmedcouple.c++ @ 25:518ef922033e
Add C++ translation of Python code
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Sun, 18 Jan 2015 19:43:51 -0500 |
parents | |
children | 40cbfb64e952 |
comparison
equal
deleted
inserted
replaced
24:cd3094a08e03 | 25:518ef922033e |
---|---|
1 #include <vector> | |
2 #include <limits> | |
3 #include <iostream> | |
4 #include <string> | |
5 #include <fstream> | |
6 #include <iomanip> | |
7 #include <sstream> | |
8 #include <algorithm> | |
9 #include <utility> | |
10 | |
11 using namespace std; | |
12 | |
13 template <typename T> | |
14 int signum(T val) { | |
15 return (T(0) < val) - (val < T(0)); | |
16 } | |
17 | |
18 template <typename Container, typename Out = typename Container::value_type> | |
19 Out sum(const Container& C) { | |
20 return accumulate(C.begin(), C.end(), Out()); | |
21 } | |
22 | |
23 /* | |
24 This computes the weighted median of array A with corresponding | |
25 weights W. | |
26 */ | |
27 double wmedian(const vector<double>& A, | |
28 const vector<int>& W) | |
29 { | |
30 typedef pair<double,int> aw_t; | |
31 | |
32 int n = A.size(); | |
33 vector<aw_t> AW(n); | |
34 for (int i = 0; i < n; i ++) | |
35 AW[i] = make_pair(A[i], W[i]); | |
36 | |
37 long wtot = sum(W); | |
38 | |
39 int beg = 0; | |
40 int end = n - 1; | |
41 | |
42 while (true) { | |
43 int mid = (beg + end)/2; | |
44 nth_element(AW.begin(), AW.begin() + mid, AW.end(), | |
45 [](const aw_t& l, const aw_t& r){return l.first > r.first;}); | |
46 double trial = AW[mid].first; | |
47 | |
48 long wleft = 0, wright = 0; | |
49 | |
50 for (aw_t& aw: AW ) { | |
51 if (aw.first > trial) | |
52 wleft += aw.second; | |
53 else | |
54 // This also includes a == trial, i.e. the "middle" weight. | |
55 wright += aw.second; | |
56 } | |
57 | |
58 if (2*wleft > wtot) | |
59 end = mid; | |
60 else if (2*wright < wtot) | |
61 beg = mid; | |
62 else | |
63 return trial; | |
64 } | |
65 | |
66 return 0; | |
67 } | |
68 | |
69 double medcouple(const std::vector<double>& X, | |
70 double eps1 = numeric_limits<double>::epsilon(), | |
71 double eps2 = numeric_limits<double>::min(), | |
72 int trace_lev = 0) | |
73 { | |
74 size_t n = X.size(), n2 = (n - 1)/2; | |
75 | |
76 if (n < 3) | |
77 return 0; | |
78 | |
79 vector<double> Z = X; | |
80 sort(Z.begin(), Z.end(), std::greater<double>()); | |
81 | |
82 double Zmed; | |
83 if (n % 2 == 1) | |
84 Zmed = Z[n2]; | |
85 else | |
86 Zmed = (Z[n2] + Z[n2 + 1])/2; | |
87 | |
88 // Check if the median is at the edges up to relative epsilon | |
89 if (abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed))) | |
90 return -1.0; | |
91 if (abs(Z[n - 1] - Zmed) < eps1*(eps1 + abs(Zmed))) | |
92 return 1.0; | |
93 | |
94 // Centre Z wrt median, so that median(Z) = 0. | |
95 for_each(Z.begin(), Z.end(), [&](double& z){ z -= Zmed;}); | |
96 | |
97 // Scale inside [-0.5, 0.5], for greater numerical stability. | |
98 double Zden = 2*max(Z[0], -Z[n - 1]); | |
99 for_each(Z.begin(), Z.end(), [&](double& z){z /= Zden;}); | |
100 Zmed /= Zden; | |
101 | |
102 double Zeps = eps1*(eps1 + abs(Zmed)); | |
103 | |
104 // These overlap on the entries that are tied with the median | |
105 vector<double> Zplus, Zminus; | |
106 copy_if(Z.begin(), Z.end(), back_inserter(Zplus), | |
107 [=](double z){return z >= -Zeps;}); | |
108 copy_if(Z.begin(), Z.end(), back_inserter(Zminus), | |
109 [=](double z){return Zeps >= z;}); | |
110 | |
111 size_t | |
112 n_plus = Zplus.size(), | |
113 n_minus = Zminus.size(); | |
114 | |
115 /* | |
116 Kernel function h for the medcouple, closing over the values of | |
117 Zplus and Zminus just defined above. | |
118 | |
119 In case a and be are within epsilon of the median, the kernel | |
120 is the signum of their position. | |
121 */ | |
122 auto h_kern = [&](int i, int j) { | |
123 double a = Zplus[i]; | |
124 double b = Zminus[j]; | |
125 | |
126 double h; | |
127 if (abs(a - b) <= 2*eps2) | |
128 h = signum(n_plus - 1 - i - j); | |
129 else | |
130 h = (a + b)/(a - b); | |
131 | |
132 return h; | |
133 }; | |
134 | |
135 // Init left and right borders | |
136 vector<int> L(n_plus, 0); | |
137 vector<int> R(n_plus, n_minus - 1); | |
138 | |
139 long Ltot = 0; | |
140 long Rtot = n_minus*n_plus; | |
141 int medc_idx = Rtot/2; | |
142 | |
143 // kth pair algorithm (Johnson & Mizoguchi) | |
144 while (Rtot - Ltot > n_plus) { | |
145 // First, compute the median inside the given bounds | |
146 vector<double> A; | |
147 vector<int> W; | |
148 for (int i = 0; i < n_plus; i++) { | |
149 if (L[i] <= R[i]) { | |
150 A.push_back(h_kern(i, (L[i] + R[i])/2)); | |
151 W.push_back(R[i] - L[i] + 1); | |
152 } | |
153 } | |
154 double Am = wmedian(A, W); | |
155 | |
156 double Am_eps = eps1*(eps1 + abs(Am)); | |
157 | |
158 //Compute new left and right boundaries, based on the weighted | |
159 //median | |
160 vector<int> P(n_plus), Q(n_plus); | |
161 | |
162 { | |
163 int j = 0; | |
164 for (int i = n_plus - 1; i >= 0; i--) { | |
165 while (j < n_minus and h_kern(i, j) - Am > Am_eps) | |
166 j++; | |
167 P[i] = j - 1; | |
168 } | |
169 } | |
170 | |
171 { | |
172 int j = n_minus - 1; | |
173 for (int i = 0; i < n_plus; i++) { | |
174 while (j >= 0 and h_kern(i, j) - Am < -Am_eps) | |
175 j--; | |
176 Q[i] = j + 1; | |
177 } | |
178 } | |
179 | |
180 long | |
181 sumP = sum(P) + n_plus, | |
182 sumQ = sum(Q); | |
183 | |
184 if (medc_idx <= sumP - 1) { | |
185 R = P; | |
186 Rtot = sumP; | |
187 } | |
188 else { | |
189 if (medc_idx > sumQ - 1) { | |
190 L = Q; | |
191 Ltot = sumQ; | |
192 } | |
193 else | |
194 return Am; | |
195 } | |
196 } | |
197 | |
198 // Didn't find the median, but now we have a very small search space | |
199 // to find it in, just between the left and right boundaries. This | |
200 // space is of size Rtot - Ltot which is <= n_plus | |
201 vector<double> A; | |
202 for (int i = 0; i < n_plus; i++) { | |
203 for (int j = L[i]; j <= R[i]; j++) | |
204 A.push_back(h_kern(i, j)); | |
205 } | |
206 nth_element(A.begin(), A.begin() + medc_idx - Ltot, A.end(), | |
207 [](double x, double y) {return x > y;}); | |
208 | |
209 double Am = A[medc_idx - Ltot]; | |
210 return Am; | |
211 } | |
212 | |
213 | |
214 | |
215 int main(int argc, char** argv) { | |
216 double | |
217 eps1 = numeric_limits<double>::epsilon(), | |
218 eps2 = numeric_limits<double>::min(); | |
219 | |
220 string fname; | |
221 if (argc > 1) { | |
222 fname = argv[1]; | |
223 } | |
224 else { | |
225 cerr << "Please provide input file." << endl; | |
226 return 1; | |
227 } | |
228 | |
229 double trace_lev = 0; | |
230 if(argc > 2) { | |
231 stringstream ss; | |
232 ss << argv[2]; | |
233 ss >> trace_lev; | |
234 } | |
235 | |
236 ifstream ifs(fname); | |
237 if (not ifs) { | |
238 cerr << "File " << fname << " not found." << endl; | |
239 return 1; | |
240 } | |
241 | |
242 vector<double> data; | |
243 double datum; | |
244 while (ifs >> datum) { | |
245 data.push_back(datum); | |
246 } | |
247 | |
248 cout << setprecision(16) | |
249 << medcouple(data, eps1, eps2, trace_lev) << endl; | |
250 | |
251 return 0; | |
252 } | |
253 |