Mercurial > hg > medcouple
changeset 25:518ef922033e
Add C++ translation of Python code
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Sun, 18 Jan 2015 19:43:51 -0500 |
parents | cd3094a08e03 |
children | 6e69d0bfd44f |
files | jmedcouple.c++ |
diffstat | 1 files changed, 253 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/jmedcouple.c++ @@ -0,0 +1,253 @@ +#include <vector> +#include <limits> +#include <iostream> +#include <string> +#include <fstream> +#include <iomanip> +#include <sstream> +#include <algorithm> +#include <utility> + +using namespace std; + +template <typename T> +int signum(T val) { + return (T(0) < val) - (val < T(0)); +} + +template <typename Container, typename Out = typename Container::value_type> +Out sum(const Container& C) { + return accumulate(C.begin(), C.end(), Out()); +} + +/* + This computes the weighted median of array A with corresponding + weights W. +*/ +double wmedian(const vector<double>& A, + const vector<int>& W) +{ + typedef pair<double,int> aw_t; + + int n = A.size(); + vector<aw_t> AW(n); + for (int i = 0; i < n; i ++) + AW[i] = make_pair(A[i], W[i]); + + long wtot = sum(W); + + int beg = 0; + int end = n - 1; + + while (true) { + int mid = (beg + end)/2; + nth_element(AW.begin(), AW.begin() + mid, AW.end(), + [](const aw_t& l, const aw_t& r){return l.first > r.first;}); + double trial = AW[mid].first; + + long wleft = 0, wright = 0; + + for (aw_t& aw: AW ) { + if (aw.first > trial) + wleft += aw.second; + else + // This also includes a == trial, i.e. the "middle" weight. + wright += aw.second; + } + + if (2*wleft > wtot) + end = mid; + else if (2*wright < wtot) + beg = mid; + else + return trial; + } + + return 0; +} + +double medcouple(const std::vector<double>& X, + double eps1 = numeric_limits<double>::epsilon(), + double eps2 = numeric_limits<double>::min(), + int trace_lev = 0) +{ + size_t n = X.size(), n2 = (n - 1)/2; + + if (n < 3) + return 0; + + vector<double> Z = X; + sort(Z.begin(), Z.end(), std::greater<double>()); + + double Zmed; + if (n % 2 == 1) + Zmed = Z[n2]; + else + Zmed = (Z[n2] + Z[n2 + 1])/2; + + // Check if the median is at the edges up to relative epsilon + if (abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed))) + return -1.0; + if (abs(Z[n - 1] - Zmed) < eps1*(eps1 + abs(Zmed))) + return 1.0; + + // Centre Z wrt median, so that median(Z) = 0. + for_each(Z.begin(), Z.end(), [&](double& z){ z -= Zmed;}); + + // Scale inside [-0.5, 0.5], for greater numerical stability. + double Zden = 2*max(Z[0], -Z[n - 1]); + for_each(Z.begin(), Z.end(), [&](double& z){z /= Zden;}); + Zmed /= Zden; + + double Zeps = eps1*(eps1 + abs(Zmed)); + + // These overlap on the entries that are tied with the median + vector<double> Zplus, Zminus; + copy_if(Z.begin(), Z.end(), back_inserter(Zplus), + [=](double z){return z >= -Zeps;}); + copy_if(Z.begin(), Z.end(), back_inserter(Zminus), + [=](double z){return Zeps >= z;}); + + size_t + n_plus = Zplus.size(), + n_minus = Zminus.size(); + + /* + Kernel function h for the medcouple, closing over the values of + Zplus and Zminus just defined above. + + In case a and be are within epsilon of the median, the kernel + is the signum of their position. + */ + auto h_kern = [&](int i, int j) { + double a = Zplus[i]; + double b = Zminus[j]; + + double h; + if (abs(a - b) <= 2*eps2) + h = signum(n_plus - 1 - i - j); + else + h = (a + b)/(a - b); + + return h; + }; + + // Init left and right borders + vector<int> L(n_plus, 0); + vector<int> R(n_plus, n_minus - 1); + + long Ltot = 0; + long Rtot = n_minus*n_plus; + int medc_idx = Rtot/2; + + // kth pair algorithm (Johnson & Mizoguchi) + while (Rtot - Ltot > n_plus) { + // First, compute the median inside the given bounds + vector<double> A; + vector<int> W; + for (int i = 0; i < n_plus; i++) { + if (L[i] <= R[i]) { + A.push_back(h_kern(i, (L[i] + R[i])/2)); + W.push_back(R[i] - L[i] + 1); + } + } + double Am = wmedian(A, W); + + double Am_eps = eps1*(eps1 + abs(Am)); + + //Compute new left and right boundaries, based on the weighted + //median + vector<int> P(n_plus), Q(n_plus); + + { + int j = 0; + for (int i = n_plus - 1; i >= 0; i--) { + while (j < n_minus and h_kern(i, j) - Am > Am_eps) + j++; + P[i] = j - 1; + } + } + + { + int j = n_minus - 1; + for (int i = 0; i < n_plus; i++) { + while (j >= 0 and h_kern(i, j) - Am < -Am_eps) + j--; + Q[i] = j + 1; + } + } + + long + sumP = sum(P) + n_plus, + sumQ = sum(Q); + + if (medc_idx <= sumP - 1) { + R = P; + Rtot = sumP; + } + else { + if (medc_idx > sumQ - 1) { + L = Q; + Ltot = sumQ; + } + else + return Am; + } + } + + // Didn't find the median, but now we have a very small search space + // to find it in, just between the left and right boundaries. This + // space is of size Rtot - Ltot which is <= n_plus + vector<double> A; + for (int i = 0; i < n_plus; i++) { + for (int j = L[i]; j <= R[i]; j++) + A.push_back(h_kern(i, j)); + } + nth_element(A.begin(), A.begin() + medc_idx - Ltot, A.end(), + [](double x, double y) {return x > y;}); + + double Am = A[medc_idx - Ltot]; + return Am; +} + + + +int main(int argc, char** argv) { + double + eps1 = numeric_limits<double>::epsilon(), + eps2 = numeric_limits<double>::min(); + + string fname; + if (argc > 1) { + fname = argv[1]; + } + else { + cerr << "Please provide input file." << endl; + return 1; + } + + double trace_lev = 0; + if(argc > 2) { + stringstream ss; + ss << argv[2]; + ss >> trace_lev; + } + + ifstream ifs(fname); + if (not ifs) { + cerr << "File " << fname << " not found." << endl; + return 1; + } + + vector<double> data; + double datum; + while (ifs >> datum) { + data.push_back(datum); + } + + cout << setprecision(16) + << medcouple(data, eps1, eps2, trace_lev) << endl; + + return 0; +} +