Mercurial > hg > medcouple
view jmedcouple.c++ @ 74:305b7361a5bd default tip @
showalgo: save a snapshot instead of waiting for keyboard input
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Sun, 29 May 2016 19:05:01 -0400 |
parents | 4c8b4c1e851e |
children |
line wrap: on
line source
// jmedcouple.c++ --- // Copyright © 2015 Jordi Gutiérrez Hermoso <jordigh@octave.org> // Author: Jordi Gutiérrez Hermoso // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 3 // of the License, or (at your option) any later version. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // You should have received a copy of the GNU General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. #include <vector> #include <limits> #include <iostream> #include <string> #include <fstream> #include <iomanip> #include <algorithm> #include <utility> #include <tuple> using namespace std; template <typename T> int signum(T val) { return (T(0) < val) - (val < T(0)); } template <typename Container, typename Out = typename Container::value_type> Out sum(const Container& C) { return accumulate(C.begin(), C.end(), Out()); } /* This computes the weighted median of array A with corresponding weights W. */ double wmedian(const vector<double>& A, const vector<long>& W) { typedef pair<double, long> aw_t; long n = A.size(); vector<aw_t> AW(n); for (long i = 0; i < n; i ++) AW[i] = make_pair(A[i], W[i]); long wtot = sum(W); long beg = 0; long end = n - 1; while (true) { long mid = (beg + end)/2; nth_element(AW.begin(), AW.begin() + mid, AW.end(), [](const aw_t& l, const aw_t& r){return l.first > r.first;}); double trial = AW[mid].first; long wleft = 0, wright = 0; for (aw_t& aw: AW ) { double a; long w; tie(a, w) = move(aw); if (a > trial) wleft += w; else // This also includes a == trial, i.e. the "middle" weight. wright += w; } if (2*wleft > wtot) end = mid; else if (2*wright < wtot) beg = mid; else return trial; } return 0; } double medcouple(const std::vector<double>& X, double eps1 = numeric_limits<double>::epsilon(), double eps2 = numeric_limits<double>::min()) { long n = X.size(), n2 = (n - 1)/2; if (n < 3) return 0; vector<double> Z = X; sort(Z.begin(), Z.end(), std::greater<double>()); double Zmed; if (n % 2 == 1) Zmed = Z[n2]; else Zmed = (Z[n2] + Z[n2 + 1])/2; // Check if the median is at the edges up to relative epsilon if (abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed))) return -1.0; if (abs(Z[n - 1] - Zmed) < eps1*(eps1 + abs(Zmed))) return 1.0; // Centre Z wrt median, so that median(Z) = 0. for_each(Z.begin(), Z.end(), [&](double& z){ z -= Zmed;}); // Scale inside [-0.5, 0.5], for greater numerical stability. double Zden = 2*max(Z[0], -Z[n - 1]); for_each(Z.begin(), Z.end(), [&](double& z){z /= Zden;}); Zmed /= Zden; double Zeps = eps1*(eps1 + abs(Zmed)); // These overlap on the entries that are tied with the median vector<double> Zplus, Zminus; copy_if(Z.begin(), Z.end(), back_inserter(Zplus), [=](double z){return z >= -Zeps;}); copy_if(Z.begin(), Z.end(), back_inserter(Zminus), [=](double z){return Zeps >= z;}); long n_plus = Zplus.size(), n_minus = Zminus.size(); /* Kernel function h for the medcouple, closing over the values of Zplus and Zminus just defined above. In case a and be are within epsilon of the median, the kernel is the signum of their position. */ auto h_kern = [&](long i, long j) { double a = Zplus[i]; double b = Zminus[j]; double h; if (abs(a - b) <= 2*eps2) h = signum(n_plus - 1 - i - j); else h = (a + b)/(a - b); return h; }; // Init left and right borders vector<long> L(n_plus, 0); vector<long> R(n_plus, n_minus - 1); long Ltot = 0; long Rtot = n_minus*n_plus; long medc_idx = Rtot/2; // kth pair algorithm (Johnson & Mizoguchi) while (Rtot - Ltot > n_plus) { // First, compute the median inside the given bounds vector<double> A; vector<long> W; for (long i = 0; i < n_plus; i++) { if (L[i] <= R[i]) { A.push_back(h_kern(i, (L[i] + R[i])/2)); W.push_back(R[i] - L[i] + 1); } } double Am = wmedian(A, W); double Am_eps = eps1*(eps1 + abs(Am)); // Compute new left and right boundaries, based on the weighted // median vector<long> P(n_plus), Q(n_plus); { long j = 0; for (long i = n_plus - 1; i >= 0; i--) { while (j < n_minus and h_kern(i, j) - Am > Am_eps) j++; P[i] = j - 1; } } { long j = n_minus - 1; for (long i = 0; i < n_plus; i++) { while (j >= 0 and h_kern(i, j) - Am < -Am_eps) j--; Q[i] = j + 1; } } long sumP = sum(P) + n_plus, sumQ = sum(Q); if (medc_idx <= sumP - 1) { R = P; Rtot = sumP; } else { if (medc_idx > sumQ - 1) { L = Q; Ltot = sumQ; } else return Am; } } // Didn't find the median, but now we have a very small search space // to find it in, just between the left and right boundaries. This // space is of size Rtot - Ltot which is <= n_plus vector<double> A; for (long i = 0; i < n_plus; i++) { for (long j = L[i]; j <= R[i]; j++) A.push_back(h_kern(i, j)); } nth_element(A.begin(), A.begin() + medc_idx - Ltot, A.end(), [](double x, double y) {return x > y;}); double Am = A[medc_idx - Ltot]; return Am; } int main(int argc, char** argv) { double eps1 = numeric_limits<double>::epsilon(), eps2 = numeric_limits<double>::min(); string fname; if (argc > 1) { fname = argv[1]; } else { cerr << "Please provide input file." << endl; return 1; } ifstream ifs(fname); if (not ifs) { cerr << "File " << fname << " not found." << endl; return 1; } vector<double> data; double datum; while (ifs >> datum) { data.push_back(datum); } cout << setprecision(16) << medcouple(data, eps1, eps2) << endl; return 0; }