view jmedcouple.c++ @ 74:305b7361a5bd default tip @

showalgo: save a snapshot instead of waiting for keyboard input
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Sun, 29 May 2016 19:05:01 -0400
parents 4c8b4c1e851e
children
line wrap: on
line source

// jmedcouple.c++ ---

// Copyright  © 2015 Jordi Gutiérrez Hermoso <jordigh@octave.org>

// Author: Jordi Gutiérrez Hermoso

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

#include <vector>
#include <limits>
#include <iostream>
#include <string>
#include <fstream>
#include <iomanip>
#include <algorithm>
#include <utility>
#include <tuple>

using namespace std;

template <typename T>
int signum(T val) {
  return (T(0) < val) - (val < T(0));
}

template <typename Container, typename Out = typename Container::value_type>
Out sum(const Container& C)  {
  return accumulate(C.begin(), C.end(), Out());
}

/*
  This computes the weighted median of array A with corresponding
  weights W.
*/
double wmedian(const vector<double>& A,
               const vector<long>& W)
{
  typedef pair<double, long> aw_t;

  long n = A.size();
  vector<aw_t> AW(n);
  for (long i = 0; i < n; i ++)
    AW[i] = make_pair(A[i], W[i]);

  long wtot = sum(W);

  long beg = 0;
  long end = n - 1;

  while (true) {
    long mid = (beg + end)/2;
    nth_element(AW.begin(), AW.begin() + mid, AW.end(),
                [](const aw_t& l, const aw_t& r){return l.first > r.first;});
    double trial = AW[mid].first;
    long wleft = 0, wright = 0;

    for (aw_t& aw: AW ) {
      double a;
      long w;
      tie(a, w) = move(aw);
      if (a > trial)
        wleft += w;
      else
        // This also includes a == trial, i.e. the "middle" weight.
        wright += w;
    }

    if (2*wleft > wtot)
      end = mid;
    else if (2*wright < wtot)
      beg = mid;
    else
      return trial;
  }

  return 0;
}

double medcouple(const std::vector<double>& X,
                 double eps1 = numeric_limits<double>::epsilon(),
                 double eps2 = numeric_limits<double>::min())
{
  long n = X.size(), n2 = (n - 1)/2;

  if (n < 3)
    return 0;

  vector<double> Z = X;
  sort(Z.begin(), Z.end(), std::greater<double>());

  double Zmed;
  if (n % 2 == 1)
    Zmed = Z[n2];
  else
    Zmed = (Z[n2] + Z[n2 + 1])/2;

  // Check if the median is at the edges up to relative epsilon
  if (abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)))
    return -1.0;
  if (abs(Z[n - 1] - Zmed) < eps1*(eps1 + abs(Zmed)))
    return 1.0;

  // Centre Z wrt median, so that median(Z) = 0.
  for_each(Z.begin(), Z.end(), [&](double& z){ z -= Zmed;});

  // Scale inside [-0.5, 0.5], for greater numerical stability.
  double Zden = 2*max(Z[0], -Z[n - 1]);
  for_each(Z.begin(), Z.end(), [&](double& z){z /= Zden;});
  Zmed /= Zden;

  double Zeps = eps1*(eps1 + abs(Zmed));

  // These overlap on the entries that are tied with the median
  vector<double> Zplus, Zminus;
  copy_if(Z.begin(), Z.end(), back_inserter(Zplus),
          [=](double z){return z >= -Zeps;});
  copy_if(Z.begin(), Z.end(), back_inserter(Zminus),
          [=](double z){return Zeps >= z;});

  long
    n_plus = Zplus.size(),
    n_minus = Zminus.size();

  /*
    Kernel function h for the medcouple, closing over the values of
    Zplus and Zminus just defined above.

    In case a and be are within epsilon of the median, the kernel
    is the signum of their position.
  */
  auto h_kern = [&](long i, long j) {
    double a = Zplus[i];
    double b = Zminus[j];

    double h;
    if (abs(a - b) <= 2*eps2)
      h = signum(n_plus - 1 - i - j);
    else
      h = (a + b)/(a - b);

    return h;
  };

  // Init left and right borders
  vector<long> L(n_plus, 0);
  vector<long> R(n_plus, n_minus - 1);

  long Ltot = 0;
  long Rtot = n_minus*n_plus;
  long medc_idx = Rtot/2;
  
  // kth pair algorithm (Johnson & Mizoguchi)
  while (Rtot - Ltot > n_plus) {

    // First, compute the median inside the given bounds
    vector<double> A;
    vector<long> W;
    for (long i = 0; i < n_plus; i++) {
      if (L[i] <= R[i]) {
        A.push_back(h_kern(i, (L[i] + R[i])/2));
        W.push_back(R[i] - L[i] + 1);
      }
    }
    double Am = wmedian(A, W);
    
    double Am_eps = eps1*(eps1 + abs(Am));

    // Compute new left and right boundaries, based on the weighted
    // median
    vector<long> P(n_plus), Q(n_plus);

    {
      long j = 0;
      for (long i = n_plus - 1; i >= 0; i--) {
        while (j < n_minus and h_kern(i, j) - Am > Am_eps)
          j++;
        P[i] = j - 1;
      }
    }

    {
      long j = n_minus - 1;
      for (long i = 0; i < n_plus; i++) {
        while (j >= 0 and h_kern(i, j) - Am < -Am_eps)
          j--;
        Q[i] = j + 1;
      }
    }

    long 
      sumP = sum(P) + n_plus,
      sumQ = sum(Q);

    if (medc_idx <= sumP - 1) {
      R = P;
      Rtot = sumP;
    }
    else {
      if (medc_idx > sumQ - 1) {
        L = Q;
        Ltot = sumQ;
      }
      else
        return Am;
    }
  }

  // Didn't find the median, but now we have a very small search space
  // to find it in, just between the left and right boundaries. This
  // space is of size Rtot - Ltot which is <= n_plus
  vector<double> A;
  for (long i = 0; i < n_plus; i++) {
    for (long j = L[i]; j <= R[i]; j++)
      A.push_back(h_kern(i, j));
  }
  nth_element(A.begin(), A.begin() + medc_idx - Ltot, A.end(),
              [](double x, double y) {return x > y;});
  
  double Am = A[medc_idx - Ltot];
  return Am;
}


int main(int argc, char** argv) {
  double 
    eps1 = numeric_limits<double>::epsilon(), 
    eps2 = numeric_limits<double>::min();

  string fname;
  if (argc > 1) {
    fname = argv[1];
  }
  else {
    cerr << "Please provide input file." << endl;
    return 1;
  }

  ifstream ifs(fname);
  if (not ifs) {
    cerr << "File " << fname << " not found." << endl;
    return 1;
  }

  vector<double> data;  
  double datum;
  while (ifs >> datum) {
    data.push_back(datum);
  }
  
  cout << setprecision(16) 
       << medcouple(data, eps1, eps2) << endl;

  return 0;
}