view medcouple.d @ 74:305b7361a5bd default tip @

showalgo: save a snapshot instead of waiting for keyboard input
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Sun, 29 May 2016 19:05:01 -0400
parents 18e7cd6ff057
children
line wrap: on
line source

// medcouple.d ---

// Copyright  © 2016 Jordi Gutiérrez Hermoso <jordigh@octave.org>

// Author: Jordi Gutiérrez Hermoso

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 3
// of the License, or (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

import std.stdio;
import std.array;
import std.conv: to;
import std.range: iota, zip;
import std.string: strip;
import std.typecons: Tuple;
import std.algorithm: filter, map, max, reduce, sort, topN;
import std.math: abs;

// Could also just be reduce!"a+b", they call this "string lambda"
alias sum = reduce!((a,b) => a + b);

int signum(T)(T val) {
  return (cast(T)(0) < val ) - (val < cast(T)(0));
}

double wmedian(double[] A, long[] W) {
  auto AW = zip(A,W).array;
  long n = AW.length;
  auto wtot = sum(W);

  typeof(n)
    beg = 0,
    end = n - 1;

  while(true) {
    auto mid = (beg+end)/2;
    topN!((x, y) => x[0] > y[0])(AW[beg..end+1], mid);

    auto trial = AW[mid][0];

    typeof(n) wleft = 0, wright = 0;

    foreach(aw; AW) {
      auto a = aw[0];
      auto w = aw[1];
      if (a > trial)
        wleft += w;
      else
        wright += w;
    }

    if (2*wleft > wtot)
      end = mid;
    else if (2*wright < wtot)
      beg = mid;
    else
      return trial;
  }
}

double medcouple(const double[] X,
                 double eps1 = double.epsilon,
                 double eps2 = double.min)
{
  long n = X.length, n2 = (n-1)/2;
  if (n < 3)
    return 0;

  auto Z = X.dup;
  sort!((a,b) => a > b)(Z);

  auto Zmed = n % 2 == 1 ? Z[n2] : (Z[n2] + Z[n2 + 1])/2;

  // Check if the median is at the edges up to relative epsilon
  if (abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)))
    return -1.0;
  if (abs(Z[n - 1] - Zmed) < eps1*(eps1 + abs(Zmed)))
    return 1.0;

  // Centre Z wrt median, so that median(Z) = 0.
  Z[] -= Zmed;

  // Scale inside [-0.5, 0.5], for greater numerical stability.
  auto Zden = 2*max(Z[0], -Z[n - 1]);
  Z[] /= Zden;
  Zmed /= Zden;

  
  auto
    Zeps = eps1*(eps1 + abs(Zmed)),

    // These overlap on the entries that are tied with the median
    Zplus = filter!(z => z >= -Zeps)(Z).array,
    Zminus = filter!(z => Zeps >= z)(Z).array;

  long
    n_plus = Zplus.length,
    n_minus = Zminus.length;


  /*
    Kernel function h for the medcouple, closing over the values of
    Zplus and Zminus just defined above.

    In case a and be are within epsilon of the median, the kernel
    is the signum of their position.
  */
  auto h_kern(long i, long j){
    auto
      a = Zplus[i],
      b = Zminus[j];

    if (abs(a - b) <= 2*eps2)
      return signum(n_plus - 1 - i - j);
    else
      return (a+b)/(a-b);
  }


  auto
    // Init left and right borders
    L = replicate([0L], n_plus),
    R = replicate([n_minus - 1], n_plus),
    
    Ltot = 0L,
    Rtot = n_minus*n_plus,
    medc_idx = Rtot/2;

  // kth pair algorithm (Johnson & Mizoguchi)
  while (Rtot - Ltot > n_plus) {

    // First, compute the weighted median of row medians inside the
    // given bounds
    auto
      I = filter!(i => L[i] <= R[i])(iota(0, n_plus-1)),
      A = map!(i => h_kern(i, (L[i] + R[i])/2))(I).array,
      W = map!(i => R[i] - L[i] + 1)(I).array,

      Am = wmedian(A, W),
      Am_eps = eps1*(eps1 + abs(Am));

    // Compute new left and right boundaries, based on the weighted
    // median
    auto P = new long[n_plus], Q = new long[n_plus];
    {
      auto j = 0L;
      for (auto i = n_plus - 1; i >= 0; i--) {
        while (j < n_minus && h_kern(i, j) - Am > Am_eps)
          j++;
        P[i] = j - 1;
      }
    }

    {
      auto j = n_minus - 1;
      for (auto i = 0; i < n_plus; i++) {
        while (j >= 0 && h_kern(i, j) - Am < -Am_eps)
          j--;
        Q[i] = j + 1;
      }
    }

    // Discard left or right entries, depending on which side the
    // medcouple is.
    auto
      sumP = sum(P) + n_plus,
      sumQ = sum(Q);

    if (medc_idx <= sumP - 1) {
      R = P;
      Rtot = sumP;
    }
    else {
      if (medc_idx > sumQ - 1) {
        L = Q;
        Ltot = sumQ;
      }
      else
        return Am;
    }
  }

  // Didn't find the median, but now we have a very small search space
  // to find it in, just between the left and right boundaries. This
  // space is of size Rtot - Ltot which is <= n_plus
  auto
    A = new double[Rtot - Ltot],
    idx = 0L;

  for (auto i = 0L; i < n_plus; i++) {
    for (auto j = L[i]; j <= R[i]; j++){
      A[idx] = h_kern(i, j);
      ++idx;
    }
  }
  
  topN!((x,y) => x > y)(A, medc_idx - Ltot);
  return A[medc_idx - Ltot];
 
}

int main(string[] argv) {
  if (argv.length < 2) {
    stderr.writeln("Please provide input file.");
    return 1;
  }

  auto fname = argv[1];
  double[] data;

  foreach(line; File(fname).byLine) {
    if (! line.empty)
      data ~= to!(double)(line.strip);
  }

  writefln("%0.16f", medcouple(data));
    
  return 0;
}