Mercurial > hg > medcouple
view medcouple.d @ 74:305b7361a5bd default tip @
showalgo: save a snapshot instead of waiting for keyboard input
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Sun, 29 May 2016 19:05:01 -0400 |
parents | 18e7cd6ff057 |
children |
line wrap: on
line source
// medcouple.d --- // Copyright © 2016 Jordi Gutiérrez Hermoso <jordigh@octave.org> // Author: Jordi Gutiérrez Hermoso // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 3 // of the License, or (at your option) any later version. // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // You should have received a copy of the GNU General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. import std.stdio; import std.array; import std.conv: to; import std.range: iota, zip; import std.string: strip; import std.typecons: Tuple; import std.algorithm: filter, map, max, reduce, sort, topN; import std.math: abs; // Could also just be reduce!"a+b", they call this "string lambda" alias sum = reduce!((a,b) => a + b); int signum(T)(T val) { return (cast(T)(0) < val ) - (val < cast(T)(0)); } double wmedian(double[] A, long[] W) { auto AW = zip(A,W).array; long n = AW.length; auto wtot = sum(W); typeof(n) beg = 0, end = n - 1; while(true) { auto mid = (beg+end)/2; topN!((x, y) => x[0] > y[0])(AW[beg..end+1], mid); auto trial = AW[mid][0]; typeof(n) wleft = 0, wright = 0; foreach(aw; AW) { auto a = aw[0]; auto w = aw[1]; if (a > trial) wleft += w; else wright += w; } if (2*wleft > wtot) end = mid; else if (2*wright < wtot) beg = mid; else return trial; } } double medcouple(const double[] X, double eps1 = double.epsilon, double eps2 = double.min) { long n = X.length, n2 = (n-1)/2; if (n < 3) return 0; auto Z = X.dup; sort!((a,b) => a > b)(Z); auto Zmed = n % 2 == 1 ? Z[n2] : (Z[n2] + Z[n2 + 1])/2; // Check if the median is at the edges up to relative epsilon if (abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed))) return -1.0; if (abs(Z[n - 1] - Zmed) < eps1*(eps1 + abs(Zmed))) return 1.0; // Centre Z wrt median, so that median(Z) = 0. Z[] -= Zmed; // Scale inside [-0.5, 0.5], for greater numerical stability. auto Zden = 2*max(Z[0], -Z[n - 1]); Z[] /= Zden; Zmed /= Zden; auto Zeps = eps1*(eps1 + abs(Zmed)), // These overlap on the entries that are tied with the median Zplus = filter!(z => z >= -Zeps)(Z).array, Zminus = filter!(z => Zeps >= z)(Z).array; long n_plus = Zplus.length, n_minus = Zminus.length; /* Kernel function h for the medcouple, closing over the values of Zplus and Zminus just defined above. In case a and be are within epsilon of the median, the kernel is the signum of their position. */ auto h_kern(long i, long j){ auto a = Zplus[i], b = Zminus[j]; if (abs(a - b) <= 2*eps2) return signum(n_plus - 1 - i - j); else return (a+b)/(a-b); } auto // Init left and right borders L = replicate([0L], n_plus), R = replicate([n_minus - 1], n_plus), Ltot = 0L, Rtot = n_minus*n_plus, medc_idx = Rtot/2; // kth pair algorithm (Johnson & Mizoguchi) while (Rtot - Ltot > n_plus) { // First, compute the weighted median of row medians inside the // given bounds auto I = filter!(i => L[i] <= R[i])(iota(0, n_plus-1)), A = map!(i => h_kern(i, (L[i] + R[i])/2))(I).array, W = map!(i => R[i] - L[i] + 1)(I).array, Am = wmedian(A, W), Am_eps = eps1*(eps1 + abs(Am)); // Compute new left and right boundaries, based on the weighted // median auto P = new long[n_plus], Q = new long[n_plus]; { auto j = 0L; for (auto i = n_plus - 1; i >= 0; i--) { while (j < n_minus && h_kern(i, j) - Am > Am_eps) j++; P[i] = j - 1; } } { auto j = n_minus - 1; for (auto i = 0; i < n_plus; i++) { while (j >= 0 && h_kern(i, j) - Am < -Am_eps) j--; Q[i] = j + 1; } } // Discard left or right entries, depending on which side the // medcouple is. auto sumP = sum(P) + n_plus, sumQ = sum(Q); if (medc_idx <= sumP - 1) { R = P; Rtot = sumP; } else { if (medc_idx > sumQ - 1) { L = Q; Ltot = sumQ; } else return Am; } } // Didn't find the median, but now we have a very small search space // to find it in, just between the left and right boundaries. This // space is of size Rtot - Ltot which is <= n_plus auto A = new double[Rtot - Ltot], idx = 0L; for (auto i = 0L; i < n_plus; i++) { for (auto j = L[i]; j <= R[i]; j++){ A[idx] = h_kern(i, j); ++idx; } } topN!((x,y) => x > y)(A, medc_idx - Ltot); return A[medc_idx - Ltot]; } int main(string[] argv) { if (argv.length < 2) { stderr.writeln("Please provide input file."); return 1; } auto fname = argv[1]; double[] data; foreach(line; File(fname).byLine) { if (! line.empty) data ~= to!(double)(line.strip); } writefln("%0.16f", medcouple(data)); return 0; }