Mercurial > hg > medcouple
changeset 7:6c7c7dc9d8ef
hairball
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Thu, 15 Jan 2015 15:53:19 -0500 |
parents | e3b1dcc51e6a |
children | b3e878bb793d |
files | pymedcouple |
diffstat | 1 files changed, 83 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
--- a/pymedcouple +++ b/pymedcouple @@ -3,7 +3,7 @@ import random import numpy as np -from itertools import tee +from itertools import tee, izip def partsort(L, n): """This is a partial sort of the weighted list L = [(a,w) for a in A, @@ -46,7 +46,7 @@ wtot = sum(W) beg = 0 - end = n + end = n-1 while True: mid = (beg + end)//2 @@ -70,7 +70,7 @@ else: return trial -def medcouple_1d(X, eps1 = 2**-52, eps2 = 2**-1022 ): +def medcouple_1d(X, eps1 = 2**-52, eps2 = 2**-1022, debug=False ): """Calculates the medcouple robust measure of skewness. Parameters @@ -102,21 +102,27 @@ Z = sorted(X, reverse=True) if n % 2 == 1: - Zmed = Z[n2 + 1] + Zmed = Z[n2] else: - Zmed = (Z[n2] + Z[n2 + 1])/2 + Zmed = (Z[n2] + Z[n2+1])/2 #Check if the median is at the edges up to relative epsilon if abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)): return -1.0 if abs(Z[-1] - Zmed) < eps1*(eps1 + abs(Zmed)): return 1.0 - + + if debug: + print "Zmed = ", Zmed + # Centre Z wrt median, so that median(Z) = 0. Z = [z - Zmed for z in Z] # Scale inside [-0.5, 0.5], for greater numerical stability. - Zden = -2*max(-Z[0], Z[-1]) + Zden = 2*max(Z[0], -Z[-1]) + if debug: + print "Zden =", Zden + Z = [z/Zden for z in Z] Zmed /= Zden @@ -126,10 +132,20 @@ Zplus = [z for z in Z if z >= -Zeps] Zminus = [z for z in Z if Zeps >= z] + if debug: + for zp in Zplus: + print "Zplus : %g" % zp + for zm in Zminus: + print "Zminus: %g" % zm + n_plus = len(Zplus) n_minus = len(Zminus) - def h_kern(i, j): + if debug: + print "n_plus =", n_plus + print "n_minus = ", n_minus + + def h_kern(i, j, debug=False): """Kernel function h for the medcouple, closing over the values of Zplus and Zminus just defined above. @@ -141,62 +157,99 @@ b = Zminus[j] if abs(a - b) <= 2*eps2: - return signum(n_plus - 1 - i - j) + h = signum(n_plus - 1 - i - j) + else: + h = (a+b)/(a-b) - return (a+b)/(a-b) + if False: + print "a = {a}, b = {b}, h({i},{j}) = {h}".format(**locals()) + + return h # Init left and right borders L = [0]*n_plus - R = [n_plus-1]*n_plus + R = [n_minus - 1]*n_plus Ltot = 0 - Rtot = n_plus*n_minus + Rtot = n_minus*n_plus + mid_idx = (Rtot-1)//2 - mid_idx = Rtot//2 + if debug: + print "mid_idx = ", mid_idx # kth pair algorithm (Johnson & Mizoguchi) while Rtot - Ltot > n_plus: + if debug: + print "L = ", L + print "R = ", R # First, compute the median inside the given bounds # (Be stingy, reuse same generator) [I1, I2] = tee(i for i in xrange(0, n_plus) if L[i] <= R[i]) A = [h_kern(i, (L[i] + R[i])//2) for i in I1] - W = [R[i] - L[i] for i in I2] + W = [R[i] - L[i] + 1 for i in I2] Am = wmedian(A,W) + if debug: + print "Am = ", Am + + Am_eps = eps1*(eps1 + abs(Am)) # Compute new left and right boundaries, based on the weighted # median - P = Q = [] + P = [] + Q = [] - j = -1 + j = 0 for i in xrange(n_plus - 1, -1, -1): - while j < n_plus - 1 and h_kern(i, j) > Am: + while j < n_minus - 1 and h_kern(i, j) - Am > Am_eps: j += 1 - P.append(j) + P.append(j-1) + P.reverse() - j = n_plus + j = n_minus - 1 for i in xrange(0, n_plus): - while j > -1 and h_kern(i, j) < Am: + while j >= 0 and h_kern(i, j) - Am < -Am_eps: j -= 1 - Q.append(j) + Q.append(j+1) # Check on which side of those bounds the desired median of # the whole matrix may be. - sumP = sum(P) + sumP = sum(P) + len(P) sumQ = sum(Q) - if mid_idx <= sumP: + if debug: + print "P: ", sumP, P + print "Q: ", sumQ, Q + print + + if mid_idx <= sumP - 1: R = P Rtot = sumP - elif mid_idx > sumQ: - L = Q - Ltot = sumQ else: - return Am + if mid_idx > sumQ - 1: + L = Q + Ltot = sumQ + else: + return Am - # Uh, implement this part... - import ipdb; ipdb.set_trace() + # Didn't find the median, but now we have a very small search + # space to find it in, just between the left and right boundaries. + # This space is of size Rtot - Ltot which is <= n_plus + if debug: + print "L =", L + print "R =", R + A = [] + for (i, (l, r)) in enumerate(izip(L, R)): + for j in xrange(l, r + 1): + A.append(h_kern(i, j, debug=debug)) + A.sort() + A.reverse() + + if debug: + print "len(A) = ", len(A) + Am = A[mid_idx - Ltot] + return Am def signum(x): @@ -212,7 +265,7 @@ fname = sys.argv[1] with open(fname) as f: data = [float(x) for x in f.readlines() if x.strip() != ""] - print medcouple_1d(data) + print "%.16g" % medcouple_1d(data) if __name__ == "__main__": main()