Mercurial > hg > medcouple
comparison pymedcouple @ 5:cbe17f888c79
Make it executable
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Tue, 13 Jan 2015 17:00:49 -0500 |
parents | medcouple.py@74d0d08dbc95 |
children | e3b1dcc51e6a |
comparison
equal
deleted
inserted
replaced
4:74d0d08dbc95 | 5:cbe17f888c79 |
---|---|
1 #!/usr/bin/python | |
2 | |
3 import random | |
4 import numpy as np | |
5 | |
6 from itertools import tee | |
7 | |
8 def partsort(L, n): | |
9 """This is a partial sort of the weighted list L = [(a,w) for a in A, | |
10 w in W], so that the element at position n is in the correct spot. | |
11 Also known as quickselect. The elements of W are ignored for the | |
12 purposes of this partial sort. | |
13 """ | |
14 | |
15 beg = 0 | |
16 end = len(L) | |
17 | |
18 while True: | |
19 pivot = random.randint(beg,end-1) | |
20 pivotval = L[pivot][0] | |
21 L[pivot], L[end-1] = L[end-1], L[pivot] | |
22 | |
23 idx = beg | |
24 for i in xrange(beg, end): | |
25 if L[i][0] < pivotval: | |
26 L[idx], L[i] = L[i], L[idx] | |
27 idx += 1 | |
28 | |
29 L[idx], L[end-1] = L[end-1], L[idx] | |
30 | |
31 if idx == n: | |
32 return L[idx] | |
33 elif idx < n: | |
34 beg = idx+1 | |
35 else: | |
36 end = idx | |
37 | |
38 def wmedian(A, W): | |
39 """This computes the weighted median of array A with corresponding | |
40 weights W. | |
41 """ | |
42 | |
43 AW = zip(A,W) | |
44 n = len(AW) | |
45 | |
46 wtot = sum(W) | |
47 | |
48 beg = 0 | |
49 end = n | |
50 | |
51 while True: | |
52 mid = (beg + end)//2 | |
53 | |
54 partsort(AW, mid) | |
55 trial = AW[mid][0] | |
56 | |
57 wleft = wright = 0 | |
58 for (a,w) in AW: | |
59 if a < trial: | |
60 wleft += w | |
61 else: | |
62 # This also includes a == trial, i.e. the "middle" | |
63 # weight. | |
64 wright += w | |
65 | |
66 if 2*wleft > wtot: | |
67 end = mid | |
68 elif 2*wright <= wtot: | |
69 beg = mid | |
70 else: | |
71 return trial | |
72 | |
73 def medcouple_1d(X, eps1 = 2**-52, eps2 = 2**-1022 ): | |
74 """Calculates the medcouple robust measure of skewness. | |
75 | |
76 Parameters | |
77 ---------- | |
78 y : array-like, 1-d | |
79 | |
80 Returns | |
81 ------- | |
82 mc : float | |
83 The medcouple statistic | |
84 | |
85 .. [1] G. Brys, M. Hubert, and A. Struyf "A Robust Measure of | |
86 Skewness." Journal of Computational and Graphical Statistics, Vol. | |
87 13, No. 4 (Dec., 2004), pp. 996- 1017 | |
88 | |
89 .. [2] D. B. Johnson and T. Mizoguchi "Selecting the Kth Element | |
90 in $X + Y$ and $X_1 + X_2 + \cdots X_m$". SIAM Journal of | |
91 Computing, 7, 147-53. | |
92 | |
93 """ | |
94 # FIXME: Figure out what to do about NaNs. | |
95 | |
96 n = len(X) | |
97 n2 = (n-1)//2 | |
98 | |
99 if n < 3: | |
100 return 0 | |
101 | |
102 Z = sorted(X, reverse=True) | |
103 | |
104 if n % 2 == 1: | |
105 Zmed = Z[n2 + 1] | |
106 else: | |
107 Zmed = (Z[n2] + Z[n2 + 1])/2 | |
108 | |
109 #Check if the median is at the edges up to relative epsilon | |
110 if abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)): | |
111 return -1.0 | |
112 if abs(Z[-1] - Zmed) < eps1*(eps1 + abs(Zmed)): | |
113 return 1.0 | |
114 | |
115 # Centre Z wrt median, so that median(Z) = 0. | |
116 Z = [z - Zmed for z in Z] | |
117 | |
118 # Scale inside [-0.5, 0.5], for greater numerical stability. | |
119 Zden = -2*max(-Z[0], Z[-1]) | |
120 Z = [z/Zden for z in Z] | |
121 Zmed /= Zden | |
122 | |
123 Zeps = eps1*(eps1 + abs(Zmed)) | |
124 | |
125 # These overlap on the entries that are tied with the median | |
126 Zplus = [z for z in Z if z >= -Zeps] | |
127 Zminus = [z for z in Z if Zeps >= z] | |
128 | |
129 n_plus = len(Zplus) | |
130 n_minus = len(Zminus) | |
131 | |
132 def h_kern(i, j): | |
133 """Kernel function h for the medcouple, closing over the values of | |
134 Zplus and Zminus just defined above. | |
135 | |
136 In case a and be are within epsilon of the median, the kernel | |
137 is the signum of their position. | |
138 | |
139 """ | |
140 a = Zplus[i] | |
141 b = Zminus[j] | |
142 | |
143 if abs(a - b) <= 2*eps2: | |
144 return signum(n_plus - 1 - i - j) | |
145 | |
146 return (a+b)/(a-b) | |
147 | |
148 # Init left and right borders | |
149 L = [0]*n_plus | |
150 R = [n_plus-1]*n_plus | |
151 | |
152 Ltot = 0 | |
153 Rtot = n_plus*n_minus | |
154 | |
155 mid_idx = Rtot//2 | |
156 | |
157 # kth pair algorithm (Johnson & Mizoguchi) | |
158 while Rtot - Ltot > n_plus: | |
159 | |
160 # First, compute the median inside the given bounds | |
161 I1 = (i for i in xrange(0, n_plus) if L[i] <= R[i]) | |
162 I2 = tee(I1) # Be stingy, reuse same generator | |
163 A = [h_kern(i, (L[i] + R[i])//2) for i in I1] | |
164 W = [R[i] - L[i] for i in I2] | |
165 Am = wmedian(A,W) | |
166 | |
167 # Compute new left and right boundaries, based on the weighted | |
168 # median | |
169 P = Q = [] | |
170 | |
171 j = -1 | |
172 for i in xrange(n_plus - 1, -1, -1): | |
173 while j < n_plus - 1 and h_kern(i, j) > Am: | |
174 j += 1 | |
175 P.append(j) | |
176 P.reverse() | |
177 | |
178 j = n_plus | |
179 for i in xrange(0, n_plus): | |
180 while j > -1 and h_kern(i, j) < Am: | |
181 j -= 1 | |
182 Q.append(j) | |
183 | |
184 # Check on which side of those bounds the desired median of | |
185 # the whole matrix may be. | |
186 sumP = sum(P) | |
187 sumQ = sum(Q) | |
188 if mid_idx <= sumP: | |
189 R = P | |
190 Rtot = sumP | |
191 elif mid_idx > sumQ: | |
192 L = Q | |
193 Ltot = sumQ | |
194 else: | |
195 return Am | |
196 | |
197 # Uh, implement this part... | |
198 import ipdb; ipdb.set_trace() | |
199 | |
200 | |
201 def signum(x): | |
202 if x > 0: | |
203 return 1 | |
204 if x < 0: | |
205 return -1 | |
206 else: | |
207 return 0 | |
208 | |
209 def main(): | |
210 import sys | |
211 fname = sys.argv[1] | |
212 with open(fname) as f: | |
213 data = [float(x) for x in f.readlines() if x.strip() != ""] | |
214 print medcouple_1d(data) | |
215 | |
216 if __name__ == "__main__": | |
217 main() |