Mercurial > hg > medcouple
comparison pymedcouple @ 7:6c7c7dc9d8ef
hairball
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Thu, 15 Jan 2015 15:53:19 -0500 |
parents | e3b1dcc51e6a |
children | b3e878bb793d |
comparison
equal
deleted
inserted
replaced
6:e3b1dcc51e6a | 7:6c7c7dc9d8ef |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 import random | 3 import random |
4 import numpy as np | 4 import numpy as np |
5 | 5 |
6 from itertools import tee | 6 from itertools import tee, izip |
7 | 7 |
8 def partsort(L, n): | 8 def partsort(L, n): |
9 """This is a partial sort of the weighted list L = [(a,w) for a in A, | 9 """This is a partial sort of the weighted list L = [(a,w) for a in A, |
10 w in W], so that the element at position n is in the correct spot. | 10 w in W], so that the element at position n is in the correct spot. |
11 Also known as quickselect. The elements of W are ignored for the | 11 Also known as quickselect. The elements of W are ignored for the |
44 n = len(AW) | 44 n = len(AW) |
45 | 45 |
46 wtot = sum(W) | 46 wtot = sum(W) |
47 | 47 |
48 beg = 0 | 48 beg = 0 |
49 end = n | 49 end = n-1 |
50 | 50 |
51 while True: | 51 while True: |
52 mid = (beg + end)//2 | 52 mid = (beg + end)//2 |
53 | 53 |
54 partsort(AW, mid) | 54 partsort(AW, mid) |
68 elif 2*wright <= wtot: | 68 elif 2*wright <= wtot: |
69 beg = mid | 69 beg = mid |
70 else: | 70 else: |
71 return trial | 71 return trial |
72 | 72 |
73 def medcouple_1d(X, eps1 = 2**-52, eps2 = 2**-1022 ): | 73 def medcouple_1d(X, eps1 = 2**-52, eps2 = 2**-1022, debug=False ): |
74 """Calculates the medcouple robust measure of skewness. | 74 """Calculates the medcouple robust measure of skewness. |
75 | 75 |
76 Parameters | 76 Parameters |
77 ---------- | 77 ---------- |
78 y : array-like, 1-d | 78 y : array-like, 1-d |
100 return 0 | 100 return 0 |
101 | 101 |
102 Z = sorted(X, reverse=True) | 102 Z = sorted(X, reverse=True) |
103 | 103 |
104 if n % 2 == 1: | 104 if n % 2 == 1: |
105 Zmed = Z[n2 + 1] | 105 Zmed = Z[n2] |
106 else: | 106 else: |
107 Zmed = (Z[n2] + Z[n2 + 1])/2 | 107 Zmed = (Z[n2] + Z[n2+1])/2 |
108 | 108 |
109 #Check if the median is at the edges up to relative epsilon | 109 #Check if the median is at the edges up to relative epsilon |
110 if abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)): | 110 if abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)): |
111 return -1.0 | 111 return -1.0 |
112 if abs(Z[-1] - Zmed) < eps1*(eps1 + abs(Zmed)): | 112 if abs(Z[-1] - Zmed) < eps1*(eps1 + abs(Zmed)): |
113 return 1.0 | 113 return 1.0 |
114 | 114 |
115 if debug: | |
116 print "Zmed = ", Zmed | |
117 | |
115 # Centre Z wrt median, so that median(Z) = 0. | 118 # Centre Z wrt median, so that median(Z) = 0. |
116 Z = [z - Zmed for z in Z] | 119 Z = [z - Zmed for z in Z] |
117 | 120 |
118 # Scale inside [-0.5, 0.5], for greater numerical stability. | 121 # Scale inside [-0.5, 0.5], for greater numerical stability. |
119 Zden = -2*max(-Z[0], Z[-1]) | 122 Zden = 2*max(Z[0], -Z[-1]) |
123 if debug: | |
124 print "Zden =", Zden | |
125 | |
120 Z = [z/Zden for z in Z] | 126 Z = [z/Zden for z in Z] |
121 Zmed /= Zden | 127 Zmed /= Zden |
122 | 128 |
123 Zeps = eps1*(eps1 + abs(Zmed)) | 129 Zeps = eps1*(eps1 + abs(Zmed)) |
124 | 130 |
125 # These overlap on the entries that are tied with the median | 131 # These overlap on the entries that are tied with the median |
126 Zplus = [z for z in Z if z >= -Zeps] | 132 Zplus = [z for z in Z if z >= -Zeps] |
127 Zminus = [z for z in Z if Zeps >= z] | 133 Zminus = [z for z in Z if Zeps >= z] |
128 | 134 |
135 if debug: | |
136 for zp in Zplus: | |
137 print "Zplus : %g" % zp | |
138 for zm in Zminus: | |
139 print "Zminus: %g" % zm | |
140 | |
129 n_plus = len(Zplus) | 141 n_plus = len(Zplus) |
130 n_minus = len(Zminus) | 142 n_minus = len(Zminus) |
131 | 143 |
132 def h_kern(i, j): | 144 if debug: |
145 print "n_plus =", n_plus | |
146 print "n_minus = ", n_minus | |
147 | |
148 def h_kern(i, j, debug=False): | |
133 """Kernel function h for the medcouple, closing over the values of | 149 """Kernel function h for the medcouple, closing over the values of |
134 Zplus and Zminus just defined above. | 150 Zplus and Zminus just defined above. |
135 | 151 |
136 In case a and be are within epsilon of the median, the kernel | 152 In case a and be are within epsilon of the median, the kernel |
137 is the signum of their position. | 153 is the signum of their position. |
139 """ | 155 """ |
140 a = Zplus[i] | 156 a = Zplus[i] |
141 b = Zminus[j] | 157 b = Zminus[j] |
142 | 158 |
143 if abs(a - b) <= 2*eps2: | 159 if abs(a - b) <= 2*eps2: |
144 return signum(n_plus - 1 - i - j) | 160 h = signum(n_plus - 1 - i - j) |
145 | 161 else: |
146 return (a+b)/(a-b) | 162 h = (a+b)/(a-b) |
163 | |
164 if False: | |
165 print "a = {a}, b = {b}, h({i},{j}) = {h}".format(**locals()) | |
166 | |
167 return h | |
147 | 168 |
148 # Init left and right borders | 169 # Init left and right borders |
149 L = [0]*n_plus | 170 L = [0]*n_plus |
150 R = [n_plus-1]*n_plus | 171 R = [n_minus - 1]*n_plus |
151 | 172 |
152 Ltot = 0 | 173 Ltot = 0 |
153 Rtot = n_plus*n_minus | 174 Rtot = n_minus*n_plus |
154 | 175 mid_idx = (Rtot-1)//2 |
155 mid_idx = Rtot//2 | 176 |
177 if debug: | |
178 print "mid_idx = ", mid_idx | |
156 | 179 |
157 # kth pair algorithm (Johnson & Mizoguchi) | 180 # kth pair algorithm (Johnson & Mizoguchi) |
158 while Rtot - Ltot > n_plus: | 181 while Rtot - Ltot > n_plus: |
182 if debug: | |
183 print "L = ", L | |
184 print "R = ", R | |
159 | 185 |
160 # First, compute the median inside the given bounds | 186 # First, compute the median inside the given bounds |
161 # (Be stingy, reuse same generator) | 187 # (Be stingy, reuse same generator) |
162 [I1, I2] = tee(i for i in xrange(0, n_plus) if L[i] <= R[i]) | 188 [I1, I2] = tee(i for i in xrange(0, n_plus) if L[i] <= R[i]) |
163 | 189 |
164 A = [h_kern(i, (L[i] + R[i])//2) for i in I1] | 190 A = [h_kern(i, (L[i] + R[i])//2) for i in I1] |
165 W = [R[i] - L[i] for i in I2] | 191 W = [R[i] - L[i] + 1 for i in I2] |
166 Am = wmedian(A,W) | 192 Am = wmedian(A,W) |
193 if debug: | |
194 print "Am = ", Am | |
195 | |
196 Am_eps = eps1*(eps1 + abs(Am)) | |
167 | 197 |
168 # Compute new left and right boundaries, based on the weighted | 198 # Compute new left and right boundaries, based on the weighted |
169 # median | 199 # median |
170 P = Q = [] | 200 P = [] |
171 | 201 Q = [] |
172 j = -1 | 202 |
203 j = 0 | |
173 for i in xrange(n_plus - 1, -1, -1): | 204 for i in xrange(n_plus - 1, -1, -1): |
174 while j < n_plus - 1 and h_kern(i, j) > Am: | 205 while j < n_minus - 1 and h_kern(i, j) - Am > Am_eps: |
175 j += 1 | 206 j += 1 |
176 P.append(j) | 207 P.append(j-1) |
208 | |
177 P.reverse() | 209 P.reverse() |
178 | 210 |
179 j = n_plus | 211 j = n_minus - 1 |
180 for i in xrange(0, n_plus): | 212 for i in xrange(0, n_plus): |
181 while j > -1 and h_kern(i, j) < Am: | 213 while j >= 0 and h_kern(i, j) - Am < -Am_eps: |
182 j -= 1 | 214 j -= 1 |
183 Q.append(j) | 215 Q.append(j+1) |
184 | 216 |
185 # Check on which side of those bounds the desired median of | 217 # Check on which side of those bounds the desired median of |
186 # the whole matrix may be. | 218 # the whole matrix may be. |
187 sumP = sum(P) | 219 sumP = sum(P) + len(P) |
188 sumQ = sum(Q) | 220 sumQ = sum(Q) |
189 if mid_idx <= sumP: | 221 if debug: |
222 print "P: ", sumP, P | |
223 print "Q: ", sumQ, Q | |
224 print | |
225 | |
226 if mid_idx <= sumP - 1: | |
190 R = P | 227 R = P |
191 Rtot = sumP | 228 Rtot = sumP |
192 elif mid_idx > sumQ: | 229 else: |
193 L = Q | 230 if mid_idx > sumQ - 1: |
194 Ltot = sumQ | 231 L = Q |
195 else: | 232 Ltot = sumQ |
196 return Am | 233 else: |
197 | 234 return Am |
198 # Uh, implement this part... | 235 |
199 import ipdb; ipdb.set_trace() | 236 # Didn't find the median, but now we have a very small search |
237 # space to find it in, just between the left and right boundaries. | |
238 # This space is of size Rtot - Ltot which is <= n_plus | |
239 if debug: | |
240 print "L =", L | |
241 print "R =", R | |
242 A = [] | |
243 for (i, (l, r)) in enumerate(izip(L, R)): | |
244 for j in xrange(l, r + 1): | |
245 A.append(h_kern(i, j, debug=debug)) | |
246 A.sort() | |
247 A.reverse() | |
248 | |
249 if debug: | |
250 print "len(A) = ", len(A) | |
251 Am = A[mid_idx - Ltot] | |
252 return Am | |
200 | 253 |
201 | 254 |
202 def signum(x): | 255 def signum(x): |
203 if x > 0: | 256 if x > 0: |
204 return 1 | 257 return 1 |
210 def main(): | 263 def main(): |
211 import sys | 264 import sys |
212 fname = sys.argv[1] | 265 fname = sys.argv[1] |
213 with open(fname) as f: | 266 with open(fname) as f: |
214 data = [float(x) for x in f.readlines() if x.strip() != ""] | 267 data = [float(x) for x in f.readlines() if x.strip() != ""] |
215 print medcouple_1d(data) | 268 print "%.16g" % medcouple_1d(data) |
216 | 269 |
217 if __name__ == "__main__": | 270 if __name__ == "__main__": |
218 main() | 271 main() |