Mercurial > hg > medcouple
annotate pymedcouple @ 22:f5b4a2ab6204
medcouple_1d: modify the definition of medc_idx to match R's definition
This uses the right (not left) middle in case of an even number of
things and the middle one in case of an odd number.
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Sat, 17 Jan 2015 19:56:07 -0500 |
parents | d63e763f8ac1 |
children | 29a178b23219 |
rev | line source |
---|---|
5
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
1 #!/usr/bin/python |
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
2 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
3 import random |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
4 import numpy as np |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
5 |
7 | 6 from itertools import tee, izip |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
7 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
8 def partsort(L, n): |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
9 """This is a partial sort of the weighted list L = [(a,w) for a in A, |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
10 w in W], so that the element at position n is in the correct spot. |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
11 Also known as quickselect. The elements of W are ignored for the |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
12 purposes of this partial sort. |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
13 """ |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
14 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
15 beg = 0 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
16 end = len(L) |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
17 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
18 while True: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
19 pivot = random.randint(beg,end-1) |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
20 pivotval = L[pivot][0] |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
21 L[pivot], L[end-1] = L[end-1], L[pivot] |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
22 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
23 idx = beg |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
24 for i in xrange(beg, end): |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
25 if L[i][0] < pivotval: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
26 L[idx], L[i] = L[i], L[idx] |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
27 idx += 1 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
28 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
29 L[idx], L[end-1] = L[end-1], L[idx] |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
30 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
31 if idx == n: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
32 return L[idx] |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
33 elif idx < n: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
34 beg = idx+1 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
35 else: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
36 end = idx |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
37 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
38 def wmedian(A, W): |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
39 """This computes the weighted median of array A with corresponding |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
40 weights W. |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
41 """ |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
42 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
43 AW = zip(A,W) |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
44 n = len(AW) |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
45 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
46 wtot = sum(W) |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
47 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
48 beg = 0 |
7 | 49 end = n-1 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
50 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
51 while True: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
52 mid = (beg + end)//2 |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
53 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
54 partsort(AW, mid) |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
55 trial = AW[mid][0] |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
56 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
57 wleft = wright = 0 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
58 for (a,w) in AW: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
59 if a < trial: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
60 wleft += w |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
61 else: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
62 # This also includes a == trial, i.e. the "middle" |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
63 # weight. |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
64 wright += w |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
65 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
66 if 2*wleft > wtot: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
67 end = mid |
21
d63e763f8ac1
wmedian: don't infinite loop if wleft == wright
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
20
diff
changeset
|
68 elif 2*wright < wtot: |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
69 beg = mid |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
70 else: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
71 return trial |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
72 |
10
3d145bcc8694
Remove more debug code
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
9
diff
changeset
|
73 def medcouple_1d(X, eps1 = 2**-52, eps2 = 2**-1022): |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
74 """Calculates the medcouple robust measure of skewness. |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
75 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
76 Parameters |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
77 ---------- |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
78 y : array-like, 1-d |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
79 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
80 Returns |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
81 ------- |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
82 mc : float |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
83 The medcouple statistic |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
84 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
85 .. [1] G. Brys, M. Hubert, and A. Struyf "A Robust Measure of |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
86 Skewness." Journal of Computational and Graphical Statistics, Vol. |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
87 13, No. 4 (Dec., 2004), pp. 996- 1017 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
88 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
89 .. [2] D. B. Johnson and T. Mizoguchi "Selecting the Kth Element |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
90 in $X + Y$ and $X_1 + X_2 + \cdots X_m$". SIAM Journal of |
9 | 91 Computing, Vol. 7, No. 2 (May 1978), pp. 147-53. |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
92 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
93 """ |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
94 # FIXME: Figure out what to do about NaNs. |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
95 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
96 n = len(X) |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
97 n2 = (n-1)//2 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
98 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
99 if n < 3: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
100 return 0 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
101 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
102 Z = sorted(X, reverse=True) |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
103 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
104 if n % 2 == 1: |
7 | 105 Zmed = Z[n2] |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
106 else: |
7 | 107 Zmed = (Z[n2] + Z[n2+1])/2 |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
108 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
109 #Check if the median is at the edges up to relative epsilon |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
110 if abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)): |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
111 return -1.0 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
112 if abs(Z[-1] - Zmed) < eps1*(eps1 + abs(Zmed)): |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
113 return 1.0 |
7 | 114 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
115 # Centre Z wrt median, so that median(Z) = 0. |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
116 Z = [z - Zmed for z in Z] |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
117 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
118 # Scale inside [-0.5, 0.5], for greater numerical stability. |
7 | 119 Zden = 2*max(Z[0], -Z[-1]) |
5
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
120 Z = [z/Zden for z in Z] |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
121 Zmed /= Zden |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
122 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
123 Zeps = eps1*(eps1 + abs(Zmed)) |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
124 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
125 # These overlap on the entries that are tied with the median |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
126 Zplus = [z for z in Z if z >= -Zeps] |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
127 Zminus = [z for z in Z if Zeps >= z] |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
128 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
129 n_plus = len(Zplus) |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
130 n_minus = len(Zminus) |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
131 |
10
3d145bcc8694
Remove more debug code
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
9
diff
changeset
|
132 def h_kern(i, j): |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
133 """Kernel function h for the medcouple, closing over the values of |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
134 Zplus and Zminus just defined above. |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
135 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
136 In case a and be are within epsilon of the median, the kernel |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
137 is the signum of their position. |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
138 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
139 """ |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
140 a = Zplus[i] |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
141 b = Zminus[j] |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
142 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
143 if abs(a - b) <= 2*eps2: |
7 | 144 h = signum(n_plus - 1 - i - j) |
145 else: | |
146 h = (a+b)/(a-b) | |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
147 |
7 | 148 return h |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
149 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
150 # Init left and right borders |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
151 L = [0]*n_plus |
7 | 152 R = [n_minus - 1]*n_plus |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
153 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
154 Ltot = 0 |
7 | 155 Rtot = n_minus*n_plus |
22
f5b4a2ab6204
medcouple_1d: modify the definition of medc_idx to match R's definition
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
21
diff
changeset
|
156 medc_idx = Rtot//2 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
157 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
158 # kth pair algorithm (Johnson & Mizoguchi) |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
159 while Rtot - Ltot > n_plus: |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
160 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
161 # First, compute the median inside the given bounds |
6 | 162 # (Be stingy, reuse same generator) |
163 [I1, I2] = tee(i for i in xrange(0, n_plus) if L[i] <= R[i]) | |
164 | |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
165 A = [h_kern(i, (L[i] + R[i])//2) for i in I1] |
7 | 166 W = [R[i] - L[i] + 1 for i in I2] |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
167 Am = wmedian(A,W) |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
168 |
7 | 169 Am_eps = eps1*(eps1 + abs(Am)) |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
170 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
171 # Compute new left and right boundaries, based on the weighted |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
172 # median |
7 | 173 P = [] |
174 Q = [] | |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
175 |
7 | 176 j = 0 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
177 for i in xrange(n_plus - 1, -1, -1): |
20
8e02346789b2
Fix off by one error in P computation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
13
diff
changeset
|
178 while j < n_minus and h_kern(i, j) - Am > Am_eps: |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
179 j += 1 |
12
c81f6d263897
Spaces around plus and minus
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
11
diff
changeset
|
180 P.append(j - 1) |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
181 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
182 P.reverse() |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
183 |
7 | 184 j = n_minus - 1 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
185 for i in xrange(0, n_plus): |
7 | 186 while j >= 0 and h_kern(i, j) - Am < -Am_eps: |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
187 j -= 1 |
12
c81f6d263897
Spaces around plus and minus
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
11
diff
changeset
|
188 Q.append(j + 1) |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
189 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
190 # Check on which side of those bounds the desired median of |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
191 # the whole matrix may be. |
7 | 192 sumP = sum(P) + len(P) |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
193 sumQ = sum(Q) |
11
4c7d7ff28a0e
Rename mid_idx to medc_idx
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
10
diff
changeset
|
194 |
4c7d7ff28a0e
Rename mid_idx to medc_idx
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
10
diff
changeset
|
195 if medc_idx <= sumP - 1: |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
196 R = P |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
197 Rtot = sumP |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
198 else: |
11
4c7d7ff28a0e
Rename mid_idx to medc_idx
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
10
diff
changeset
|
199 if medc_idx > sumQ - 1: |
7 | 200 L = Q |
201 Ltot = sumQ | |
202 else: | |
203 return Am | |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
204 |
7 | 205 # Didn't find the median, but now we have a very small search |
206 # space to find it in, just between the left and right boundaries. | |
207 # This space is of size Rtot - Ltot which is <= n_plus | |
208 A = [] | |
209 for (i, (l, r)) in enumerate(izip(L, R)): | |
210 for j in xrange(l, r + 1): | |
10
3d145bcc8694
Remove more debug code
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
9
diff
changeset
|
211 A.append(h_kern(i, j)) |
7 | 212 A.sort() |
213 A.reverse() | |
214 | |
11
4c7d7ff28a0e
Rename mid_idx to medc_idx
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
10
diff
changeset
|
215 Am = A[medc_idx - Ltot] |
4c7d7ff28a0e
Rename mid_idx to medc_idx
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
10
diff
changeset
|
216 |
7 | 217 return Am |
13
077261db7a58
Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
12
diff
changeset
|
218 |
4
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
219 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
220 def signum(x): |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
221 if x > 0: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
222 return 1 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
223 if x < 0: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
224 return -1 |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
225 else: |
74d0d08dbc95
Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff
changeset
|
226 return 0 |
5
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
227 |
8
b3e878bb793d
Remove debug statements
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
7
diff
changeset
|
228 |
5
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
229 def main(): |
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
230 import sys |
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
231 fname = sys.argv[1] |
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
232 with open(fname) as f: |
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
233 data = [float(x) for x in f.readlines() if x.strip() != ""] |
8
b3e878bb793d
Remove debug statements
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
7
diff
changeset
|
234 |
7 | 235 print "%.16g" % medcouple_1d(data) |
5
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
236 |
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
237 if __name__ == "__main__": |
cbe17f888c79
Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
4
diff
changeset
|
238 main() |