annotate pymedcouple @ 22:f5b4a2ab6204

medcouple_1d: modify the definition of medc_idx to match R's definition This uses the right (not left) middle in case of an even number of things and the middle one in case of an odd number.
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Sat, 17 Jan 2015 19:56:07 -0500
parents d63e763f8ac1
children 29a178b23219
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
1 #!/usr/bin/python
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
2
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
3 import random
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
4 import numpy as np
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
5
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
6 from itertools import tee, izip
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
7
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
8 def partsort(L, n):
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
9 """This is a partial sort of the weighted list L = [(a,w) for a in A,
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
10 w in W], so that the element at position n is in the correct spot.
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
11 Also known as quickselect. The elements of W are ignored for the
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
12 purposes of this partial sort.
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
13 """
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
14
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
15 beg = 0
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
16 end = len(L)
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
17
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
18 while True:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
19 pivot = random.randint(beg,end-1)
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
20 pivotval = L[pivot][0]
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
21 L[pivot], L[end-1] = L[end-1], L[pivot]
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
22
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
23 idx = beg
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
24 for i in xrange(beg, end):
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
25 if L[i][0] < pivotval:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
26 L[idx], L[i] = L[i], L[idx]
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
27 idx += 1
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
28
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
29 L[idx], L[end-1] = L[end-1], L[idx]
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
30
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
31 if idx == n:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
32 return L[idx]
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
33 elif idx < n:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
34 beg = idx+1
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
35 else:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
36 end = idx
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
37
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
38 def wmedian(A, W):
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
39 """This computes the weighted median of array A with corresponding
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
40 weights W.
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
41 """
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
42
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
43 AW = zip(A,W)
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
44 n = len(AW)
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
45
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
46 wtot = sum(W)
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
47
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
48 beg = 0
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
49 end = n-1
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
50
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
51 while True:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
52 mid = (beg + end)//2
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
53
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
54 partsort(AW, mid)
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
55 trial = AW[mid][0]
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
56
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
57 wleft = wright = 0
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
58 for (a,w) in AW:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
59 if a < trial:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
60 wleft += w
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
61 else:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
62 # This also includes a == trial, i.e. the "middle"
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
63 # weight.
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
64 wright += w
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
65
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
66 if 2*wleft > wtot:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
67 end = mid
21
d63e763f8ac1 wmedian: don't infinite loop if wleft == wright
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 20
diff changeset
68 elif 2*wright < wtot:
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
69 beg = mid
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
70 else:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
71 return trial
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
72
10
3d145bcc8694 Remove more debug code
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 9
diff changeset
73 def medcouple_1d(X, eps1 = 2**-52, eps2 = 2**-1022):
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
74 """Calculates the medcouple robust measure of skewness.
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
75
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
76 Parameters
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
77 ----------
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
78 y : array-like, 1-d
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
79
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
80 Returns
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
81 -------
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
82 mc : float
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
83 The medcouple statistic
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
84
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
85 .. [1] G. Brys, M. Hubert, and A. Struyf "A Robust Measure of
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
86 Skewness." Journal of Computational and Graphical Statistics, Vol.
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
87 13, No. 4 (Dec., 2004), pp. 996- 1017
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
88
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
89 .. [2] D. B. Johnson and T. Mizoguchi "Selecting the Kth Element
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
90 in $X + Y$ and $X_1 + X_2 + \cdots X_m$". SIAM Journal of
9
e6bcaf38edaa Fix citation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 8
diff changeset
91 Computing, Vol. 7, No. 2 (May 1978), pp. 147-53.
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
92
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
93 """
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
94 # FIXME: Figure out what to do about NaNs.
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
95
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
96 n = len(X)
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
97 n2 = (n-1)//2
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
98
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
99 if n < 3:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
100 return 0
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
101
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
102 Z = sorted(X, reverse=True)
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
103
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
104 if n % 2 == 1:
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
105 Zmed = Z[n2]
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
106 else:
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
107 Zmed = (Z[n2] + Z[n2+1])/2
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
108
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
109 #Check if the median is at the edges up to relative epsilon
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
110 if abs(Z[0] - Zmed) < eps1*(eps1 + abs(Zmed)):
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
111 return -1.0
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
112 if abs(Z[-1] - Zmed) < eps1*(eps1 + abs(Zmed)):
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
113 return 1.0
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
114
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
115 # Centre Z wrt median, so that median(Z) = 0.
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
116 Z = [z - Zmed for z in Z]
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
117
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
118 # Scale inside [-0.5, 0.5], for greater numerical stability.
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
119 Zden = 2*max(Z[0], -Z[-1])
5
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
120 Z = [z/Zden for z in Z]
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
121 Zmed /= Zden
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
122
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
123 Zeps = eps1*(eps1 + abs(Zmed))
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
124
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
125 # These overlap on the entries that are tied with the median
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
126 Zplus = [z for z in Z if z >= -Zeps]
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
127 Zminus = [z for z in Z if Zeps >= z]
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
128
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
129 n_plus = len(Zplus)
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
130 n_minus = len(Zminus)
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
131
10
3d145bcc8694 Remove more debug code
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 9
diff changeset
132 def h_kern(i, j):
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
133 """Kernel function h for the medcouple, closing over the values of
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
134 Zplus and Zminus just defined above.
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
135
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
136 In case a and be are within epsilon of the median, the kernel
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
137 is the signum of their position.
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
138
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
139 """
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
140 a = Zplus[i]
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
141 b = Zminus[j]
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
142
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
143 if abs(a - b) <= 2*eps2:
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
144 h = signum(n_plus - 1 - i - j)
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
145 else:
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
146 h = (a+b)/(a-b)
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
147
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
148 return h
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
149
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
150 # Init left and right borders
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
151 L = [0]*n_plus
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
152 R = [n_minus - 1]*n_plus
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
153
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
154 Ltot = 0
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
155 Rtot = n_minus*n_plus
22
f5b4a2ab6204 medcouple_1d: modify the definition of medc_idx to match R's definition
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 21
diff changeset
156 medc_idx = Rtot//2
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
157
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
158 # kth pair algorithm (Johnson & Mizoguchi)
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
159 while Rtot - Ltot > n_plus:
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
160
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
161 # First, compute the median inside the given bounds
6
e3b1dcc51e6a Fix tee usage
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 5
diff changeset
162 # (Be stingy, reuse same generator)
e3b1dcc51e6a Fix tee usage
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 5
diff changeset
163 [I1, I2] = tee(i for i in xrange(0, n_plus) if L[i] <= R[i])
e3b1dcc51e6a Fix tee usage
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 5
diff changeset
164
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
165 A = [h_kern(i, (L[i] + R[i])//2) for i in I1]
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
166 W = [R[i] - L[i] + 1 for i in I2]
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
167 Am = wmedian(A,W)
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
168
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
169 Am_eps = eps1*(eps1 + abs(Am))
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
170
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
171 # Compute new left and right boundaries, based on the weighted
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
172 # median
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
173 P = []
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
174 Q = []
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
175
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
176 j = 0
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
177 for i in xrange(n_plus - 1, -1, -1):
20
8e02346789b2 Fix off by one error in P computation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 13
diff changeset
178 while j < n_minus and h_kern(i, j) - Am > Am_eps:
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
179 j += 1
12
c81f6d263897 Spaces around plus and minus
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 11
diff changeset
180 P.append(j - 1)
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
181
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
182 P.reverse()
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
183
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
184 j = n_minus - 1
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
185 for i in xrange(0, n_plus):
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
186 while j >= 0 and h_kern(i, j) - Am < -Am_eps:
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
187 j -= 1
12
c81f6d263897 Spaces around plus and minus
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 11
diff changeset
188 Q.append(j + 1)
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
189
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
190 # Check on which side of those bounds the desired median of
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
191 # the whole matrix may be.
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
192 sumP = sum(P) + len(P)
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
193 sumQ = sum(Q)
11
4c7d7ff28a0e Rename mid_idx to medc_idx
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 10
diff changeset
194
4c7d7ff28a0e Rename mid_idx to medc_idx
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 10
diff changeset
195 if medc_idx <= sumP - 1:
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
196 R = P
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
197 Rtot = sumP
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
198 else:
11
4c7d7ff28a0e Rename mid_idx to medc_idx
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 10
diff changeset
199 if medc_idx > sumQ - 1:
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
200 L = Q
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
201 Ltot = sumQ
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
202 else:
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
203 return Am
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
204
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
205 # Didn't find the median, but now we have a very small search
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
206 # space to find it in, just between the left and right boundaries.
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
207 # This space is of size Rtot - Ltot which is <= n_plus
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
208 A = []
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
209 for (i, (l, r)) in enumerate(izip(L, R)):
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
210 for j in xrange(l, r + 1):
10
3d145bcc8694 Remove more debug code
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 9
diff changeset
211 A.append(h_kern(i, j))
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
212 A.sort()
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
213 A.reverse()
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
214
11
4c7d7ff28a0e Rename mid_idx to medc_idx
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 10
diff changeset
215 Am = A[medc_idx - Ltot]
4c7d7ff28a0e Rename mid_idx to medc_idx
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 10
diff changeset
216
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
217 return Am
13
077261db7a58 Whitespace cleanup
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 12
diff changeset
218
4
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
219
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
220 def signum(x):
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
221 if x > 0:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
222 return 1
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
223 if x < 0:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
224 return -1
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
225 else:
74d0d08dbc95 Add Python implementation
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents:
diff changeset
226 return 0
5
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
227
8
b3e878bb793d Remove debug statements
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 7
diff changeset
228
5
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
229 def main():
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
230 import sys
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
231 fname = sys.argv[1]
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
232 with open(fname) as f:
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
233 data = [float(x) for x in f.readlines() if x.strip() != ""]
8
b3e878bb793d Remove debug statements
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 7
diff changeset
234
7
6c7c7dc9d8ef hairball
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 6
diff changeset
235 print "%.16g" % medcouple_1d(data)
5
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
236
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
237 if __name__ == "__main__":
cbe17f888c79 Make it executable
Jordi Gutiérrez Hermoso <jordigh@octave.org>
parents: 4
diff changeset
238 main()