comparison jmedcouple.c++ @ 29:cf8c8c825c6d

jmedcouple: change all ints to long, to avoid overflow
author Jordi Gutiérrez Hermoso <jordigh@octave.org>
date Sun, 18 Jan 2015 20:42:02 -0500
parents 65f6fad65f49
children 2e277129e81b
comparison
equal deleted inserted replaced
28:65f6fad65f49 29:cf8c8c825c6d
22 /* 22 /*
23 This computes the weighted median of array A with corresponding 23 This computes the weighted median of array A with corresponding
24 weights W. 24 weights W.
25 */ 25 */
26 double wmedian(const vector<double>& A, 26 double wmedian(const vector<double>& A,
27 const vector<int>& W) 27 const vector<long>& W)
28 { 28 {
29 typedef pair<double,int> aw_t; 29 typedef pair<double, long> aw_t;
30 30
31 int n = A.size(); 31 long n = A.size();
32 vector<aw_t> AW(n); 32 vector<aw_t> AW(n);
33 for (int i = 0; i < n; i ++) 33 for (long i = 0; i < n; i ++)
34 AW[i] = make_pair(A[i], W[i]); 34 AW[i] = make_pair(A[i], W[i]);
35 35
36 long wtot = sum(W); 36 long wtot = sum(W);
37 37
38 int beg = 0; 38 long beg = 0;
39 int end = n - 1; 39 long end = n - 1;
40 40
41 while (true) { 41 while (true) {
42 int mid = (beg + end)/2; 42 long mid = (beg + end)/2;
43 nth_element(AW.begin(), AW.begin() + mid, AW.end(), 43 nth_element(AW.begin(), AW.begin() + mid, AW.end(),
44 [](const aw_t& l, const aw_t& r){return l.first > r.first;}); 44 [](const aw_t& l, const aw_t& r){return l.first > r.first;});
45 double trial = AW[mid].first; 45 double trial = AW[mid].first;
46
47 long wleft = 0, wright = 0; 46 long wleft = 0, wright = 0;
48 47
49 for (aw_t& aw: AW ) { 48 for (aw_t& aw: AW ) {
50 if (aw.first > trial) 49 if (aw.first > trial)
51 wleft += aw.second; 50 wleft += aw.second;
67 66
68 double medcouple(const std::vector<double>& X, 67 double medcouple(const std::vector<double>& X,
69 double eps1 = numeric_limits<double>::epsilon(), 68 double eps1 = numeric_limits<double>::epsilon(),
70 double eps2 = numeric_limits<double>::min()) 69 double eps2 = numeric_limits<double>::min())
71 { 70 {
72 size_t n = X.size(), n2 = (n - 1)/2; 71 long n = X.size(), n2 = (n - 1)/2;
73 72
74 if (n < 3) 73 if (n < 3)
75 return 0; 74 return 0;
76 75
77 vector<double> Z = X; 76 vector<double> Z = X;
104 copy_if(Z.begin(), Z.end(), back_inserter(Zplus), 103 copy_if(Z.begin(), Z.end(), back_inserter(Zplus),
105 [=](double z){return z >= -Zeps;}); 104 [=](double z){return z >= -Zeps;});
106 copy_if(Z.begin(), Z.end(), back_inserter(Zminus), 105 copy_if(Z.begin(), Z.end(), back_inserter(Zminus),
107 [=](double z){return Zeps >= z;}); 106 [=](double z){return Zeps >= z;});
108 107
109 size_t 108 long
110 n_plus = Zplus.size(), 109 n_plus = Zplus.size(),
111 n_minus = Zminus.size(); 110 n_minus = Zminus.size();
112 111
113 /* 112 /*
114 Kernel function h for the medcouple, closing over the values of 113 Kernel function h for the medcouple, closing over the values of
115 Zplus and Zminus just defined above. 114 Zplus and Zminus just defined above.
116 115
117 In case a and be are within epsilon of the median, the kernel 116 In case a and be are within epsilon of the median, the kernel
118 is the signum of their position. 117 is the signum of their position.
119 */ 118 */
120 auto h_kern = [&](int i, int j) { 119 auto h_kern = [&](long i, long j) {
121 double a = Zplus[i]; 120 double a = Zplus[i];
122 double b = Zminus[j]; 121 double b = Zminus[j];
123 122
124 double h; 123 double h;
125 if (abs(a - b) <= 2*eps2) 124 if (abs(a - b) <= 2*eps2)
129 128
130 return h; 129 return h;
131 }; 130 };
132 131
133 // Init left and right borders 132 // Init left and right borders
134 vector<int> L(n_plus, 0); 133 vector<long> L(n_plus, 0);
135 vector<int> R(n_plus, n_minus - 1); 134 vector<long> R(n_plus, n_minus - 1);
136 135
137 long Ltot = 0; 136 long Ltot = 0;
138 long Rtot = n_minus*n_plus; 137 long Rtot = n_minus*n_plus;
139 long medc_idx = Rtot/2; 138 long medc_idx = Rtot/2;
140 139
141 // kth pair algorithm (Johnson & Mizoguchi) 140 // kth pair algorithm (Johnson & Mizoguchi)
142 while (Rtot - Ltot > n_plus) { 141 while (Rtot - Ltot > n_plus) {
142
143 // First, compute the median inside the given bounds 143 // First, compute the median inside the given bounds
144 vector<double> A; 144 vector<double> A;
145 vector<int> W; 145 vector<long> W;
146 for (int i = 0; i < n_plus; i++) { 146 for (long i = 0; i < n_plus; i++) {
147 if (L[i] <= R[i]) { 147 if (L[i] <= R[i]) {
148 A.push_back(h_kern(i, (L[i] + R[i])/2)); 148 A.push_back(h_kern(i, (L[i] + R[i])/2));
149 W.push_back(R[i] - L[i] + 1); 149 W.push_back(R[i] - L[i] + 1);
150 } 150 }
151 } 151 }
153 153
154 double Am_eps = eps1*(eps1 + abs(Am)); 154 double Am_eps = eps1*(eps1 + abs(Am));
155 155
156 //Compute new left and right boundaries, based on the weighted 156 //Compute new left and right boundaries, based on the weighted
157 //median 157 //median
158 vector<int> P(n_plus), Q(n_plus); 158 vector<long> P(n_plus), Q(n_plus);
159 159
160 { 160 {
161 int j = 0; 161 long j = 0;
162 for (int i = n_plus - 1; i >= 0; i--) { 162 for (long i = n_plus - 1; i >= 0; i--) {
163 while (j < n_minus and h_kern(i, j) - Am > Am_eps) 163 while (j < n_minus and h_kern(i, j) - Am > Am_eps)
164 j++; 164 j++;
165 P[i] = j - 1; 165 P[i] = j - 1;
166 } 166 }
167 } 167 }
168 168
169 { 169 {
170 int j = n_minus - 1; 170 long j = n_minus - 1;
171 for (int i = 0; i < n_plus; i++) { 171 for (long i = 0; i < n_plus; i++) {
172 while (j >= 0 and h_kern(i, j) - Am < -Am_eps) 172 while (j >= 0 and h_kern(i, j) - Am < -Am_eps)
173 j--; 173 j--;
174 Q[i] = j + 1; 174 Q[i] = j + 1;
175 } 175 }
176 } 176 }
195 195
196 // Didn't find the median, but now we have a very small search space 196 // Didn't find the median, but now we have a very small search space
197 // to find it in, just between the left and right boundaries. This 197 // to find it in, just between the left and right boundaries. This
198 // space is of size Rtot - Ltot which is <= n_plus 198 // space is of size Rtot - Ltot which is <= n_plus
199 vector<double> A; 199 vector<double> A;
200 for (int i = 0; i < n_plus; i++) { 200 for (long i = 0; i < n_plus; i++) {
201 for (int j = L[i]; j <= R[i]; j++) 201 for (long j = L[i]; j <= R[i]; j++)
202 A.push_back(h_kern(i, j)); 202 A.push_back(h_kern(i, j));
203 } 203 }
204 nth_element(A.begin(), A.begin() + medc_idx - Ltot, A.end(), 204 nth_element(A.begin(), A.begin() + medc_idx - Ltot, A.end(),
205 [](double x, double y) {return x > y;}); 205 [](double x, double y) {return x > y;});
206 206