Mercurial > hg > octave-nkf
annotate liboctave/oct-fftw.cc @ 7789:82be108cc558
First attempt at single precision tyeps
* * *
corrections to qrupdate single precision routines
* * *
prefer demotion to single over promotion to double
* * *
Add single precision support to log2 function
* * *
Trivial PROJECT file update
* * *
Cache optimized hermitian/transpose methods
* * *
Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author | David Bateman <dbateman@free.fr> |
---|---|
date | Sun, 27 Apr 2008 22:34:17 +0200 |
parents | a1dbe9d80eee |
children | 25bc2d31e1bf |
rev | line source |
---|---|
3828 | 1 /* |
2 | |
7017 | 3 Copyright (C) 2001, 2002, 2004, 2005, 2006, 2007 John W. Eaton |
4 | |
3828 | 5 This file is part of Octave. |
6 | |
7 Octave is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
7016 | 9 Free Software Foundation; either version 3 of the License, or (at your |
10 option) any later version. | |
3828 | 11 |
12 Octave is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
7016 | 18 along with Octave; see the file COPYING. If not, see |
19 <http://www.gnu.org/licenses/>. | |
3828 | 20 |
21 */ | |
22 | |
23 #ifdef HAVE_CONFIG_H | |
24 #include <config.h> | |
25 #endif | |
26 | |
4773 | 27 #if defined (HAVE_FFTW3) |
3828 | 28 |
4775 | 29 #include <iostream> |
30 #include <vector> | |
31 | |
4786 | 32 #include "lo-error.h" |
3828 | 33 #include "oct-fftw.h" |
4786 | 34 #include "quit.h" |
3828 | 35 |
4809 | 36 // Helper class to create and cache fftw plans for both 1d and |
6228 | 37 // 2d. This implementation defaults to using FFTW_ESTIMATE to create |
38 // the plans, which in theory is suboptimal, but provides quit | |
39 // reasonable performance. | |
4773 | 40 |
41 // Also note that if FFTW_ESTIMATE is not used the planner in FFTW3 | |
6228 | 42 // destroys the input and output arrays. We must therefore create a |
43 // temporary input array with the same size and 16-byte alignment as | |
44 // the original array and use that for the planner. Note that we also | |
45 // use any wisdom that is available, either in a FFTW3 system wide file | |
46 // or as supplied by the user. | |
4773 | 47 |
5775 | 48 // FIXME -- if we can ensure 16 byte alignment in Array<T> |
4809 | 49 // (<T> *data) the FFTW3 can use SIMD instructions for further |
50 // acceleration. | |
4773 | 51 |
4809 | 52 // Note that it is profitable to store the FFTW3 plans, for small |
53 // ffts. | |
3828 | 54 |
4808 | 55 octave_fftw_planner::octave_fftw_planner (void) |
3828 | 56 { |
6228 | 57 meth = ESTIMATE; |
3828 | 58 |
59 plan[0] = plan[1] = 0; | |
4773 | 60 d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0; |
4808 | 61 simd_align[0] = simd_align[1] = false; |
5044 | 62 inplace[0] = inplace[1] = false; |
4808 | 63 n[0] = n[1] = dim_vector (); |
4773 | 64 |
65 rplan = 0; | |
66 rd = rs = rr = rh = 0; | |
4808 | 67 rsimd_align = false; |
4773 | 68 rn = dim_vector (); |
5044 | 69 |
4809 | 70 // If we have a system wide wisdom file, import it. |
4808 | 71 fftw_import_system_wisdom (); |
3828 | 72 } |
73 | |
6228 | 74 octave_fftw_planner::FftwMethod |
75 octave_fftw_planner::method (void) | |
76 { | |
77 return meth; | |
78 } | |
79 | |
80 octave_fftw_planner::FftwMethod | |
81 octave_fftw_planner::method (FftwMethod _meth) | |
82 { | |
83 FftwMethod ret = meth; | |
84 if (_meth == ESTIMATE || _meth == MEASURE || | |
85 _meth == PATIENT || _meth == EXHAUSTIVE || | |
86 _meth == HYBRID) | |
87 { | |
88 if (meth != _meth) | |
89 { | |
90 meth = _meth; | |
91 if (rplan) | |
92 fftw_destroy_plan (rplan); | |
93 if (plan[0]) | |
94 fftw_destroy_plan (plan[0]); | |
95 if (plan[1]) | |
96 fftw_destroy_plan (plan[1]); | |
97 rplan = plan[0] = plan[1] = 0; | |
98 } | |
99 } | |
100 else | |
101 ret = UNKNOWN; | |
102 return ret; | |
103 } | |
104 | |
4808 | 105 #define CHECK_SIMD_ALIGNMENT(x) \ |
6482 | 106 (((reinterpret_cast<ptrdiff_t> (x)) & 0xF) == 0) |
4808 | 107 |
3828 | 108 fftw_plan |
4773 | 109 octave_fftw_planner::create_plan (int dir, const int rank, |
5275 | 110 const dim_vector dims, octave_idx_type howmany, |
111 octave_idx_type stride, octave_idx_type dist, | |
4773 | 112 const Complex *in, Complex *out) |
3828 | 113 { |
4773 | 114 int which = (dir == FFTW_FORWARD) ? 0 : 1; |
3828 | 115 fftw_plan *cur_plan_p = &plan[which]; |
116 bool create_new_plan = false; | |
4808 | 117 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); |
5044 | 118 bool ioinplace = (in == out); |
3828 | 119 |
4809 | 120 // Don't create a new plan if we have a non SIMD plan already but |
121 // can do SIMD. This prevents endlessly recreating plans if we | |
122 // change the alignment. | |
123 | |
4783 | 124 if (plan[which] == 0 || d[which] != dist || s[which] != stride |
5044 | 125 || r[which] != rank || h[which] != howmany |
126 || ioinplace != inplace[which] | |
4808 | 127 || ((ioalign != simd_align[which]) ? !ioalign : false)) |
4773 | 128 create_new_plan = true; |
129 else | |
4809 | 130 { |
131 // We still might not have the same shape of array. | |
132 | |
133 for (int i = 0; i < rank; i++) | |
134 if (dims(i) != n[which](i)) | |
135 { | |
136 create_new_plan = true; | |
137 break; | |
138 } | |
139 } | |
3828 | 140 |
141 if (create_new_plan) | |
142 { | |
4773 | 143 d[which] = dist; |
144 s[which] = stride; | |
145 r[which] = rank; | |
146 h[which] = howmany; | |
4808 | 147 simd_align[which] = ioalign; |
5044 | 148 inplace[which] = ioinplace; |
4773 | 149 n[which] = dims; |
150 | |
6228 | 151 // Note reversal of dimensions for column major storage in FFTW. |
152 octave_idx_type nn = 1; | |
153 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
154 | |
155 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
156 { | |
157 tmp[i] = dims(j); | |
158 nn *= dims(j); | |
159 } | |
160 | |
161 int plan_flags = 0; | |
162 bool plan_destroys_in = true; | |
163 | |
164 switch (meth) | |
165 { | |
166 case UNKNOWN: | |
167 case ESTIMATE: | |
168 plan_flags |= FFTW_ESTIMATE; | |
169 plan_destroys_in = false; | |
170 break; | |
171 case MEASURE: | |
172 plan_flags |= FFTW_MEASURE; | |
173 break; | |
174 case PATIENT: | |
175 plan_flags |= FFTW_PATIENT; | |
176 break; | |
177 case EXHAUSTIVE: | |
178 plan_flags |= FFTW_EXHAUSTIVE; | |
179 break; | |
180 case HYBRID: | |
181 if (nn < 8193) | |
182 plan_flags |= FFTW_MEASURE; | |
183 else | |
184 { | |
185 plan_flags |= FFTW_ESTIMATE; | |
186 plan_destroys_in = false; | |
187 } | |
188 break; | |
189 } | |
190 | |
4808 | 191 if (ioalign) |
192 plan_flags &= ~FFTW_UNALIGNED; | |
193 else | |
194 plan_flags |= FFTW_UNALIGNED; | |
195 | |
3828 | 196 if (*cur_plan_p) |
197 fftw_destroy_plan (*cur_plan_p); | |
198 | |
6228 | 199 if (plan_destroys_in) |
200 { | |
201 // Create matrix with the same size and 16-byte alignment as input | |
202 OCTAVE_LOCAL_BUFFER (Complex, itmp, nn * howmany + 32); | |
203 itmp = reinterpret_cast<Complex *> | |
204 (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + | |
205 ((reinterpret_cast<ptrdiff_t> (in)) & 0xF)); | |
4809 | 206 |
6228 | 207 *cur_plan_p = |
208 fftw_plan_many_dft (rank, tmp, howmany, | |
209 reinterpret_cast<fftw_complex *> (itmp), | |
210 0, stride, dist, reinterpret_cast<fftw_complex *> (out), | |
211 0, stride, dist, dir, plan_flags); | |
212 } | |
213 else | |
214 { | |
215 *cur_plan_p = | |
216 fftw_plan_many_dft (rank, tmp, howmany, | |
4773 | 217 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)), |
4774 | 218 0, stride, dist, reinterpret_cast<fftw_complex *> (out), |
219 0, stride, dist, dir, plan_flags); | |
6228 | 220 } |
3828 | 221 |
222 if (*cur_plan_p == 0) | |
223 (*current_liboctave_error_handler) ("Error creating fftw plan"); | |
224 } | |
225 | |
226 return *cur_plan_p; | |
227 } | |
228 | |
4773 | 229 fftw_plan |
230 octave_fftw_planner::create_plan (const int rank, const dim_vector dims, | |
5275 | 231 octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, |
4773 | 232 const double *in, Complex *out) |
3828 | 233 { |
4773 | 234 fftw_plan *cur_plan_p = &rplan; |
3828 | 235 bool create_new_plan = false; |
4808 | 236 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); |
3828 | 237 |
4809 | 238 // Don't create a new plan if we have a non SIMD plan already but |
239 // can do SIMD. This prevents endlessly recreating plans if we | |
240 // change the alignment. | |
241 | |
4783 | 242 if (rplan == 0 || rd != dist || rs != stride || rr != rank |
4808 | 243 || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false)) |
4773 | 244 create_new_plan = true; |
245 else | |
4809 | 246 { |
247 // We still might not have the same shape of array. | |
248 | |
249 for (int i = 0; i < rank; i++) | |
250 if (dims(i) != rn(i)) | |
251 { | |
252 create_new_plan = true; | |
253 break; | |
254 } | |
255 } | |
3828 | 256 |
257 if (create_new_plan) | |
258 { | |
4773 | 259 rd = dist; |
260 rs = stride; | |
261 rr = rank; | |
262 rh = howmany; | |
4808 | 263 rsimd_align = ioalign; |
4773 | 264 rn = dims; |
265 | |
6228 | 266 // Note reversal of dimensions for column major storage in FFTW. |
267 octave_idx_type nn = 1; | |
268 OCTAVE_LOCAL_BUFFER (int, tmp, rank); | |
269 | |
270 for (int i = 0, j = rank-1; i < rank; i++, j--) | |
271 { | |
272 tmp[i] = dims(j); | |
273 nn *= dims(j); | |
274 } | |
275 | |
276 int plan_flags = 0; | |
277 bool plan_destroys_in = true; | |
278 | |
279 switch (meth) | |
280 { | |
281 case UNKNOWN: | |
282 case ESTIMATE: | |
283 plan_flags |= FFTW_ESTIMATE; | |
284 plan_destroys_in = false; | |
285 break; | |
286 case MEASURE: | |
287 plan_flags |= FFTW_MEASURE; | |
288 break; | |
289 case PATIENT: | |
290 plan_flags |= FFTW_PATIENT; | |
291 break; | |
292 case EXHAUSTIVE: | |
293 plan_flags |= FFTW_EXHAUSTIVE; | |
294 break; | |
295 case HYBRID: | |
296 if (nn < 8193) | |
297 plan_flags |= FFTW_MEASURE; | |
298 else | |
299 { | |
300 plan_flags |= FFTW_ESTIMATE; | |
301 plan_destroys_in = false; | |
302 } | |
303 break; | |
304 } | |
305 | |
4808 | 306 if (ioalign) |
307 plan_flags &= ~FFTW_UNALIGNED; | |
308 else | |
309 plan_flags |= FFTW_UNALIGNED; | |
310 | |
3828 | 311 if (*cur_plan_p) |
4773 | 312 fftw_destroy_plan (*cur_plan_p); |
3828 | 313 |
6228 | 314 if (plan_destroys_in) |
315 { | |
316 // Create matrix with the same size and 16-byte alignment as input | |
317 OCTAVE_LOCAL_BUFFER (double, itmp, nn + 32); | |
318 itmp = reinterpret_cast<double *> | |
319 (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + | |
320 ((reinterpret_cast<ptrdiff_t> (in)) & 0xF)); | |
4809 | 321 |
6228 | 322 *cur_plan_p = |
323 fftw_plan_many_dft_r2c (rank, tmp, howmany, itmp, | |
324 0, stride, dist, reinterpret_cast<fftw_complex *> (out), | |
325 0, stride, dist, plan_flags); | |
326 } | |
327 else | |
328 { | |
329 *cur_plan_p = | |
330 fftw_plan_many_dft_r2c (rank, tmp, howmany, | |
4773 | 331 (const_cast<double *> (in)), |
4774 | 332 0, stride, dist, reinterpret_cast<fftw_complex *> (out), |
333 0, stride, dist, plan_flags); | |
6228 | 334 } |
3828 | 335 |
336 if (*cur_plan_p == 0) | |
4773 | 337 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
3828 | 338 } |
339 | |
340 return *cur_plan_p; | |
341 } | |
342 | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
343 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
344 octave_float_fftw_planner::octave_float_fftw_planner (void) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
345 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
346 meth = ESTIMATE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
347 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
348 plan[0] = plan[1] = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
349 d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
350 simd_align[0] = simd_align[1] = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
351 inplace[0] = inplace[1] = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
352 n[0] = n[1] = dim_vector (); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
353 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
354 rplan = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
355 rd = rs = rr = rh = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
356 rsimd_align = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
357 rn = dim_vector (); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
358 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
359 // If we have a system wide wisdom file, import it. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
360 fftwf_import_system_wisdom (); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
361 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
362 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
363 octave_float_fftw_planner::FftwMethod |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
364 octave_float_fftw_planner::method (void) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
365 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
366 return meth; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
367 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
368 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
369 octave_float_fftw_planner::FftwMethod |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
370 octave_float_fftw_planner::method (FftwMethod _meth) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
371 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
372 FftwMethod ret = meth; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
373 if (_meth == ESTIMATE || _meth == MEASURE || |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
374 _meth == PATIENT || _meth == EXHAUSTIVE || |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
375 _meth == HYBRID) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
376 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
377 if (meth != _meth) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
378 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
379 meth = _meth; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
380 if (rplan) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
381 fftwf_destroy_plan (rplan); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
382 if (plan[0]) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
383 fftwf_destroy_plan (plan[0]); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
384 if (plan[1]) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
385 fftwf_destroy_plan (plan[1]); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
386 rplan = plan[0] = plan[1] = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
387 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
388 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
389 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
390 ret = UNKNOWN; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
391 return ret; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
392 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
393 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
394 fftwf_plan |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
395 octave_float_fftw_planner::create_plan (int dir, const int rank, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
396 const dim_vector dims, octave_idx_type howmany, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
397 octave_idx_type stride, octave_idx_type dist, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
398 const FloatComplex *in, FloatComplex *out) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
399 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
400 int which = (dir == FFTW_FORWARD) ? 0 : 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
401 fftwf_plan *cur_plan_p = &plan[which]; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
402 bool create_new_plan = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
403 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
404 bool ioinplace = (in == out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
405 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
406 // Don't create a new plan if we have a non SIMD plan already but |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
407 // can do SIMD. This prevents endlessly recreating plans if we |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
408 // change the alignment. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
409 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
410 if (plan[which] == 0 || d[which] != dist || s[which] != stride |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
411 || r[which] != rank || h[which] != howmany |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
412 || ioinplace != inplace[which] |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
413 || ((ioalign != simd_align[which]) ? !ioalign : false)) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
414 create_new_plan = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
415 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
416 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
417 // We still might not have the same shape of array. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
418 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
419 for (int i = 0; i < rank; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
420 if (dims(i) != n[which](i)) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
421 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
422 create_new_plan = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
423 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
424 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
425 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
426 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
427 if (create_new_plan) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
428 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
429 d[which] = dist; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
430 s[which] = stride; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
431 r[which] = rank; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
432 h[which] = howmany; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
433 simd_align[which] = ioalign; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
434 inplace[which] = ioinplace; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
435 n[which] = dims; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
436 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
437 // Note reversal of dimensions for column major storage in FFTW. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
438 octave_idx_type nn = 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
439 OCTAVE_LOCAL_BUFFER (int, tmp, rank); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
440 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
441 for (int i = 0, j = rank-1; i < rank; i++, j--) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
442 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
443 tmp[i] = dims(j); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
444 nn *= dims(j); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
445 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
446 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
447 int plan_flags = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
448 bool plan_destroys_in = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
449 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
450 switch (meth) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
451 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
452 case UNKNOWN: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
453 case ESTIMATE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
454 plan_flags |= FFTW_ESTIMATE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
455 plan_destroys_in = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
456 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
457 case MEASURE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
458 plan_flags |= FFTW_MEASURE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
459 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
460 case PATIENT: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
461 plan_flags |= FFTW_PATIENT; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
462 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
463 case EXHAUSTIVE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
464 plan_flags |= FFTW_EXHAUSTIVE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
465 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
466 case HYBRID: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
467 if (nn < 8193) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
468 plan_flags |= FFTW_MEASURE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
469 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
470 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
471 plan_flags |= FFTW_ESTIMATE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
472 plan_destroys_in = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
473 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
474 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
475 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
476 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
477 if (ioalign) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
478 plan_flags &= ~FFTW_UNALIGNED; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
479 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
480 plan_flags |= FFTW_UNALIGNED; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
481 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
482 if (*cur_plan_p) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
483 fftwf_destroy_plan (*cur_plan_p); |
3828 | 484 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
485 if (plan_destroys_in) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
486 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
487 // Create matrix with the same size and 16-byte alignment as input |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
488 OCTAVE_LOCAL_BUFFER (FloatComplex, itmp, nn * howmany + 32); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
489 itmp = reinterpret_cast<FloatComplex *> |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
490 (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
491 ((reinterpret_cast<ptrdiff_t> (in)) & 0xF)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
492 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
493 *cur_plan_p = |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
494 fftwf_plan_many_dft (rank, tmp, howmany, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
495 reinterpret_cast<fftwf_complex *> (itmp), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
496 0, stride, dist, reinterpret_cast<fftwf_complex *> (out), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
497 0, stride, dist, dir, plan_flags); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
498 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
499 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
500 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
501 *cur_plan_p = |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
502 fftwf_plan_many_dft (rank, tmp, howmany, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
503 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *> (in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
504 0, stride, dist, reinterpret_cast<fftwf_complex *> (out), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
505 0, stride, dist, dir, plan_flags); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
506 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
507 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
508 if (*cur_plan_p == 0) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
509 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
510 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
511 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
512 return *cur_plan_p; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
513 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
514 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
515 fftwf_plan |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
516 octave_float_fftw_planner::create_plan (const int rank, const dim_vector dims, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
517 octave_idx_type howmany, octave_idx_type stride, octave_idx_type dist, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
518 const float *in, FloatComplex *out) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
519 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
520 fftwf_plan *cur_plan_p = &rplan; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
521 bool create_new_plan = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
522 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
523 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
524 // Don't create a new plan if we have a non SIMD plan already but |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
525 // can do SIMD. This prevents endlessly recreating plans if we |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
526 // change the alignment. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
527 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
528 if (rplan == 0 || rd != dist || rs != stride || rr != rank |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
529 || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false)) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
530 create_new_plan = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
531 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
532 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
533 // We still might not have the same shape of array. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
534 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
535 for (int i = 0; i < rank; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
536 if (dims(i) != rn(i)) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
537 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
538 create_new_plan = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
539 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
540 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
541 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
542 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
543 if (create_new_plan) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
544 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
545 rd = dist; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
546 rs = stride; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
547 rr = rank; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
548 rh = howmany; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
549 rsimd_align = ioalign; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
550 rn = dims; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
551 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
552 // Note reversal of dimensions for column major storage in FFTW. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
553 octave_idx_type nn = 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
554 OCTAVE_LOCAL_BUFFER (int, tmp, rank); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
555 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
556 for (int i = 0, j = rank-1; i < rank; i++, j--) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
557 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
558 tmp[i] = dims(j); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
559 nn *= dims(j); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
560 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
561 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
562 int plan_flags = 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
563 bool plan_destroys_in = true; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
564 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
565 switch (meth) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
566 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
567 case UNKNOWN: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
568 case ESTIMATE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
569 plan_flags |= FFTW_ESTIMATE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
570 plan_destroys_in = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
571 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
572 case MEASURE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
573 plan_flags |= FFTW_MEASURE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
574 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
575 case PATIENT: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
576 plan_flags |= FFTW_PATIENT; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
577 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
578 case EXHAUSTIVE: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
579 plan_flags |= FFTW_EXHAUSTIVE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
580 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
581 case HYBRID: |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
582 if (nn < 8193) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
583 plan_flags |= FFTW_MEASURE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
584 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
585 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
586 plan_flags |= FFTW_ESTIMATE; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
587 plan_destroys_in = false; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
588 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
589 break; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
590 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
591 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
592 if (ioalign) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
593 plan_flags &= ~FFTW_UNALIGNED; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
594 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
595 plan_flags |= FFTW_UNALIGNED; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
596 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
597 if (*cur_plan_p) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
598 fftwf_destroy_plan (*cur_plan_p); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
599 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
600 if (plan_destroys_in) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
601 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
602 // Create matrix with the same size and 16-byte alignment as input |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
603 OCTAVE_LOCAL_BUFFER (float, itmp, nn + 32); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
604 itmp = reinterpret_cast<float *> |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
605 (((reinterpret_cast<ptrdiff_t>(itmp) + 15) & ~ 0xF) + |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
606 ((reinterpret_cast<ptrdiff_t> (in)) & 0xF)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
607 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
608 *cur_plan_p = |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
609 fftwf_plan_many_dft_r2c (rank, tmp, howmany, itmp, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
610 0, stride, dist, reinterpret_cast<fftwf_complex *> (out), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
611 0, stride, dist, plan_flags); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
612 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
613 else |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
614 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
615 *cur_plan_p = |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
616 fftwf_plan_many_dft_r2c (rank, tmp, howmany, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
617 (const_cast<float *> (in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
618 0, stride, dist, reinterpret_cast<fftwf_complex *> (out), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
619 0, stride, dist, plan_flags); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
620 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
621 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
622 if (*cur_plan_p == 0) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
623 (*current_liboctave_error_handler) ("Error creating fftw plan"); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
624 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
625 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
626 return *cur_plan_p; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
627 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
628 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
629 octave_fftw_planner fftw_planner; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
630 octave_float_fftw_planner float_fftw_planner; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
631 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
632 template <class T> |
4775 | 633 static inline void |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
634 convert_packcomplex_1d (T *out, size_t nr, size_t nc, |
5275 | 635 octave_idx_type stride, octave_idx_type dist) |
4773 | 636 { |
4785 | 637 OCTAVE_QUIT; |
638 | |
639 // Fill in the missing data. | |
640 | |
4773 | 641 for (size_t i = 0; i < nr; i++) |
642 for (size_t j = nc/2+1; j < nc; j++) | |
643 out[j*stride + i*dist] = conj(out[(nc - j)*stride + i*dist]); | |
4785 | 644 |
645 OCTAVE_QUIT; | |
4773 | 646 } |
647 | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
648 template <class T> |
4775 | 649 static inline void |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
650 convert_packcomplex_Nd (T *out, const dim_vector &dv) |
3828 | 651 { |
4773 | 652 size_t nc = dv(0); |
653 size_t nr = dv(1); | |
4808 | 654 size_t np = (dv.length () > 2 ? dv.numel () / nc / nr : 1); |
4773 | 655 size_t nrp = nr * np; |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
656 T *ptr1, *ptr2; |
4773 | 657 |
4785 | 658 OCTAVE_QUIT; |
659 | |
660 // Create space for the missing elements. | |
661 | |
4773 | 662 for (size_t i = 0; i < nrp; i++) |
663 { | |
664 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2); | |
665 ptr2 = out + i * nc; | |
666 for (size_t j = 0; j < nc/2+1; j++) | |
667 *ptr2++ = *ptr1++; | |
668 } | |
669 | |
4785 | 670 OCTAVE_QUIT; |
671 | |
672 // Fill in the missing data for the rank = 2 case directly for speed. | |
673 | |
4773 | 674 for (size_t i = 0; i < np; i++) |
675 { | |
676 for (size_t j = 1; j < nr; j++) | |
677 for (size_t k = nc/2+1; k < nc; k++) | |
678 out[k + (j + i*nr)*nc] = conj(out[nc - k + ((i+1)*nr - j)*nc]); | |
679 | |
680 for (size_t j = nc/2+1; j < nc; j++) | |
681 out[j + i*nr*nc] = conj(out[(i*nr+1)*nc - j]); | |
682 } | |
683 | |
4785 | 684 OCTAVE_QUIT; |
685 | |
686 // Now do the permutations needed for rank > 2 cases. | |
687 | |
4773 | 688 size_t jstart = dv(0) * dv(1); |
689 size_t kstep = dv(0); | |
690 size_t nel = dv.numel (); | |
4785 | 691 |
4808 | 692 for (int inner = 2; inner < dv.length (); inner++) |
4773 | 693 { |
694 size_t jmax = jstart * dv(inner); | |
695 for (size_t i = 0; i < nel; i+=jmax) | |
696 for (size_t j = jstart, jj = jmax-jstart; j < jj; | |
697 j+=jstart, jj-=jstart) | |
698 for (size_t k = 0; k < jstart; k+= kstep) | |
699 for (size_t l = nc/2+1; l < nc; l++) | |
700 { | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
701 T tmp = out[i+ j + k + l]; |
4773 | 702 out[i + j + k + l] = out[i + jj + k + l]; |
703 out[i + jj + k + l] = tmp; | |
704 } | |
705 jstart = jmax; | |
706 } | |
4785 | 707 |
708 OCTAVE_QUIT; | |
4773 | 709 } |
710 | |
711 int | |
712 octave_fftw::fft (const double *in, Complex *out, size_t npts, | |
5275 | 713 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
4773 | 714 { |
715 dist = (dist < 0 ? npts : dist); | |
716 | |
717 dim_vector dv (npts); | |
718 fftw_plan plan = fftw_planner.create_plan (1, dv, nsamples, stride, dist, | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
719 in, out); |
4773 | 720 |
721 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
722 reinterpret_cast<fftw_complex *> (out)); |
4773 | 723 |
4809 | 724 // Need to create other half of the transform. |
725 | |
4773 | 726 convert_packcomplex_1d (out, nsamples, npts, stride, dist); |
3828 | 727 |
728 return 0; | |
729 } | |
730 | |
731 int | |
4773 | 732 octave_fftw::fft (const Complex *in, Complex *out, size_t npts, |
5275 | 733 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
3828 | 734 { |
4773 | 735 dist = (dist < 0 ? npts : dist); |
736 | |
737 dim_vector dv (npts); | |
738 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples, | |
739 stride, dist, in, out); | |
740 | |
741 fftw_execute_dft (plan, | |
742 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
743 reinterpret_cast<fftw_complex *> (out)); | |
744 | |
745 return 0; | |
746 } | |
747 | |
748 int | |
749 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts, | |
5275 | 750 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
4773 | 751 { |
752 dist = (dist < 0 ? npts : dist); | |
753 | |
754 dim_vector dv (npts); | |
755 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples, | |
756 stride, dist, in, out); | |
757 | |
758 fftw_execute_dft (plan, | |
759 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
760 reinterpret_cast<fftw_complex *> (out)); | |
3828 | 761 |
762 const Complex scale = npts; | |
4773 | 763 for (size_t j = 0; j < nsamples; j++) |
764 for (size_t i = 0; i < npts; i++) | |
765 out[i*stride + j*dist] /= scale; | |
3828 | 766 |
767 return 0; | |
768 } | |
769 | |
770 int | |
4773 | 771 octave_fftw::fftNd (const double *in, Complex *out, const int rank, |
772 const dim_vector &dv) | |
3828 | 773 { |
5275 | 774 octave_idx_type dist = 1; |
4773 | 775 for (int i = 0; i < rank; i++) |
776 dist *= dv(i); | |
777 | |
778 // Fool with the position of the start of the output matrix, so that | |
4809 | 779 // creating other half of the matrix won't cause cache problems. |
780 | |
5275 | 781 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); |
4773 | 782 |
783 fftw_plan plan = fftw_planner.create_plan (rank, dv, 1, 1, dist, | |
784 in, out + offset); | |
785 | |
786 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)), | |
787 reinterpret_cast<fftw_complex *> (out+ offset)); | |
788 | |
4809 | 789 // Need to create other half of the transform. |
790 | |
4773 | 791 convert_packcomplex_Nd (out, dv); |
3828 | 792 |
793 return 0; | |
794 } | |
795 | |
796 int | |
4773 | 797 octave_fftw::fftNd (const Complex *in, Complex *out, const int rank, |
798 const dim_vector &dv) | |
3828 | 799 { |
5275 | 800 octave_idx_type dist = 1; |
4773 | 801 for (int i = 0; i < rank; i++) |
802 dist *= dv(i); | |
803 | |
804 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1, | |
805 dist, in, out); | |
806 | |
807 fftw_execute_dft (plan, | |
808 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
809 reinterpret_cast<fftw_complex *> (out)); | |
810 | |
811 return 0; | |
812 } | |
3828 | 813 |
4773 | 814 int |
815 octave_fftw::ifftNd (const Complex *in, Complex *out, const int rank, | |
4784 | 816 const dim_vector &dv) |
4773 | 817 { |
5275 | 818 octave_idx_type dist = 1; |
4773 | 819 for (int i = 0; i < rank; i++) |
820 dist *= dv(i); | |
821 | |
822 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1, | |
823 dist, in, out); | |
824 | |
825 fftw_execute_dft (plan, | |
826 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)), | |
827 reinterpret_cast<fftw_complex *> (out)); | |
828 | |
829 const size_t npts = dv.numel (); | |
3828 | 830 const Complex scale = npts; |
831 for (size_t i = 0; i < npts; i++) | |
4773 | 832 out[i] /= scale; |
3828 | 833 |
834 return 0; | |
835 } | |
836 | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
837 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
838 octave_fftw::fft (const float *in, FloatComplex *out, size_t npts, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
839 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
840 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
841 dist = (dist < 0 ? npts : dist); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
842 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
843 dim_vector dv (npts); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
844 fftwf_plan plan = float_fftw_planner.create_plan (1, dv, nsamples, stride, dist, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
845 in, out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
846 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
847 fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
848 reinterpret_cast<fftwf_complex *> (out)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
849 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
850 // Need to create other half of the transform. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
851 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
852 convert_packcomplex_1d (out, nsamples, npts, stride, dist); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
853 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
854 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
855 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
856 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
857 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
858 octave_fftw::fft (const FloatComplex *in, FloatComplex *out, size_t npts, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
859 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
860 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
861 dist = (dist < 0 ? npts : dist); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
862 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
863 dim_vector dv (npts); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
864 fftwf_plan plan = float_fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
865 stride, dist, in, out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
866 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
867 fftwf_execute_dft (plan, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
868 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
869 reinterpret_cast<fftwf_complex *> (out)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
870 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
871 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
872 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
873 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
874 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
875 octave_fftw::ifft (const FloatComplex *in, FloatComplex *out, size_t npts, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
876 size_t nsamples, octave_idx_type stride, octave_idx_type dist) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
877 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
878 dist = (dist < 0 ? npts : dist); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
879 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
880 dim_vector dv (npts); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
881 fftwf_plan plan = float_fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
882 stride, dist, in, out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
883 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
884 fftwf_execute_dft (plan, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
885 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
886 reinterpret_cast<fftwf_complex *> (out)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
887 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
888 const FloatComplex scale = npts; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
889 for (size_t j = 0; j < nsamples; j++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
890 for (size_t i = 0; i < npts; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
891 out[i*stride + j*dist] /= scale; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
892 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
893 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
894 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
895 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
896 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
897 octave_fftw::fftNd (const float *in, FloatComplex *out, const int rank, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
898 const dim_vector &dv) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
899 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
900 octave_idx_type dist = 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
901 for (int i = 0; i < rank; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
902 dist *= dv(i); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
903 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
904 // Fool with the position of the start of the output matrix, so that |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
905 // creating other half of the matrix won't cause cache problems. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
906 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
907 octave_idx_type offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
908 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
909 fftwf_plan plan = float_fftw_planner.create_plan (rank, dv, 1, 1, dist, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
910 in, out + offset); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
911 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
912 fftwf_execute_dft_r2c (plan, (const_cast<float *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
913 reinterpret_cast<fftwf_complex *> (out+ offset)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
914 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
915 // Need to create other half of the transform. |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
916 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
917 convert_packcomplex_Nd (out, dv); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
918 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
919 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
920 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
921 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
922 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
923 octave_fftw::fftNd (const FloatComplex *in, FloatComplex *out, const int rank, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
924 const dim_vector &dv) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
925 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
926 octave_idx_type dist = 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
927 for (int i = 0; i < rank; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
928 dist *= dv(i); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
929 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
930 fftwf_plan plan = float_fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
931 dist, in, out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
932 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
933 fftwf_execute_dft (plan, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
934 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
935 reinterpret_cast<fftwf_complex *> (out)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
936 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
937 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
938 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
939 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
940 int |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
941 octave_fftw::ifftNd (const FloatComplex *in, FloatComplex *out, const int rank, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
942 const dim_vector &dv) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
943 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
944 octave_idx_type dist = 1; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
945 for (int i = 0; i < rank; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
946 dist *= dv(i); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
947 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
948 fftwf_plan plan = float_fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
949 dist, in, out); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
950 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
951 fftwf_execute_dft (plan, |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
952 reinterpret_cast<fftwf_complex *> (const_cast<FloatComplex *>(in)), |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
953 reinterpret_cast<fftwf_complex *> (out)); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
954 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
955 const size_t npts = dv.numel (); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
956 const FloatComplex scale = npts; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
957 for (size_t i = 0; i < npts; i++) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
958 out[i] /= scale; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
959 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
960 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
961 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
962 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
963 |
3828 | 964 #endif |
965 | |
966 /* | |
967 ;;; Local Variables: *** | |
968 ;;; mode: C++ *** | |
969 ;;; End: *** | |
970 */ | |
971 |