summaryrefslogtreecommitdiff
path: root/kiss_fft.c
diff options
context:
space:
mode:
Diffstat (limited to 'kiss_fft.c')
-rw-r--r--kiss_fft.c714
1 files changed, 366 insertions, 348 deletions
diff --git a/kiss_fft.c b/kiss_fft.c
index 05f7f27..e7993cf 100644
--- a/kiss_fft.c
+++ b/kiss_fft.c
@@ -3,406 +3,424 @@ Copyright (c) 2003-2010, Mark Borgerding
3 3
4All rights reserved. 4All rights reserved.
5 5
6Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 6Redistribution and use in source and binary forms, with or without modification,
7 7are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 8
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 * Redistributions of source code must retain the above copyright notice,
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10this list of conditions and the following disclaimer.
11 11 * Redistributions in binary form must reproduce the above copyright notice,
12THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 12this list of conditions and the following disclaimer in the documentation and/or
13other materials provided with the distribution.
14 * Neither the author nor the names of any contributors may be used to
15endorse or promote products derived from this software without specific prior
16written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
22ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13*/ 28*/
14 29
15
16#include "_kiss_fft_guts.h" 30#include "_kiss_fft_guts.h"
17/* The guts header contains all the multiplication and addition macros that are defined for 31/* The guts header contains all the multiplication and addition macros that are
18 fixed or floating point complex numbers. It also delares the kf_ internal functions. 32 defined for fixed or floating point complex numbers. It also declares the kf_
33 internal functions.
19 */ 34 */
20 35
21static void kf_bfly2( 36static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride,
22 kiss_fft_cpx * Fout, 37 const kiss_fft_cfg st, int m) {
23 const size_t fstride, 38 kiss_fft_cpx *Fout2;
24 const kiss_fft_cfg st, 39 kiss_fft_cpx *tw1 = st->twiddles;
25 int m 40 kiss_fft_cpx t;
26 ) 41 Fout2 = Fout + m;
27{ 42 do {
28 kiss_fft_cpx * Fout2; 43 C_FIXDIV(*Fout, 2);
29 kiss_fft_cpx * tw1 = st->twiddles; 44 C_FIXDIV(*Fout2, 2);
30 kiss_fft_cpx t; 45
31 Fout2 = Fout + m; 46 C_MUL(t, *Fout2, *tw1);
32 do{ 47 tw1 += fstride;
33 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2); 48 C_SUB(*Fout2, *Fout, t);
34 49 C_ADDTO(*Fout, t);
35 C_MUL (t, *Fout2 , *tw1); 50 ++Fout2;
36 tw1 += fstride; 51 ++Fout;
37 C_SUB( *Fout2 , *Fout , t ); 52 } while (--m);
38 C_ADDTO( *Fout , t );
39 ++Fout2;
40 ++Fout;
41 }while (--m);
42} 53}
43 54
44static void kf_bfly4( 55static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride,
45 kiss_fft_cpx * Fout, 56 const kiss_fft_cfg st, const size_t m) {
46 const size_t fstride, 57 kiss_fft_cpx *tw1, *tw2, *tw3;
47 const kiss_fft_cfg st, 58 kiss_fft_cpx scratch[6];
48 const size_t m 59 size_t k = m;
49 ) 60 const size_t m2 = 2 * m;
50{ 61 const size_t m3 = 3 * m;
51 kiss_fft_cpx *tw1,*tw2,*tw3; 62
52 kiss_fft_cpx scratch[6]; 63 tw3 = tw2 = tw1 = st->twiddles;
53 size_t k=m; 64
54 const size_t m2=2*m; 65 do {
55 const size_t m3=3*m; 66 C_FIXDIV(*Fout, 4);
56 67 C_FIXDIV(Fout[m], 4);
57 68 C_FIXDIV(Fout[m2], 4);
58 tw3 = tw2 = tw1 = st->twiddles; 69 C_FIXDIV(Fout[m3], 4);
59 70
60 do { 71 C_MUL(scratch[0], Fout[m], *tw1);
61 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4); 72 C_MUL(scratch[1], Fout[m2], *tw2);
62 73 C_MUL(scratch[2], Fout[m3], *tw3);
63 C_MUL(scratch[0],Fout[m] , *tw1 ); 74
64 C_MUL(scratch[1],Fout[m2] , *tw2 ); 75 C_SUB(scratch[5], *Fout, scratch[1]);
65 C_MUL(scratch[2],Fout[m3] , *tw3 ); 76 C_ADDTO(*Fout, scratch[1]);
66 77 C_ADD(scratch[3], scratch[0], scratch[2]);
67 C_SUB( scratch[5] , *Fout, scratch[1] ); 78 C_SUB(scratch[4], scratch[0], scratch[2]);
68 C_ADDTO(*Fout, scratch[1]); 79 C_SUB(Fout[m2], *Fout, scratch[3]);
69 C_ADD( scratch[3] , scratch[0] , scratch[2] ); 80 tw1 += fstride;
70 C_SUB( scratch[4] , scratch[0] , scratch[2] ); 81 tw2 += fstride * 2;
71 C_SUB( Fout[m2], *Fout, scratch[3] ); 82 tw3 += fstride * 3;
72 tw1 += fstride; 83 C_ADDTO(*Fout, scratch[3]);
73 tw2 += fstride*2; 84
74 tw3 += fstride*3; 85 if (st->inverse) {
75 C_ADDTO( *Fout , scratch[3] ); 86 Fout[m].r = scratch[5].r - scratch[4].i;
76 87 Fout[m].i = scratch[5].i + scratch[4].r;
77 if(st->inverse) { 88 Fout[m3].r = scratch[5].r + scratch[4].i;
78 Fout[m].r = scratch[5].r - scratch[4].i; 89 Fout[m3].i = scratch[5].i - scratch[4].r;
79 Fout[m].i = scratch[5].i + scratch[4].r; 90 } else {
80 Fout[m3].r = scratch[5].r + scratch[4].i; 91 Fout[m].r = scratch[5].r + scratch[4].i;
81 Fout[m3].i = scratch[5].i - scratch[4].r; 92 Fout[m].i = scratch[5].i - scratch[4].r;
82 }else{ 93 Fout[m3].r = scratch[5].r - scratch[4].i;
83 Fout[m].r = scratch[5].r + scratch[4].i; 94 Fout[m3].i = scratch[5].i + scratch[4].r;
84 Fout[m].i = scratch[5].i - scratch[4].r; 95 }
85 Fout[m3].r = scratch[5].r - scratch[4].i; 96 ++Fout;
86 Fout[m3].i = scratch[5].i + scratch[4].r; 97 } while (--k);
87 }
88 ++Fout;
89 }while(--k);
90} 98}
91 99
92static void kf_bfly3( 100static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride,
93 kiss_fft_cpx * Fout, 101 const kiss_fft_cfg st, size_t m) {
94 const size_t fstride, 102 size_t k = m;
95 const kiss_fft_cfg st, 103 const size_t m2 = 2 * m;
96 size_t m 104 kiss_fft_cpx *tw1, *tw2;
97 ) 105 kiss_fft_cpx scratch[5];
98{ 106 kiss_fft_cpx epi3;
99 size_t k=m; 107 epi3 = st->twiddles[fstride * m];
100 const size_t m2 = 2*m;
101 kiss_fft_cpx *tw1,*tw2;
102 kiss_fft_cpx scratch[5];
103 kiss_fft_cpx epi3;
104 epi3 = st->twiddles[fstride*m];
105 108
106 tw1=tw2=st->twiddles; 109 tw1 = tw2 = st->twiddles;
107 110
108 do{ 111 do {
109 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); 112 C_FIXDIV(*Fout, 3);
113 C_FIXDIV(Fout[m], 3);
114 C_FIXDIV(Fout[m2], 3);
110 115
111 C_MUL(scratch[1],Fout[m] , *tw1); 116 C_MUL(scratch[1], Fout[m], *tw1);
112 C_MUL(scratch[2],Fout[m2] , *tw2); 117 C_MUL(scratch[2], Fout[m2], *tw2);
113 118
114 C_ADD(scratch[3],scratch[1],scratch[2]); 119 C_ADD(scratch[3], scratch[1], scratch[2]);
115 C_SUB(scratch[0],scratch[1],scratch[2]); 120 C_SUB(scratch[0], scratch[1], scratch[2]);
116 tw1 += fstride; 121 tw1 += fstride;
117 tw2 += fstride*2; 122 tw2 += fstride * 2;
118 123
119 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); 124 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
120 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); 125 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
121 126
122 C_MULBYSCALAR( scratch[0] , epi3.i ); 127 C_MULBYSCALAR(scratch[0], epi3.i);
123 128
124 C_ADDTO(*Fout,scratch[3]); 129 C_ADDTO(*Fout, scratch[3]);
125 130
126 Fout[m2].r = Fout[m].r + scratch[0].i; 131 Fout[m2].r = Fout[m].r + scratch[0].i;
127 Fout[m2].i = Fout[m].i - scratch[0].r; 132 Fout[m2].i = Fout[m].i - scratch[0].r;
128 133
129 Fout[m].r -= scratch[0].i; 134 Fout[m].r -= scratch[0].i;
130 Fout[m].i += scratch[0].r; 135 Fout[m].i += scratch[0].r;
131 136
132 ++Fout; 137 ++Fout;
133 }while(--k); 138 } while (--k);
134} 139}
135 140
136static void kf_bfly5( 141static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride,
137 kiss_fft_cpx * Fout, 142 const kiss_fft_cfg st, int m) {
138 const size_t fstride, 143 kiss_fft_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
139 const kiss_fft_cfg st, 144 int u;
140 int m 145 kiss_fft_cpx scratch[13];
141 ) 146 kiss_fft_cpx *twiddles = st->twiddles;
142{ 147 kiss_fft_cpx *tw;
143 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 148 kiss_fft_cpx ya, yb;
144 int u; 149 ya = twiddles[fstride * m];
145 kiss_fft_cpx scratch[13]; 150 yb = twiddles[fstride * 2 * m];
146 kiss_fft_cpx * twiddles = st->twiddles; 151
147 kiss_fft_cpx *tw; 152 Fout0 = Fout;
148 kiss_fft_cpx ya,yb; 153 Fout1 = Fout0 + m;
149 ya = twiddles[fstride*m]; 154 Fout2 = Fout0 + 2 * m;
150 yb = twiddles[fstride*2*m]; 155 Fout3 = Fout0 + 3 * m;
151 156 Fout4 = Fout0 + 4 * m;
152 Fout0=Fout; 157
153 Fout1=Fout0+m; 158 tw = st->twiddles;
154 Fout2=Fout0+2*m; 159 for (u = 0; u < m; ++u) {
155 Fout3=Fout0+3*m; 160 C_FIXDIV(*Fout0, 5);
156 Fout4=Fout0+4*m; 161 C_FIXDIV(*Fout1, 5);
157 162 C_FIXDIV(*Fout2, 5);
158 tw=st->twiddles; 163 C_FIXDIV(*Fout3, 5);
159 for ( u=0; u<m; ++u ) { 164 C_FIXDIV(*Fout4, 5);
160 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); 165 scratch[0] = *Fout0;
161 scratch[0] = *Fout0; 166
162 167 C_MUL(scratch[1], *Fout1, tw[u * fstride]);
163 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); 168 C_MUL(scratch[2], *Fout2, tw[2 * u * fstride]);
164 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); 169 C_MUL(scratch[3], *Fout3, tw[3 * u * fstride]);
165 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); 170 C_MUL(scratch[4], *Fout4, tw[4 * u * fstride]);
166 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); 171
167 172 C_ADD(scratch[7], scratch[1], scratch[4]);
168 C_ADD( scratch[7],scratch[1],scratch[4]); 173 C_SUB(scratch[10], scratch[1], scratch[4]);
169 C_SUB( scratch[10],scratch[1],scratch[4]); 174 C_ADD(scratch[8], scratch[2], scratch[3]);
170 C_ADD( scratch[8],scratch[2],scratch[3]); 175 C_SUB(scratch[9], scratch[2], scratch[3]);
171 C_SUB( scratch[9],scratch[2],scratch[3]); 176
172 177 Fout0->r += scratch[7].r + scratch[8].r;
173 Fout0->r += scratch[7].r + scratch[8].r; 178 Fout0->i += scratch[7].i + scratch[8].i;
174 Fout0->i += scratch[7].i + scratch[8].i; 179
175 180 scratch[5].r =
176 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); 181 scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r);
177 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); 182 scratch[5].i =
178 183 scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r);
179 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); 184
180 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); 185 scratch[6].r = S_MUL(scratch[10].i, ya.i) + S_MUL(scratch[9].i, yb.i);
181 186 scratch[6].i = -S_MUL(scratch[10].r, ya.i) - S_MUL(scratch[9].r, yb.i);
182 C_SUB(*Fout1,scratch[5],scratch[6]); 187
183 C_ADD(*Fout4,scratch[5],scratch[6]); 188 C_SUB(*Fout1, scratch[5], scratch[6]);
184 189 C_ADD(*Fout4, scratch[5], scratch[6]);
185 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); 190
186 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); 191 scratch[11].r =
187 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); 192 scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r);
188 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); 193 scratch[11].i =
189 194 scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r);
190 C_ADD(*Fout2,scratch[11],scratch[12]); 195 scratch[12].r = -S_MUL(scratch[10].i, yb.i) + S_MUL(scratch[9].i, ya.i);
191 C_SUB(*Fout3,scratch[11],scratch[12]); 196 scratch[12].i = S_MUL(scratch[10].r, yb.i) - S_MUL(scratch[9].r, ya.i);
192 197
193 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; 198 C_ADD(*Fout2, scratch[11], scratch[12]);
194 } 199 C_SUB(*Fout3, scratch[11], scratch[12]);
200
201 ++Fout0;
202 ++Fout1;
203 ++Fout2;
204 ++Fout3;
205 ++Fout4;
206 }
195} 207}
196 208
197/* perform the butterfly for one stage of a mixed radix FFT */ 209/* perform the butterfly for one stage of a mixed radix FFT */
198static void kf_bfly_generic( 210static void kf_bfly_generic(kiss_fft_cpx *Fout, const size_t fstride,
199 kiss_fft_cpx * Fout, 211 const kiss_fft_cfg st, int m, int p) {
200 const size_t fstride, 212 int u, k, q1, q;
201 const kiss_fft_cfg st, 213 kiss_fft_cpx *twiddles = st->twiddles;
202 int m, 214 kiss_fft_cpx t;
203 int p 215 int Norig = st->nfft;
204 ) 216
205{ 217 kiss_fft_cpx *scratch =
206 int u,k,q1,q; 218 (kiss_fft_cpx *)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * p);
207 kiss_fft_cpx * twiddles = st->twiddles; 219
208 kiss_fft_cpx t; 220 for (u = 0; u < m; ++u) {
209 int Norig = st->nfft; 221 k = u;
210 222 for (q1 = 0; q1 < p; ++q1) {
211 kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p); 223 scratch[q1] = Fout[k];
212 224 C_FIXDIV(scratch[q1], p);
213 for ( u=0; u<m; ++u ) { 225 k += m;
214 k=u;
215 for ( q1=0 ; q1<p ; ++q1 ) {
216 scratch[q1] = Fout[ k ];
217 C_FIXDIV(scratch[q1],p);
218 k += m;
219 }
220
221 k=u;
222 for ( q1=0 ; q1<p ; ++q1 ) {
223 int twidx=0;
224 Fout[ k ] = scratch[0];
225 for (q=1;q<p;++q ) {
226 twidx += fstride * k;
227 if (twidx>=Norig) twidx-=Norig;
228 C_MUL(t,scratch[q] , twiddles[twidx] );
229 C_ADDTO( Fout[ k ] ,t);
230 }
231 k += m;
232 }
233 } 226 }
234 KISS_FFT_TMP_FREE(scratch);
235}
236
237static
238void kf_work(
239 kiss_fft_cpx * Fout,
240 const kiss_fft_cpx * f,
241 const size_t fstride,
242 int in_stride,
243 int * factors,
244 const kiss_fft_cfg st
245 )
246{
247 kiss_fft_cpx * Fout_beg=Fout;
248 const int p=*factors++; /* the radix */
249 const int m=*factors++; /* stage's fft length/p */
250 const kiss_fft_cpx * Fout_end = Fout + p*m;
251 227
252#ifdef _OPENMP 228 k = u;
253 // use openmp extensions at the 229 for (q1 = 0; q1 < p; ++q1) {
254 // top-level (not recursive) 230 int twidx = 0;
255 if (fstride==1 && p<=5) 231 Fout[k] = scratch[0];
256 { 232 for (q = 1; q < p; ++q) {
257 int k; 233 twidx += fstride * k;
258 234 if (twidx >= Norig) twidx -= Norig;
259 // execute the p different work units in different threads 235 C_MUL(t, scratch[q], twiddles[twidx]);
260# pragma omp parallel for 236 C_ADDTO(Fout[k], t);
261 for (k=0;k<p;++k) 237 }
262 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st); 238 k += m;
263 // all threads have joined by this point
264
265 switch (p) {
266 case 2: kf_bfly2(Fout,fstride,st,m); break;
267 case 3: kf_bfly3(Fout,fstride,st,m); break;
268 case 4: kf_bfly4(Fout,fstride,st,m); break;
269 case 5: kf_bfly5(Fout,fstride,st,m); break;
270 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
271 }
272 return;
273 } 239 }
274#endif 240 }
241 KISS_FFT_TMP_FREE(scratch);
242}
275 243
276 if (m==1) { 244static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f,
277 do{ 245 const size_t fstride, int in_stride, int *factors,
278 *Fout = *f; 246 const kiss_fft_cfg st) {
279 f += fstride*in_stride; 247 kiss_fft_cpx *Fout_beg = Fout;
280 }while(++Fout != Fout_end ); 248 const int p = *factors++; /* the radix */
281 }else{ 249 const int m = *factors++; /* stage's fft length/p */
282 do{ 250 const kiss_fft_cpx *Fout_end = Fout + p * m;
283 // recursive call:
284 // DFT of size m*p performed by doing
285 // p instances of smaller DFTs of size m,
286 // each one takes a decimated version of the input
287 kf_work( Fout , f, fstride*p, in_stride, factors,st);
288 f += fstride*in_stride;
289 }while( (Fout += m) != Fout_end );
290 }
291 251
292 Fout=Fout_beg; 252#ifdef _OPENMP
253 // use openmp extensions at the
254 // top-level (not recursive)
255 if (fstride == 1 && p <= 5) {
256 int k;
257
258 // execute the p different work units in different threads
259#pragma omp parallel for
260 for (k = 0; k < p; ++k)
261 kf_work(Fout + k * m, f + fstride * in_stride * k, fstride * p, in_stride,
262 factors, st);
263 // all threads have joined by this point
293 264
294 // recombine the p smaller DFTs
295 switch (p) { 265 switch (p) {
296 case 2: kf_bfly2(Fout,fstride,st,m); break; 266 case 2:
297 case 3: kf_bfly3(Fout,fstride,st,m); break; 267 kf_bfly2(Fout, fstride, st, m);
298 case 4: kf_bfly4(Fout,fstride,st,m); break; 268 break;
299 case 5: kf_bfly5(Fout,fstride,st,m); break; 269 case 3:
300 default: kf_bfly_generic(Fout,fstride,st,m,p); break; 270 kf_bfly3(Fout, fstride, st, m);
271 break;
272 case 4:
273 kf_bfly4(Fout, fstride, st, m);
274 break;
275 case 5:
276 kf_bfly5(Fout, fstride, st, m);
277 break;
278 default:
279 kf_bfly_generic(Fout, fstride, st, m, p);
280 break;
301 } 281 }
282 return;
283 }
284#endif
285
286 if (m == 1) {
287 do {
288 *Fout = *f;
289 f += fstride * in_stride;
290 } while (++Fout != Fout_end);
291 } else {
292 do {
293 // recursive call:
294 // DFT of size m*p performed by doing
295 // p instances of smaller DFTs of size m,
296 // each one takes a decimated version of the input
297 kf_work(Fout, f, fstride * p, in_stride, factors, st);
298 f += fstride * in_stride;
299 } while ((Fout += m) != Fout_end);
300 }
301
302 Fout = Fout_beg;
303
304 // recombine the p smaller DFTs
305 switch (p) {
306 case 2:
307 kf_bfly2(Fout, fstride, st, m);
308 break;
309 case 3:
310 kf_bfly3(Fout, fstride, st, m);
311 break;
312 case 4:
313 kf_bfly4(Fout, fstride, st, m);
314 break;
315 case 5:
316 kf_bfly5(Fout, fstride, st, m);
317 break;
318 default:
319 kf_bfly_generic(Fout, fstride, st, m, p);
320 break;
321 }
302} 322}
303 323
304/* facbuf is populated by p1,m1,p2,m2, ... 324/* facbuf is populated by p1,m1,p2,m2, ...
305 where 325 where
306 p[i] * m[i] = m[i-1] 326 p[i] * m[i] = m[i-1]
307 m0 = n */ 327 m0 = n */
308static 328static void kf_factor(int n, int *facbuf) {
309void kf_factor(int n,int * facbuf) 329 int p = 4;
310{ 330 double floor_sqrt;
311 int p=4; 331 floor_sqrt = floorf(sqrtf((double)n));
312 double floor_sqrt; 332
313 floor_sqrt = floorf( sqrtf((double)n) ); 333 /*factor out powers of 4, powers of 2, then any remaining primes */
314 334 do {
315 /*factor out powers of 4, powers of 2, then any remaining primes */ 335 while (n % p) {
316 do { 336 switch (p) {
317 while (n % p) { 337 case 4:
318 switch (p) { 338 p = 2;
319 case 4: p = 2; break; 339 break;
320 case 2: p = 3; break; 340 case 2:
321 default: p += 2; break; 341 p = 3;
322 } 342 break;
323 if (p > floor_sqrt) 343 default:
324 p = n; /* no more factors, skip to end */ 344 p += 2;
325 } 345 break;
326 n /= p; 346 }
327 *facbuf++ = p; 347 if (p > floor_sqrt) p = n; /* no more factors, skip to end */
328 *facbuf++ = n; 348 }
329 } while (n > 1); 349 n /= p;
350 *facbuf++ = p;
351 *facbuf++ = n;
352 } while (n > 1);
330} 353}
331 354
332/* 355/*
333 * 356 *
334 * User-callable function to allocate all necessary storage space for the fft. 357 * User-callable function to allocate all necessary storage space for the fft.
335 * 358 *
336 * The return value is a contiguous block of memory, allocated with malloc. As such, 359 * The return value is a contiguous block of memory, allocated with malloc. As
337 * It can be freed with free(), rather than a kiss_fft-specific function. 360 * such, It can be freed with free(), rather than a kiss_fft-specific function.
338 * */ 361 * */
339kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) 362kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem,
340{ 363 size_t *lenmem) {
341 kiss_fft_cfg st=NULL; 364 kiss_fft_cfg st = NULL;
342 size_t memneeded = sizeof(struct kiss_fft_state) 365 size_t memneeded = sizeof(struct kiss_fft_state) +
343 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/ 366 sizeof(kiss_fft_cpx) * (nfft - 1); /* twiddle factors*/
344 367
345 if ( lenmem==NULL ) { 368 if (lenmem == NULL) {
346 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded ); 369 st = (kiss_fft_cfg)KISS_FFT_MALLOC(memneeded);
347 }else{ 370 } else {
348 if (mem != NULL && *lenmem >= memneeded) 371 if (mem != NULL && *lenmem >= memneeded) st = (kiss_fft_cfg)mem;
349 st = (kiss_fft_cfg)mem; 372 *lenmem = memneeded;
350 *lenmem = memneeded; 373 }
351 } 374 if (st) {
352 if (st) { 375 int i;
353 int i; 376 st->nfft = nfft;
354 st->nfft=nfft; 377 st->inverse = inverse_fft;
355 st->inverse = inverse_fft; 378
356 379 for (i = 0; i < nfft; ++i) {
357 for (i=0;i<nfft;++i) { 380 const double pi =
358 const double pi=3.141592653589793238462643383279502884197169399375105820974944; 381 3.141592653589793238462643383279502884197169399375105820974944;
359 double phase = -2*pi*i / nfft; 382 double phase = -2 * pi * i / nfft;
360 if (st->inverse) 383 if (st->inverse) phase *= -1;
361 phase *= -1; 384 kf_cexp(st->twiddles + i, phase);
362 kf_cexp(st->twiddles+i, phase );
363 }
364
365 kf_factor(nfft,st->factors);
366 } 385 }
367 return st;
368}
369
370 386
371void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) 387 kf_factor(nfft, st->factors);
372{ 388 }
373 if (fin == fout) { 389 return st;
374 //NOTE: this is not really an in-place FFT algorithm.
375 //It just performs an out-of-place FFT into a temp buffer
376 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
377 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
378 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
379 KISS_FFT_TMP_FREE(tmpbuf);
380 }else{
381 kf_work( fout, fin, 1,in_stride, st->factors,st );
382 }
383} 390}
384 391
385void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 392void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx *fin,
386{ 393 kiss_fft_cpx *fout, int in_stride) {
387 kiss_fft_stride(cfg,fin,fout,1); 394 if (fin == fout) {
395 // NOTE: this is not really an in-place FFT algorithm.
396 // It just performs an out-of-place FFT into a temp buffer
397 kiss_fft_cpx *tmpbuf =
398 (kiss_fft_cpx *)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * st->nfft);
399 kf_work(tmpbuf, fin, 1, in_stride, st->factors, st);
400 memcpy(fout, tmpbuf, sizeof(kiss_fft_cpx) * st->nfft);
401 KISS_FFT_TMP_FREE(tmpbuf);
402 } else {
403 kf_work(fout, fin, 1, in_stride, st->factors, st);
404 }
388} 405}
389 406
407void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout) {
408 kiss_fft_stride(cfg, fin, fout, 1);
409}
390 410
391void kiss_fft_cleanup(void) 411void kiss_fft_cleanup(void) {
392{ 412 // nothing needed any more
393 // nothing needed any more
394} 413}
395 414
396int kiss_fft_next_fast_size(int n) 415int kiss_fft_next_fast_size(int n) {
397{ 416 while (1) {
398 while(1) { 417 int m = n;
399 int m=n; 418 while ((m % 2) == 0) m /= 2;
400 while ( (m%2) == 0 ) m/=2; 419 while ((m % 3) == 0) m /= 3;
401 while ( (m%3) == 0 ) m/=3; 420 while ((m % 5) == 0) m /= 5;
402 while ( (m%5) == 0 ) m/=5; 421 if (m <= 1)
403 if (m<=1) 422 break; /* n is completely factorable by twos, threes, and fives */
404 break; /* n is completely factorable by twos, threes, and fives */ 423 n++;
405 n++; 424 }
406 } 425 return n;
407 return n;
408} 426}