blob: a0b3724be058402beafaefb0f1bcad15e8e843c2 [file] [log] [blame]
Benny Prijonoda6e89a2006-06-18 01:58:38 +00001/*
2Copyright (c) 2003-2004, Mark Borgerding
3
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11
12THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13*/
14
15
16#ifdef HAVE_CONFIG_H
17#include "config.h"
18#endif
19
20#include "_kiss_fft_guts.h"
21#include "misc.h"
22
23/* The guts header contains all the multiplication and addition macros that are defined for
24 fixed or floating point complex numbers. It also delares the kf_ internal functions.
25 */
26
27static kiss_fft_cpx *scratchbuf=NULL;
28static size_t nscratchbuf=0;
29static kiss_fft_cpx *tmpbuf=NULL;
30static size_t ntmpbuf=0;
31
32#define CHECKBUF(buf,nbuf,n) \
33 do { \
34 if ( nbuf < (size_t)(n) ) {\
Benny Prijonodc498ca2006-07-26 17:04:54 +000035 speex_free(buf); \
Benny Prijonoda6e89a2006-06-18 01:58:38 +000036 buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
37 nbuf = (size_t)(n); \
38 } \
39 }while(0)
40
41static void kf_bfly2(
42 kiss_fft_cpx * Fout,
43 const size_t fstride,
44 const kiss_fft_cfg st,
45 int m
46 )
47{
48 kiss_fft_cpx * Fout2;
49 kiss_fft_cpx * tw1 = st->twiddles;
50 kiss_fft_cpx t;
51 Fout2 = Fout + m;
52 if (!st->inverse) {
53 int i;
54 kiss_fft_cpx *x=Fout;
55 for (i=0;i<2*m;i++)
56 {
57 x[i].r = SHR(x[i].r,1);
58 x[i].i = SHR(x[i].i,1);
59 }
60 }
61
62 do{
63 C_MUL (t, *Fout2 , *tw1);
64 tw1 += fstride;
65 C_SUB( *Fout2 , *Fout , t );
66 C_ADDTO( *Fout , t );
67 ++Fout2;
68 ++Fout;
69 }while (--m);
70}
71
72static void kf_bfly4(
73 kiss_fft_cpx * Fout,
74 const size_t fstride,
75 const kiss_fft_cfg st,
76 const size_t m
77 )
78{
79 kiss_fft_cpx *tw1,*tw2,*tw3;
80 kiss_fft_cpx scratch[6];
81 size_t k=m;
82 const size_t m2=2*m;
83 const size_t m3=3*m;
84
85 tw3 = tw2 = tw1 = st->twiddles;
86
87 if (!st->inverse) {
88 int i;
89 kiss_fft_cpx *x=Fout;
Benny Prijonodc498ca2006-07-26 17:04:54 +000090 for (i=0;i<4*m;i++)
Benny Prijonoda6e89a2006-06-18 01:58:38 +000091 {
92 x[i].r = PSHR16(x[i].r,2);
93 x[i].i = PSHR16(x[i].i,2);
94 }
95 }
96 if (st->inverse)
97 {
98 do {
99 C_MUL(scratch[0],Fout[m] , *tw1 );
100 C_MUL(scratch[1],Fout[m2] , *tw2 );
101 C_MUL(scratch[2],Fout[m3] , *tw3 );
102
103 C_SUB( scratch[5] , *Fout, scratch[1] );
104 C_ADDTO(*Fout, scratch[1]);
105 C_ADD( scratch[3] , scratch[0] , scratch[2] );
106 C_SUB( scratch[4] , scratch[0] , scratch[2] );
107 C_SUB( Fout[m2], *Fout, scratch[3] );
108 tw1 += fstride;
109 tw2 += fstride*2;
110 tw3 += fstride*3;
111 C_ADDTO( *Fout , scratch[3] );
112
113 Fout[m].r = scratch[5].r - scratch[4].i;
114 Fout[m].i = scratch[5].i + scratch[4].r;
115 Fout[m3].r = scratch[5].r + scratch[4].i;
116 Fout[m3].i = scratch[5].i - scratch[4].r;
117 ++Fout;
118 } while(--k);
119 } else
120 {
121 do {
122 C_MUL(scratch[0],Fout[m] , *tw1 );
123 C_MUL(scratch[1],Fout[m2] , *tw2 );
124 C_MUL(scratch[2],Fout[m3] , *tw3 );
125
126 C_SUB( scratch[5] , *Fout, scratch[1] );
127 C_ADDTO(*Fout, scratch[1]);
128 C_ADD( scratch[3] , scratch[0] , scratch[2] );
129 C_SUB( scratch[4] , scratch[0] , scratch[2] );
130 C_SUB( Fout[m2], *Fout, scratch[3] );
131 tw1 += fstride;
132 tw2 += fstride*2;
133 tw3 += fstride*3;
134 C_ADDTO( *Fout , scratch[3] );
135
136 Fout[m].r = scratch[5].r + scratch[4].i;
137 Fout[m].i = scratch[5].i - scratch[4].r;
138 Fout[m3].r = scratch[5].r - scratch[4].i;
139 Fout[m3].i = scratch[5].i + scratch[4].r;
140 ++Fout;
141 }while(--k);
142 }
143}
144
145static void kf_bfly3(
146 kiss_fft_cpx * Fout,
147 const size_t fstride,
148 const kiss_fft_cfg st,
149 size_t m
150 )
151{
152 size_t k=m;
153 const size_t m2 = 2*m;
154 kiss_fft_cpx *tw1,*tw2;
155 kiss_fft_cpx scratch[5];
156 kiss_fft_cpx epi3;
157 epi3 = st->twiddles[fstride*m];
158
159 tw1=tw2=st->twiddles;
160
161 do{
162 if (!st->inverse) {
163 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
164 }
165
166 C_MUL(scratch[1],Fout[m] , *tw1);
167 C_MUL(scratch[2],Fout[m2] , *tw2);
168
169 C_ADD(scratch[3],scratch[1],scratch[2]);
170 C_SUB(scratch[0],scratch[1],scratch[2]);
171 tw1 += fstride;
172 tw2 += fstride*2;
173
174 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
175 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
176
177 C_MULBYSCALAR( scratch[0] , epi3.i );
178
179 C_ADDTO(*Fout,scratch[3]);
180
181 Fout[m2].r = Fout[m].r + scratch[0].i;
182 Fout[m2].i = Fout[m].i - scratch[0].r;
183
184 Fout[m].r -= scratch[0].i;
185 Fout[m].i += scratch[0].r;
186
187 ++Fout;
188 }while(--k);
189}
190
191static void kf_bfly5(
192 kiss_fft_cpx * Fout,
193 const size_t fstride,
194 const kiss_fft_cfg st,
195 int m
196 )
197{
198 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
199 int u;
200 kiss_fft_cpx scratch[13];
201 kiss_fft_cpx * twiddles = st->twiddles;
202 kiss_fft_cpx *tw;
203 kiss_fft_cpx ya,yb;
204 ya = twiddles[fstride*m];
205 yb = twiddles[fstride*2*m];
206
207 Fout0=Fout;
208 Fout1=Fout0+m;
209 Fout2=Fout0+2*m;
210 Fout3=Fout0+3*m;
211 Fout4=Fout0+4*m;
212
213 tw=st->twiddles;
214 for ( u=0; u<m; ++u ) {
215 if (!st->inverse) {
216 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
217 }
218 scratch[0] = *Fout0;
219
220 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
221 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
222 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
223 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
224
225 C_ADD( scratch[7],scratch[1],scratch[4]);
226 C_SUB( scratch[10],scratch[1],scratch[4]);
227 C_ADD( scratch[8],scratch[2],scratch[3]);
228 C_SUB( scratch[9],scratch[2],scratch[3]);
229
230 Fout0->r += scratch[7].r + scratch[8].r;
231 Fout0->i += scratch[7].i + scratch[8].i;
232
233 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
234 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
235
236 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
237 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
238
239 C_SUB(*Fout1,scratch[5],scratch[6]);
240 C_ADD(*Fout4,scratch[5],scratch[6]);
241
242 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
243 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
244 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
245 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
246
247 C_ADD(*Fout2,scratch[11],scratch[12]);
248 C_SUB(*Fout3,scratch[11],scratch[12]);
249
250 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
251 }
252}
253
254/* perform the butterfly for one stage of a mixed radix FFT */
255static void kf_bfly_generic(
256 kiss_fft_cpx * Fout,
257 const size_t fstride,
258 const kiss_fft_cfg st,
259 int m,
260 int p
261 )
262{
263 int u,k,q1,q;
264 kiss_fft_cpx * twiddles = st->twiddles;
265 kiss_fft_cpx t;
266 int Norig = st->nfft;
267
268 CHECKBUF(scratchbuf,nscratchbuf,p);
269
270 for ( u=0; u<m; ++u ) {
271 k=u;
272 for ( q1=0 ; q1<p ; ++q1 ) {
273 scratchbuf[q1] = Fout[ k ];
274 if (!st->inverse) {
275 C_FIXDIV(scratchbuf[q1],p);
276 }
277 k += m;
278 }
279
280 k=u;
281 for ( q1=0 ; q1<p ; ++q1 ) {
282 int twidx=0;
283 Fout[ k ] = scratchbuf[0];
284 for (q=1;q<p;++q ) {
285 twidx += fstride * k;
286 if (twidx>=Norig) twidx-=Norig;
287 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
288 C_ADDTO( Fout[ k ] ,t);
289 }
290 k += m;
291 }
292 }
293}
294
295static
296void kf_work(
297 kiss_fft_cpx * Fout,
298 const kiss_fft_cpx * f,
299 const size_t fstride,
300 int in_stride,
301 int * factors,
302 const kiss_fft_cfg st
303 )
304{
305 kiss_fft_cpx * Fout_beg=Fout;
306 const int p=*factors++; /* the radix */
307 const int m=*factors++; /* stage's fft length/p */
308 const kiss_fft_cpx * Fout_end = Fout + p*m;
309
310 if (m==1) {
311 do{
312 *Fout = *f;
313 f += fstride*in_stride;
314 }while(++Fout != Fout_end );
315 }else{
316 do{
317 kf_work( Fout , f, fstride*p, in_stride, factors,st);
318 f += fstride*in_stride;
319 }while( (Fout += m) != Fout_end );
320 }
321
322 Fout=Fout_beg;
323
324 switch (p) {
325 case 2: kf_bfly2(Fout,fstride,st,m); break;
326 case 3: kf_bfly3(Fout,fstride,st,m); break;
327 case 4: kf_bfly4(Fout,fstride,st,m); break;
328 case 5: kf_bfly5(Fout,fstride,st,m); break;
329 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
330 }
331}
332
333/* facbuf is populated by p1,m1,p2,m2, ...
334 where
335 p[i] * m[i] = m[i-1]
336 m0 = n */
337static
338void kf_factor(int n,int * facbuf)
339{
340 int p=4;
341 double floor_sqrt;
342 floor_sqrt = floor( sqrt((double)n) );
343
344 /*factor out powers of 4, powers of 2, then any remaining primes */
345 do {
346 while (n % p) {
347 switch (p) {
348 case 4: p = 2; break;
349 case 2: p = 3; break;
350 default: p += 2; break;
351 }
352 if (p > floor_sqrt)
353 p = n; /* no more factors, skip to end */
354 }
355 n /= p;
356 *facbuf++ = p;
357 *facbuf++ = n;
358 } while (n > 1);
359}
360
361/*
362 *
363 * User-callable function to allocate all necessary storage space for the fft.
364 *
365 * The return value is a contiguous block of memory, allocated with malloc. As such,
366 * It can be freed with free(), rather than a kiss_fft-specific function.
367 * */
368kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
369{
370 kiss_fft_cfg st=NULL;
371 size_t memneeded = sizeof(struct kiss_fft_state)
372 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
373
374 if ( lenmem==NULL ) {
375 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
376 }else{
377 if (mem != NULL && *lenmem >= memneeded)
378 st = (kiss_fft_cfg)mem;
379 *lenmem = memneeded;
380 }
381 if (st) {
382 int i;
383 st->nfft=nfft;
384 st->inverse = inverse_fft;
385
386 for (i=0;i<nfft;++i) {
387 const double pi=3.14159265358979323846264338327;
388 double phase = ( -2*pi /nfft ) * i;
389 if (st->inverse)
390 phase *= -1;
391 kf_cexp(st->twiddles+i, phase );
392 }
393
394 kf_factor(nfft,st->factors);
395 }
396 return st;
397}
398
399
400
401
402void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
403{
404 if (fin == fout) {
405 CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
406 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
Benny Prijonodc498ca2006-07-26 17:04:54 +0000407 speex_move(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
Benny Prijonoda6e89a2006-06-18 01:58:38 +0000408 }else{
409 kf_work( fout, fin, 1,in_stride, st->factors,st );
410 }
411}
412
413void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
414{
415 kiss_fft_stride(cfg,fin,fout,1);
416}
417
418
419/* not really necessary to call, but if someone is doing in-place ffts, they may want to free the
420 buffers from CHECKBUF
421 */
422void kiss_fft_cleanup(void)
423{
Benny Prijonodc498ca2006-07-26 17:04:54 +0000424 speex_free(scratchbuf);
Benny Prijonoda6e89a2006-06-18 01:58:38 +0000425 scratchbuf = NULL;
426 nscratchbuf=0;
Benny Prijonodc498ca2006-07-26 17:04:54 +0000427 speex_free(tmpbuf);
Benny Prijonoda6e89a2006-06-18 01:58:38 +0000428 tmpbuf=NULL;
429 ntmpbuf=0;
430}