2 Copyright (c) 2003-2004, Mark Borgerding
3 Lots of modifications by Jean-Marc Valin
4 Copyright (c) 2005-2007, Xiph.Org Foundation
5 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
9 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
11 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
13 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 #include "_kiss_fft_guts.h"
26 #include "os_support.h"
28 #include "stack_alloc.h"
30 /* The guts header contains all the multiplication and addition macros that are defined for
31 complex numbers. It also delares the kf_ internal functions.
37 const kiss_fft_cfg st,
44 kiss_twiddle_cpx * tw1;
46 kiss_fft_cpx * Fout_beg = Fout;
49 Fout = Fout_beg + i*mm;
55 Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);
56 Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);
57 C_MUL (t, *Fout2 , *tw1);
59 C_SUB( *Fout2 , *Fout , t );
70 const kiss_fft_cfg st,
77 kiss_twiddle_cpx * tw1;
80 kiss_fft_cpx * Fout_beg = Fout;
83 Fout = Fout_beg + i*mm;
88 C_MULC (t, *Fout2 , *tw1);
90 C_SUB( *Fout2 , *Fout , t );
100 const size_t fstride,
101 const kiss_fft_cfg st,
107 kiss_twiddle_cpx *tw1,*tw2,*tw3;
108 kiss_fft_cpx scratch[6];
113 kiss_fft_cpx * Fout_beg = Fout;
116 Fout = Fout_beg + i*mm;
117 tw3 = tw2 = tw1 = st->twiddles;
120 C_MUL4(scratch[0],Fout[m] , *tw1 );
121 C_MUL4(scratch[1],Fout[m2] , *tw2 );
122 C_MUL4(scratch[2],Fout[m3] , *tw3 );
124 Fout->r = PSHR(Fout->r, 2);
125 Fout->i = PSHR(Fout->i, 2);
126 C_SUB( scratch[5] , *Fout, scratch[1] );
127 C_ADDTO(*Fout, scratch[1]);
128 C_ADD( scratch[3] , scratch[0] , scratch[2] );
129 C_SUB( scratch[4] , scratch[0] , scratch[2] );
130 Fout[m2].r = PSHR(Fout[m2].r, 2);
131 Fout[m2].i = PSHR(Fout[m2].i, 2);
132 C_SUB( Fout[m2], *Fout, scratch[3] );
136 C_ADDTO( *Fout , scratch[3] );
138 Fout[m].r = scratch[5].r + scratch[4].i;
139 Fout[m].i = scratch[5].i - scratch[4].r;
140 Fout[m3].r = scratch[5].r - scratch[4].i;
141 Fout[m3].i = scratch[5].i + scratch[4].r;
147 static void ki_bfly4(
149 const size_t fstride,
150 const kiss_fft_cfg st,
156 kiss_twiddle_cpx *tw1,*tw2,*tw3;
157 kiss_fft_cpx scratch[6];
162 kiss_fft_cpx * Fout_beg = Fout;
165 Fout = Fout_beg + i*mm;
166 tw3 = tw2 = tw1 = st->twiddles;
169 C_MULC(scratch[0],Fout[m] , *tw1 );
170 C_MULC(scratch[1],Fout[m2] , *tw2 );
171 C_MULC(scratch[2],Fout[m3] , *tw3 );
173 C_SUB( scratch[5] , *Fout, scratch[1] );
174 C_ADDTO(*Fout, scratch[1]);
175 C_ADD( scratch[3] , scratch[0] , scratch[2] );
176 C_SUB( scratch[4] , scratch[0] , scratch[2] );
177 C_SUB( Fout[m2], *Fout, scratch[3] );
181 C_ADDTO( *Fout , scratch[3] );
183 Fout[m].r = scratch[5].r - scratch[4].i;
184 Fout[m].i = scratch[5].i + scratch[4].r;
185 Fout[m3].r = scratch[5].r + scratch[4].i;
186 Fout[m3].i = scratch[5].i - scratch[4].r;
192 #ifndef RADIX_TWO_ONLY
194 static void kf_bfly3(
196 const size_t fstride,
197 const kiss_fft_cfg st,
202 const size_t m2 = 2*m;
203 kiss_twiddle_cpx *tw1,*tw2;
204 kiss_fft_cpx scratch[5];
205 kiss_twiddle_cpx epi3;
206 epi3 = st->twiddles[fstride*m];
208 tw1=tw2=st->twiddles;
210 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
212 C_MUL(scratch[1],Fout[m] , *tw1);
213 C_MUL(scratch[2],Fout[m2] , *tw2);
215 C_ADD(scratch[3],scratch[1],scratch[2]);
216 C_SUB(scratch[0],scratch[1],scratch[2]);
220 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
221 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
223 C_MULBYSCALAR( scratch[0] , epi3.i );
225 C_ADDTO(*Fout,scratch[3]);
227 Fout[m2].r = Fout[m].r + scratch[0].i;
228 Fout[m2].i = Fout[m].i - scratch[0].r;
230 Fout[m].r -= scratch[0].i;
231 Fout[m].i += scratch[0].r;
237 static void ki_bfly3(
239 const size_t fstride,
240 const kiss_fft_cfg st,
245 const size_t m2 = 2*m;
246 kiss_twiddle_cpx *tw1,*tw2;
247 kiss_fft_cpx scratch[5];
248 kiss_twiddle_cpx epi3;
249 epi3 = st->twiddles[fstride*m];
251 tw1=tw2=st->twiddles;
254 C_MULC(scratch[1],Fout[m] , *tw1);
255 C_MULC(scratch[2],Fout[m2] , *tw2);
257 C_ADD(scratch[3],scratch[1],scratch[2]);
258 C_SUB(scratch[0],scratch[1],scratch[2]);
262 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
263 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
265 C_MULBYSCALAR( scratch[0] , -epi3.i );
267 C_ADDTO(*Fout,scratch[3]);
269 Fout[m2].r = Fout[m].r + scratch[0].i;
270 Fout[m2].i = Fout[m].i - scratch[0].r;
272 Fout[m].r -= scratch[0].i;
273 Fout[m].i += scratch[0].r;
280 static void kf_bfly5(
282 const size_t fstride,
283 const kiss_fft_cfg st,
287 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
289 kiss_fft_cpx scratch[13];
290 kiss_twiddle_cpx * twiddles = st->twiddles;
291 kiss_twiddle_cpx *tw;
292 kiss_twiddle_cpx ya,yb;
293 ya = twiddles[fstride*m];
294 yb = twiddles[fstride*2*m];
303 for ( u=0; u<m; ++u ) {
304 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
307 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
308 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
309 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
310 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
312 C_ADD( scratch[7],scratch[1],scratch[4]);
313 C_SUB( scratch[10],scratch[1],scratch[4]);
314 C_ADD( scratch[8],scratch[2],scratch[3]);
315 C_SUB( scratch[9],scratch[2],scratch[3]);
317 Fout0->r += scratch[7].r + scratch[8].r;
318 Fout0->i += scratch[7].i + scratch[8].i;
320 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
321 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
323 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
324 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
326 C_SUB(*Fout1,scratch[5],scratch[6]);
327 C_ADD(*Fout4,scratch[5],scratch[6]);
329 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
330 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
331 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
332 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
334 C_ADD(*Fout2,scratch[11],scratch[12]);
335 C_SUB(*Fout3,scratch[11],scratch[12]);
337 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
341 static void ki_bfly5(
343 const size_t fstride,
344 const kiss_fft_cfg st,
348 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
350 kiss_fft_cpx scratch[13];
351 kiss_twiddle_cpx * twiddles = st->twiddles;
352 kiss_twiddle_cpx *tw;
353 kiss_twiddle_cpx ya,yb;
354 ya = twiddles[fstride*m];
355 yb = twiddles[fstride*2*m];
364 for ( u=0; u<m; ++u ) {
367 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
368 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
369 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
370 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
372 C_ADD( scratch[7],scratch[1],scratch[4]);
373 C_SUB( scratch[10],scratch[1],scratch[4]);
374 C_ADD( scratch[8],scratch[2],scratch[3]);
375 C_SUB( scratch[9],scratch[2],scratch[3]);
377 Fout0->r += scratch[7].r + scratch[8].r;
378 Fout0->i += scratch[7].i + scratch[8].i;
380 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
381 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
383 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
384 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
386 C_SUB(*Fout1,scratch[5],scratch[6]);
387 C_ADD(*Fout4,scratch[5],scratch[6]);
389 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
390 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
391 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
392 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
394 C_ADD(*Fout2,scratch[11],scratch[12]);
395 C_SUB(*Fout3,scratch[11],scratch[12]);
397 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
401 /* perform the butterfly for one stage of a mixed radix FFT */
402 static void kf_bfly_generic(
404 const size_t fstride,
405 const kiss_fft_cfg st,
411 kiss_twiddle_cpx * twiddles = st->twiddles;
413 VARDECL(kiss_fft_cpx, scratchbuf);
414 int Norig = st->nfft;
415 ALLOC(scratchbuf, p, kiss_fft_cpx);
417 for ( u=0; u<m; ++u ) {
419 for ( q1=0 ; q1<p ; ++q1 ) {
420 scratchbuf[q1] = Fout[ k ];
421 C_FIXDIV(scratchbuf[q1],p);
426 for ( q1=0 ; q1<p ; ++q1 ) {
428 Fout[ k ] = scratchbuf[0];
430 twidx += fstride * k;
431 if (twidx>=Norig) twidx-=Norig;
432 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
433 C_ADDTO( Fout[ k ] ,t);
440 static void ki_bfly_generic(
442 const size_t fstride,
443 const kiss_fft_cfg st,
449 kiss_twiddle_cpx * twiddles = st->twiddles;
451 VARDECL(kiss_fft_cpx, scratchbuf);
452 int Norig = st->nfft;
453 ALLOC(scratchbuf, p, kiss_fft_cpx);
455 for ( u=0; u<m; ++u ) {
457 for ( q1=0 ; q1<p ; ++q1 ) {
458 scratchbuf[q1] = Fout[ k ];
463 for ( q1=0 ; q1<p ; ++q1 ) {
465 Fout[ k ] = scratchbuf[0];
467 twidx += fstride * k;
468 if (twidx>=Norig) twidx-=Norig;
469 C_MULC(t,scratchbuf[q] , twiddles[twidx] );
470 C_ADDTO( Fout[ k ] ,t);
479 void compute_bitrev_table(
482 const size_t fstride,
485 const kiss_fft_cfg st
488 const int p=*factors++; /* the radix */
489 const int m=*factors++; /* stage's fft length/p */
491 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
498 f += fstride*in_stride;
504 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
505 f += fstride*in_stride;
514 const kiss_fft_cpx * f,
515 const size_t fstride,
518 const kiss_fft_cfg st,
524 #ifndef RADIX_TWO_ONLY
526 kiss_fft_cpx * Fout_beg=Fout;
528 const int p=*factors++; /* the radix */
529 const int m=*factors++; /* stage's fft length/p */
530 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
532 kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
535 case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break;
536 case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break;
537 #ifndef RADIX_TWO_ONLY
538 case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly3(Fout,fstride,st,m);} break;
539 case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly5(Fout,fstride,st,m);} break;
540 default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly_generic(Fout,fstride,st,m,p);} break;
542 default: celt_fatal("kiss_fft: only powers of two enabled");
550 const kiss_fft_cpx * f,
551 const size_t fstride,
554 const kiss_fft_cfg st,
560 #ifndef RADIX_TWO_ONLY
562 kiss_fft_cpx * Fout_beg=Fout;
564 const int p=*factors++; /* the radix */
565 const int m=*factors++; /* stage's fft length/p */
566 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
568 ki_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
571 case 2: ki_bfly2(Fout,fstride,st,m, N, m2); break;
572 case 4: ki_bfly4(Fout,fstride,st,m, N, m2); break;
573 #ifndef RADIX_TWO_ONLY
574 case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly3(Fout,fstride,st,m);} break;
575 case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly5(Fout,fstride,st,m);} break;
576 default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly_generic(Fout,fstride,st,m,p);} break;
578 default: celt_fatal("kiss_fft: only powers of two enabled");
583 /* facbuf is populated by p1,m1,p2,m2, ...
588 void kf_factor(int n,int * facbuf)
592 /*factor out powers of 4, powers of 2, then any remaining primes */
596 case 4: p = 2; break;
597 case 2: p = 3; break;
598 default: p += 2; break;
600 if (p>32000 || (celt_int32)p*(celt_int32)p > n)
601 p = n; /* no more factors, skip to end */
610 * User-callable function to allocate all necessary storage space for the fft.
612 * The return value is a contiguous block of memory, allocated with malloc. As such,
613 * It can be freed with free(), rather than a kiss_fft-specific function.
615 kiss_fft_cfg kiss_fft_alloc(int nfft,void * mem,size_t * lenmem )
617 kiss_fft_cfg st=NULL;
618 size_t memneeded = sizeof(struct kiss_fft_state)
619 + sizeof(kiss_twiddle_cpx)*(nfft-1) + sizeof(int)*nfft; /* twiddle factors*/
621 if ( lenmem==NULL ) {
622 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
624 if (mem != NULL && *lenmem >= memneeded)
625 st = (kiss_fft_cfg)mem;
634 #if defined(FIXED_POINT) && (!defined(DOUBLE_PRECISION) || defined(MIXED_PRECISION))
635 for (i=0;i<nfft;++i) {
636 celt_word32_t phase = -i;
637 kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));
640 for (i=0;i<nfft;++i) {
641 const double pi=3.14159265358979323846264338327;
642 double phase = ( -2*pi /nfft ) * i;
643 kf_cexp(st->twiddles+i, phase );
646 kf_factor(nfft,st->factors);
649 st->bitrev = (int*)((char*)st + memneeded - sizeof(int)*nfft);
650 compute_bitrev_table(0, st->bitrev, 1,1, st->factors,st);
658 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
662 celt_fatal("In-place FFT not supported");
664 /* Bit-reverse the input */
666 for (i=0;i<st->nfft;i++)
668 fout[st->bitrev[i]] = fin[i];
670 fout[st->bitrev[i]].r *= st->scale;
671 fout[st->bitrev[i]].i *= st->scale;
674 kf_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
678 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
680 kiss_fft_stride(cfg,fin,fout,1);
683 void kiss_ifft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
687 celt_fatal("In-place FFT not supported");
689 /* Bit-reverse the input */
691 for (i=0;i<st->nfft;i++)
692 fout[st->bitrev[i]] = fin[i];
693 ki_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
697 void kiss_ifft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
699 kiss_ifft_stride(cfg,fin,fout,1);