2 Copyright (c) 2003-2004, Mark Borgerding
3 Lots of modifications by Jean-Marc Valin
4 Copyright (c) 2005-2007, Xiph.Org Foundation
5 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
9 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
11 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
13 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 #include "_kiss_fft_guts.h"
26 #include "os_support.h"
28 #include "stack_alloc.h"
30 /* The guts header contains all the multiplication and addition macros that are defined for
31 complex numbers. It also delares the kf_ internal functions.
37 const kiss_fft_state *st,
44 const kiss_twiddle_cpx * tw1;
46 kiss_fft_cpx * Fout_beg = Fout;
49 Fout = Fout_beg + i*mm;
55 Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);
56 Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);
57 C_MUL (t, *Fout2 , *tw1);
59 C_SUB( *Fout2 , *Fout , t );
70 const kiss_fft_state *st,
77 const kiss_twiddle_cpx * tw1;
80 kiss_fft_cpx * Fout_beg = Fout;
83 Fout = Fout_beg + i*mm;
88 C_MULC (t, *Fout2 , *tw1);
90 C_SUB( *Fout2 , *Fout , t );
100 const size_t fstride,
101 const kiss_fft_state *st,
107 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
108 kiss_fft_cpx scratch[6];
113 kiss_fft_cpx * Fout_beg = Fout;
116 Fout = Fout_beg + i*mm;
117 tw3 = tw2 = tw1 = st->twiddles;
120 C_MUL4(scratch[0],Fout[m] , *tw1 );
121 C_MUL4(scratch[1],Fout[m2] , *tw2 );
122 C_MUL4(scratch[2],Fout[m3] , *tw3 );
124 Fout->r = PSHR(Fout->r, 2);
125 Fout->i = PSHR(Fout->i, 2);
126 C_SUB( scratch[5] , *Fout, scratch[1] );
127 C_ADDTO(*Fout, scratch[1]);
128 C_ADD( scratch[3] , scratch[0] , scratch[2] );
129 C_SUB( scratch[4] , scratch[0] , scratch[2] );
130 Fout[m2].r = PSHR(Fout[m2].r, 2);
131 Fout[m2].i = PSHR(Fout[m2].i, 2);
132 C_SUB( Fout[m2], *Fout, scratch[3] );
136 C_ADDTO( *Fout , scratch[3] );
138 Fout[m].r = scratch[5].r + scratch[4].i;
139 Fout[m].i = scratch[5].i - scratch[4].r;
140 Fout[m3].r = scratch[5].r - scratch[4].i;
141 Fout[m3].i = scratch[5].i + scratch[4].r;
147 static void ki_bfly4(
149 const size_t fstride,
150 const kiss_fft_state *st,
156 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
157 kiss_fft_cpx scratch[6];
162 kiss_fft_cpx * Fout_beg = Fout;
165 Fout = Fout_beg + i*mm;
166 tw3 = tw2 = tw1 = st->twiddles;
169 C_MULC(scratch[0],Fout[m] , *tw1 );
170 C_MULC(scratch[1],Fout[m2] , *tw2 );
171 C_MULC(scratch[2],Fout[m3] , *tw3 );
173 C_SUB( scratch[5] , *Fout, scratch[1] );
174 C_ADDTO(*Fout, scratch[1]);
175 C_ADD( scratch[3] , scratch[0] , scratch[2] );
176 C_SUB( scratch[4] , scratch[0] , scratch[2] );
177 C_SUB( Fout[m2], *Fout, scratch[3] );
181 C_ADDTO( *Fout , scratch[3] );
183 Fout[m].r = scratch[5].r - scratch[4].i;
184 Fout[m].i = scratch[5].i + scratch[4].r;
185 Fout[m3].r = scratch[5].r + scratch[4].i;
186 Fout[m3].i = scratch[5].i - scratch[4].r;
192 #ifndef RADIX_TWO_ONLY
194 static void kf_bfly3(
196 const size_t fstride,
197 const kiss_fft_state *st,
205 const size_t m2 = 2*m;
206 const kiss_twiddle_cpx *tw1,*tw2;
207 kiss_fft_cpx scratch[5];
208 kiss_twiddle_cpx epi3;
210 kiss_fft_cpx * Fout_beg = Fout;
211 epi3 = st->twiddles[fstride*m];
214 Fout = Fout_beg + i*mm;
215 tw1=tw2=st->twiddles;
218 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
220 C_MUL(scratch[1],Fout[m] , *tw1);
221 C_MUL(scratch[2],Fout[m2] , *tw2);
223 C_ADD(scratch[3],scratch[1],scratch[2]);
224 C_SUB(scratch[0],scratch[1],scratch[2]);
228 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
229 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
231 C_MULBYSCALAR( scratch[0] , epi3.i );
233 C_ADDTO(*Fout,scratch[3]);
235 Fout[m2].r = Fout[m].r + scratch[0].i;
236 Fout[m2].i = Fout[m].i - scratch[0].r;
238 Fout[m].r -= scratch[0].i;
239 Fout[m].i += scratch[0].r;
246 static void ki_bfly3(
248 const size_t fstride,
249 const kiss_fft_state *st,
256 const size_t m2 = 2*m;
257 const kiss_twiddle_cpx *tw1,*tw2;
258 kiss_fft_cpx scratch[5];
259 kiss_twiddle_cpx epi3;
261 kiss_fft_cpx * Fout_beg = Fout;
262 epi3 = st->twiddles[fstride*m];
265 Fout = Fout_beg + i*mm;
266 tw1=tw2=st->twiddles;
270 C_MULC(scratch[1],Fout[m] , *tw1);
271 C_MULC(scratch[2],Fout[m2] , *tw2);
273 C_ADD(scratch[3],scratch[1],scratch[2]);
274 C_SUB(scratch[0],scratch[1],scratch[2]);
278 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
279 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
281 C_MULBYSCALAR( scratch[0] , -epi3.i );
283 C_ADDTO(*Fout,scratch[3]);
285 Fout[m2].r = Fout[m].r + scratch[0].i;
286 Fout[m2].i = Fout[m].i - scratch[0].r;
288 Fout[m].r -= scratch[0].i;
289 Fout[m].i += scratch[0].r;
297 static void kf_bfly5(
299 const size_t fstride,
300 const kiss_fft_state *st,
306 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
308 kiss_fft_cpx scratch[13];
309 const kiss_twiddle_cpx * twiddles = st->twiddles;
310 const kiss_twiddle_cpx *tw;
311 kiss_twiddle_cpx ya,yb;
312 kiss_fft_cpx * Fout_beg = Fout;
314 ya = twiddles[fstride*m];
315 yb = twiddles[fstride*2*m];
320 Fout = Fout_beg + i*mm;
327 for ( u=0; u<m; ++u ) {
328 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
331 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
332 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
333 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
334 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
336 C_ADD( scratch[7],scratch[1],scratch[4]);
337 C_SUB( scratch[10],scratch[1],scratch[4]);
338 C_ADD( scratch[8],scratch[2],scratch[3]);
339 C_SUB( scratch[9],scratch[2],scratch[3]);
341 Fout0->r += scratch[7].r + scratch[8].r;
342 Fout0->i += scratch[7].i + scratch[8].i;
344 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
345 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
347 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
348 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
350 C_SUB(*Fout1,scratch[5],scratch[6]);
351 C_ADD(*Fout4,scratch[5],scratch[6]);
353 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
354 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
355 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
356 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
358 C_ADD(*Fout2,scratch[11],scratch[12]);
359 C_SUB(*Fout3,scratch[11],scratch[12]);
361 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
366 static void ki_bfly5(
368 const size_t fstride,
369 const kiss_fft_state *st,
375 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
377 kiss_fft_cpx scratch[13];
378 const kiss_twiddle_cpx * twiddles = st->twiddles;
379 const kiss_twiddle_cpx *tw;
380 kiss_twiddle_cpx ya,yb;
381 kiss_fft_cpx * Fout_beg = Fout;
383 ya = twiddles[fstride*m];
384 yb = twiddles[fstride*2*m];
389 Fout = Fout_beg + i*mm;
396 for ( u=0; u<m; ++u ) {
399 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
400 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
401 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
402 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
404 C_ADD( scratch[7],scratch[1],scratch[4]);
405 C_SUB( scratch[10],scratch[1],scratch[4]);
406 C_ADD( scratch[8],scratch[2],scratch[3]);
407 C_SUB( scratch[9],scratch[2],scratch[3]);
409 Fout0->r += scratch[7].r + scratch[8].r;
410 Fout0->i += scratch[7].i + scratch[8].i;
412 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
413 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
415 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
416 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
418 C_SUB(*Fout1,scratch[5],scratch[6]);
419 C_ADD(*Fout4,scratch[5],scratch[6]);
421 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
422 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
423 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
424 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
426 C_ADD(*Fout2,scratch[11],scratch[12]);
427 C_SUB(*Fout3,scratch[11],scratch[12]);
429 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
437 void compute_bitrev_table(
440 const size_t fstride,
442 celt_int16 * factors,
443 const kiss_fft_state *st
446 const int p=*factors++; /* the radix */
447 const int m=*factors++; /* stage's fft length/p */
449 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
456 f += fstride*in_stride;
462 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
463 f += fstride*in_stride;
472 const kiss_fft_cpx * f,
475 const celt_int16 * factors,
476 const kiss_fft_state *st,
482 const int p=*factors++; /* the radix */
483 const int m=*factors++; /* stage's fft length/p */
484 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
486 kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
488 /* Compensate for longer twiddles table (when sharing) */
490 fstride <<= st->shift;
492 case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break;
493 case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break;
494 #ifndef RADIX_TWO_ONLY
495 case 3: kf_bfly3(Fout,fstride,st,m, N, m2); break;
496 case 5: kf_bfly5(Fout,fstride,st,m, N, m2); break;
498 default: celt_fatal("kiss_fft: only powers of two enabled");
506 const kiss_fft_cpx * f,
509 const celt_int16 * factors,
510 const kiss_fft_state *st,
516 const int p=*factors++; /* the radix */
517 const int m=*factors++; /* stage's fft length/p */
518 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
520 ki_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
522 /* Compensate for longer twiddles table (when sharing) */
524 fstride <<= st->shift;
526 case 2: ki_bfly2(Fout,fstride,st,m, N, m2); break;
527 case 4: ki_bfly4(Fout,fstride,st,m, N, m2); break;
528 #ifndef RADIX_TWO_ONLY
529 case 3: ki_bfly3(Fout,fstride,st,m, N, m2); break;
530 case 5: ki_bfly5(Fout,fstride,st,m, N, m2); break;
532 default: celt_fatal("kiss_fft: only powers of two enabled");
537 /* facbuf is populated by p1,m1,p2,m2, ...
542 int kf_factor(int n,celt_int16 * facbuf)
546 /*factor out powers of 4, powers of 2, then any remaining primes */
550 case 4: p = 2; break;
551 case 2: p = 3; break;
552 default: p += 2; break;
554 if (p>32000 || (celt_int32)p*(celt_int32)p > n)
555 p = n; /* no more factors, skip to end */
560 celt_warning("Only powers of 2, 3 and 5 are supported");
569 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
573 for (i=0;i<nfft;++i) {
574 celt_word32 phase = -i;
575 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
578 for (i=0;i<nfft;++i) {
579 const double pi=3.14159265358979323846264338327;
580 double phase = ( -2*pi /nfft ) * i;
581 kf_cexp(twiddles+i, phase );
589 * User-callable function to allocate all necessary storage space for the fft.
591 * The return value is a contiguous block of memory, allocated with malloc. As such,
592 * It can be freed with free(), rather than a kiss_fft-specific function.
594 kiss_fft_state *kiss_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base)
596 kiss_fft_state *st=NULL;
597 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
599 if ( lenmem==NULL ) {
600 st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
602 if (mem != NULL && *lenmem >= memneeded)
603 st = (kiss_fft_state*)mem;
608 kiss_twiddle_cpx *twiddles;
616 st->twiddles = base->twiddles;
618 while (nfft<<st->shift != base->nfft && st->shift < 32)
620 /* FIXME: Report error and do proper cleanup */
624 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
625 compute_twiddles(twiddles, nfft);
628 if (!kf_factor(nfft,st->factors))
635 st->bitrev = bitrev = (celt_int16*)KISS_FFT_MALLOC(sizeof(celt_int16)*nfft);
636 compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
641 kiss_fft_state *kiss_fft_alloc(int nfft,void * mem,size_t * lenmem )
643 return kiss_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
647 static void kiss_fft_stride(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
651 celt_fatal("In-place FFT not supported");
653 /* Bit-reverse the input */
655 for (i=0;i<st->nfft;i++)
657 fout[st->bitrev[i]] = fin[i];
659 fout[st->bitrev[i]].r *= st->scale;
660 fout[st->bitrev[i]].i *= st->scale;
663 kf_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
667 void kiss_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
669 kiss_fft_stride(cfg,fin,fout,1);
672 static void kiss_ifft_stride(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
676 celt_fatal("In-place FFT not supported");
678 /* Bit-reverse the input */
680 for (i=0;i<st->nfft;i++)
681 fout[st->bitrev[i]] = fin[i];
682 ki_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
686 void kiss_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
688 kiss_ifft_stride(cfg,fin,fout,1);
691 void kiss_fft_free(const kiss_fft_state *cfg)
693 celt_free((celt_int16*)cfg->bitrev);
695 celt_free((kiss_twiddle_cpx*)cfg->twiddles);
696 celt_free((kiss_fft_state*)cfg);