2 Copyright (c) 2003-2004, Mark Borgerding
3 Lots of modifications by Jean-Marc Valin
4 Copyright (c) 2005-2007, Xiph.Org Foundation
5 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
9 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
11 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 #include "_kiss_fft_guts.h"
25 #include "os_support.h"
27 #include "stack_alloc.h"
29 /* The guts header contains all the multiplication and addition macros that are defined for
30 complex numbers. It also delares the kf_ internal functions.
36 const kiss_fft_state *st,
43 const kiss_twiddle_cpx * tw1;
45 kiss_fft_cpx * Fout_beg = Fout;
48 Fout = Fout_beg + i*mm;
54 Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);
55 Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);
56 C_MUL (t, *Fout2 , *tw1);
58 C_SUB( *Fout2 , *Fout , t );
69 const kiss_fft_state *st,
76 const kiss_twiddle_cpx * tw1;
79 kiss_fft_cpx * Fout_beg = Fout;
82 Fout = Fout_beg + i*mm;
87 C_MULC (t, *Fout2 , *tw1);
89 C_SUB( *Fout2 , *Fout , t );
100 const kiss_fft_state *st,
106 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
107 kiss_fft_cpx scratch[6];
112 kiss_fft_cpx * Fout_beg = Fout;
115 Fout = Fout_beg + i*mm;
116 tw3 = tw2 = tw1 = st->twiddles;
119 C_MUL4(scratch[0],Fout[m] , *tw1 );
120 C_MUL4(scratch[1],Fout[m2] , *tw2 );
121 C_MUL4(scratch[2],Fout[m3] , *tw3 );
123 Fout->r = PSHR(Fout->r, 2);
124 Fout->i = PSHR(Fout->i, 2);
125 C_SUB( scratch[5] , *Fout, scratch[1] );
126 C_ADDTO(*Fout, scratch[1]);
127 C_ADD( scratch[3] , scratch[0] , scratch[2] );
128 C_SUB( scratch[4] , scratch[0] , scratch[2] );
129 Fout[m2].r = PSHR(Fout[m2].r, 2);
130 Fout[m2].i = PSHR(Fout[m2].i, 2);
131 C_SUB( Fout[m2], *Fout, scratch[3] );
135 C_ADDTO( *Fout , scratch[3] );
137 Fout[m].r = scratch[5].r + scratch[4].i;
138 Fout[m].i = scratch[5].i - scratch[4].r;
139 Fout[m3].r = scratch[5].r - scratch[4].i;
140 Fout[m3].i = scratch[5].i + scratch[4].r;
146 static void ki_bfly4(
148 const size_t fstride,
149 const kiss_fft_state *st,
155 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
156 kiss_fft_cpx scratch[6];
161 kiss_fft_cpx * Fout_beg = Fout;
164 Fout = Fout_beg + i*mm;
165 tw3 = tw2 = tw1 = st->twiddles;
168 C_MULC(scratch[0],Fout[m] , *tw1 );
169 C_MULC(scratch[1],Fout[m2] , *tw2 );
170 C_MULC(scratch[2],Fout[m3] , *tw3 );
172 C_SUB( scratch[5] , *Fout, scratch[1] );
173 C_ADDTO(*Fout, scratch[1]);
174 C_ADD( scratch[3] , scratch[0] , scratch[2] );
175 C_SUB( scratch[4] , scratch[0] , scratch[2] );
176 C_SUB( Fout[m2], *Fout, scratch[3] );
180 C_ADDTO( *Fout , scratch[3] );
182 Fout[m].r = scratch[5].r - scratch[4].i;
183 Fout[m].i = scratch[5].i + scratch[4].r;
184 Fout[m3].r = scratch[5].r + scratch[4].i;
185 Fout[m3].i = scratch[5].i - scratch[4].r;
191 #ifndef RADIX_TWO_ONLY
193 static void kf_bfly3(
195 const size_t fstride,
196 const kiss_fft_state *st,
204 const size_t m2 = 2*m;
205 const kiss_twiddle_cpx *tw1,*tw2;
206 kiss_fft_cpx scratch[5];
207 kiss_twiddle_cpx epi3;
209 kiss_fft_cpx * Fout_beg = Fout;
210 epi3 = st->twiddles[fstride*m];
213 Fout = Fout_beg + i*mm;
214 tw1=tw2=st->twiddles;
217 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
219 C_MUL(scratch[1],Fout[m] , *tw1);
220 C_MUL(scratch[2],Fout[m2] , *tw2);
222 C_ADD(scratch[3],scratch[1],scratch[2]);
223 C_SUB(scratch[0],scratch[1],scratch[2]);
227 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
228 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
230 C_MULBYSCALAR( scratch[0] , epi3.i );
232 C_ADDTO(*Fout,scratch[3]);
234 Fout[m2].r = Fout[m].r + scratch[0].i;
235 Fout[m2].i = Fout[m].i - scratch[0].r;
237 Fout[m].r -= scratch[0].i;
238 Fout[m].i += scratch[0].r;
245 static void ki_bfly3(
247 const size_t fstride,
248 const kiss_fft_state *st,
255 const size_t m2 = 2*m;
256 const kiss_twiddle_cpx *tw1,*tw2;
257 kiss_fft_cpx scratch[5];
258 kiss_twiddle_cpx epi3;
260 kiss_fft_cpx * Fout_beg = Fout;
261 epi3 = st->twiddles[fstride*m];
264 Fout = Fout_beg + i*mm;
265 tw1=tw2=st->twiddles;
269 C_MULC(scratch[1],Fout[m] , *tw1);
270 C_MULC(scratch[2],Fout[m2] , *tw2);
272 C_ADD(scratch[3],scratch[1],scratch[2]);
273 C_SUB(scratch[0],scratch[1],scratch[2]);
277 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
278 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
280 C_MULBYSCALAR( scratch[0] , -epi3.i );
282 C_ADDTO(*Fout,scratch[3]);
284 Fout[m2].r = Fout[m].r + scratch[0].i;
285 Fout[m2].i = Fout[m].i - scratch[0].r;
287 Fout[m].r -= scratch[0].i;
288 Fout[m].i += scratch[0].r;
296 static void kf_bfly5(
298 const size_t fstride,
299 const kiss_fft_state *st,
305 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
307 kiss_fft_cpx scratch[13];
308 const kiss_twiddle_cpx * twiddles = st->twiddles;
309 const kiss_twiddle_cpx *tw;
310 kiss_twiddle_cpx ya,yb;
311 kiss_fft_cpx * Fout_beg = Fout;
313 ya = twiddles[fstride*m];
314 yb = twiddles[fstride*2*m];
319 Fout = Fout_beg + i*mm;
326 for ( u=0; u<m; ++u ) {
327 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
330 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
331 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
332 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
333 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
335 C_ADD( scratch[7],scratch[1],scratch[4]);
336 C_SUB( scratch[10],scratch[1],scratch[4]);
337 C_ADD( scratch[8],scratch[2],scratch[3]);
338 C_SUB( scratch[9],scratch[2],scratch[3]);
340 Fout0->r += scratch[7].r + scratch[8].r;
341 Fout0->i += scratch[7].i + scratch[8].i;
343 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
344 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
346 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
347 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
349 C_SUB(*Fout1,scratch[5],scratch[6]);
350 C_ADD(*Fout4,scratch[5],scratch[6]);
352 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
353 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
354 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
355 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
357 C_ADD(*Fout2,scratch[11],scratch[12]);
358 C_SUB(*Fout3,scratch[11],scratch[12]);
360 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
365 static void ki_bfly5(
367 const size_t fstride,
368 const kiss_fft_state *st,
374 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
376 kiss_fft_cpx scratch[13];
377 const kiss_twiddle_cpx * twiddles = st->twiddles;
378 const kiss_twiddle_cpx *tw;
379 kiss_twiddle_cpx ya,yb;
380 kiss_fft_cpx * Fout_beg = Fout;
382 ya = twiddles[fstride*m];
383 yb = twiddles[fstride*2*m];
388 Fout = Fout_beg + i*mm;
395 for ( u=0; u<m; ++u ) {
398 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
399 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
400 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
401 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
403 C_ADD( scratch[7],scratch[1],scratch[4]);
404 C_SUB( scratch[10],scratch[1],scratch[4]);
405 C_ADD( scratch[8],scratch[2],scratch[3]);
406 C_SUB( scratch[9],scratch[2],scratch[3]);
408 Fout0->r += scratch[7].r + scratch[8].r;
409 Fout0->i += scratch[7].i + scratch[8].i;
411 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
412 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
414 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
415 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
417 C_SUB(*Fout1,scratch[5],scratch[6]);
418 C_ADD(*Fout4,scratch[5],scratch[6]);
420 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
421 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
422 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
423 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
425 C_ADD(*Fout2,scratch[11],scratch[12]);
426 C_SUB(*Fout3,scratch[11],scratch[12]);
428 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
437 const kiss_fft_cpx * f,
440 const opus_int16 * factors,
441 const kiss_fft_state *st,
446 const int p=*factors++; /* the radix */
447 const int m=*factors++; /* stage's fft length/p */
448 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
450 kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, m);
452 /* Compensate for longer twiddles table (when sharing) */
454 fstride <<= st->shift;
456 case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break;
457 case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break;
458 #ifndef RADIX_TWO_ONLY
459 case 3: kf_bfly3(Fout,fstride,st,m, N, m2); break;
460 case 5: kf_bfly5(Fout,fstride,st,m, N, m2); break;
468 const kiss_fft_cpx * f,
471 const opus_int16 * factors,
472 const kiss_fft_state *st,
477 const int p=*factors++; /* the radix */
478 const int m=*factors++; /* stage's fft length/p */
479 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
481 ki_work( Fout , f, fstride*p, in_stride, factors,st, N*p, m);
483 /* Compensate for longer twiddles table (when sharing) */
485 fstride <<= st->shift;
487 case 2: ki_bfly2(Fout,fstride,st,m, N, m2); break;
488 case 4: ki_bfly4(Fout,fstride,st,m, N, m2); break;
489 #ifndef RADIX_TWO_ONLY
490 case 3: ki_bfly3(Fout,fstride,st,m, N, m2); break;
491 case 5: ki_bfly5(Fout,fstride,st,m, N, m2); break;
500 void compute_bitrev_table(
503 const size_t fstride,
505 opus_int16 * factors,
506 const kiss_fft_state *st
509 const int p=*factors++; /* the radix */
510 const int m=*factors++; /* stage's fft length/p */
512 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
519 f += fstride*in_stride;
525 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
526 f += fstride*in_stride;
533 /* facbuf is populated by p1,m1,p2,m2, ...
538 int kf_factor(int n,opus_int16 * facbuf)
542 /*factor out powers of 4, powers of 2, then any remaining primes */
546 case 4: p = 2; break;
547 case 2: p = 3; break;
548 default: p += 2; break;
550 if (p>32000 || (opus_int32)p*(opus_int32)p > n)
551 p = n; /* no more factors, skip to end */
554 #ifdef RADIX_TWO_ONLY
568 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
572 for (i=0;i<nfft;++i) {
573 opus_val32 phase = -i;
574 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
577 for (i=0;i<nfft;++i) {
578 const double pi=3.14159265358979323846264338327;
579 double phase = ( -2*pi /nfft ) * i;
580 kf_cexp(twiddles+i, phase );
588 * User-callable function to allocate all necessary storage space for the fft.
590 * The return value is a contiguous block of memory, allocated with malloc. As such,
591 * It can be freed with free(), rather than a kiss_fft-specific function.
593 kiss_fft_state *kiss_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base)
595 kiss_fft_state *st=NULL;
596 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
598 if ( lenmem==NULL ) {
599 st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
601 if (mem != NULL && *lenmem >= memneeded)
602 st = (kiss_fft_state*)mem;
607 kiss_twiddle_cpx *twiddles;
615 st->twiddles = base->twiddles;
617 while (nfft<<st->shift != base->nfft && st->shift < 32)
622 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
623 compute_twiddles(twiddles, nfft);
626 if (!kf_factor(nfft,st->factors))
633 st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
634 if (st->bitrev==NULL)
636 compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
644 kiss_fft_state *kiss_fft_alloc(int nfft,void * mem,size_t * lenmem )
646 return kiss_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
649 void kiss_fft_free(const kiss_fft_state *cfg)
653 celt_free((opus_int16*)cfg->bitrev);
655 celt_free((kiss_twiddle_cpx*)cfg->twiddles);
656 celt_free((kiss_fft_state*)cfg);
660 #endif /* CUSTOM_MODES */
662 static void kiss_fft_stride(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
665 celt_assert2 (fin != fout, "In-place FFT not supported");
666 /* Bit-reverse the input */
667 for (i=0;i<st->nfft;i++)
669 fout[st->bitrev[i]] = fin[i];
671 fout[st->bitrev[i]].r *= st->scale;
672 fout[st->bitrev[i]].i *= st->scale;
675 kf_work( fout, fin, 1,in_stride, st->factors,st, 1, 1);
678 void kiss_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
680 kiss_fft_stride(cfg,fin,fout,1);
683 static void kiss_ifft_stride(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
686 celt_assert2 (fin != fout, "In-place FFT not supported");
687 /* Bit-reverse the input */
688 for (i=0;i<st->nfft;i++)
689 fout[st->bitrev[i]] = fin[i];
690 ki_work( fout, fin, 1,in_stride, st->factors,st, 1, 1);
693 void kiss_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
695 kiss_ifft_stride(cfg,fin,fout,1);