2 Copyright (c) 2003-2004, Mark Borgerding
3 Lots of modifications by Jean-Marc Valin
4 Copyright (c) 2005-2007, Xiph.Org Foundation
5 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
9 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
11 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
13 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 #include "_kiss_fft_guts.h"
26 #include "os_support.h"
28 #include "stack_alloc.h"
30 /* The guts header contains all the multiplication and addition macros that are defined for
31 complex numbers. It also delares the kf_ internal functions.
37 const kiss_fft_state *st,
44 const kiss_twiddle_cpx * tw1;
46 kiss_fft_cpx * Fout_beg = Fout;
49 Fout = Fout_beg + i*mm;
55 Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);
56 Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);
57 C_MUL (t, *Fout2 , *tw1);
59 C_SUB( *Fout2 , *Fout , t );
70 const kiss_fft_state *st,
77 const kiss_twiddle_cpx * tw1;
80 kiss_fft_cpx * Fout_beg = Fout;
83 Fout = Fout_beg + i*mm;
88 C_MULC (t, *Fout2 , *tw1);
90 C_SUB( *Fout2 , *Fout , t );
100 const size_t fstride,
101 const kiss_fft_state *st,
107 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
108 kiss_fft_cpx scratch[6];
113 kiss_fft_cpx * Fout_beg = Fout;
116 Fout = Fout_beg + i*mm;
117 tw3 = tw2 = tw1 = st->twiddles;
120 C_MUL4(scratch[0],Fout[m] , *tw1 );
121 C_MUL4(scratch[1],Fout[m2] , *tw2 );
122 C_MUL4(scratch[2],Fout[m3] , *tw3 );
124 Fout->r = PSHR(Fout->r, 2);
125 Fout->i = PSHR(Fout->i, 2);
126 C_SUB( scratch[5] , *Fout, scratch[1] );
127 C_ADDTO(*Fout, scratch[1]);
128 C_ADD( scratch[3] , scratch[0] , scratch[2] );
129 C_SUB( scratch[4] , scratch[0] , scratch[2] );
130 Fout[m2].r = PSHR(Fout[m2].r, 2);
131 Fout[m2].i = PSHR(Fout[m2].i, 2);
132 C_SUB( Fout[m2], *Fout, scratch[3] );
136 C_ADDTO( *Fout , scratch[3] );
138 Fout[m].r = scratch[5].r + scratch[4].i;
139 Fout[m].i = scratch[5].i - scratch[4].r;
140 Fout[m3].r = scratch[5].r - scratch[4].i;
141 Fout[m3].i = scratch[5].i + scratch[4].r;
147 static void ki_bfly4(
149 const size_t fstride,
150 const kiss_fft_state *st,
156 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
157 kiss_fft_cpx scratch[6];
162 kiss_fft_cpx * Fout_beg = Fout;
165 Fout = Fout_beg + i*mm;
166 tw3 = tw2 = tw1 = st->twiddles;
169 C_MULC(scratch[0],Fout[m] , *tw1 );
170 C_MULC(scratch[1],Fout[m2] , *tw2 );
171 C_MULC(scratch[2],Fout[m3] , *tw3 );
173 C_SUB( scratch[5] , *Fout, scratch[1] );
174 C_ADDTO(*Fout, scratch[1]);
175 C_ADD( scratch[3] , scratch[0] , scratch[2] );
176 C_SUB( scratch[4] , scratch[0] , scratch[2] );
177 C_SUB( Fout[m2], *Fout, scratch[3] );
181 C_ADDTO( *Fout , scratch[3] );
183 Fout[m].r = scratch[5].r - scratch[4].i;
184 Fout[m].i = scratch[5].i + scratch[4].r;
185 Fout[m3].r = scratch[5].r + scratch[4].i;
186 Fout[m3].i = scratch[5].i - scratch[4].r;
192 #ifndef RADIX_TWO_ONLY
194 static void kf_bfly3(
196 const size_t fstride,
197 const kiss_fft_state *st,
205 const size_t m2 = 2*m;
206 const kiss_twiddle_cpx *tw1,*tw2;
207 kiss_fft_cpx scratch[5];
208 kiss_twiddle_cpx epi3;
210 kiss_fft_cpx * Fout_beg = Fout;
211 epi3 = st->twiddles[fstride*m];
214 Fout = Fout_beg + i*mm;
215 tw1=tw2=st->twiddles;
218 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
220 C_MUL(scratch[1],Fout[m] , *tw1);
221 C_MUL(scratch[2],Fout[m2] , *tw2);
223 C_ADD(scratch[3],scratch[1],scratch[2]);
224 C_SUB(scratch[0],scratch[1],scratch[2]);
228 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
229 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
231 C_MULBYSCALAR( scratch[0] , epi3.i );
233 C_ADDTO(*Fout,scratch[3]);
235 Fout[m2].r = Fout[m].r + scratch[0].i;
236 Fout[m2].i = Fout[m].i - scratch[0].r;
238 Fout[m].r -= scratch[0].i;
239 Fout[m].i += scratch[0].r;
246 static void ki_bfly3(
248 const size_t fstride,
249 const kiss_fft_state *st,
256 const size_t m2 = 2*m;
257 const kiss_twiddle_cpx *tw1,*tw2;
258 kiss_fft_cpx scratch[5];
259 kiss_twiddle_cpx epi3;
261 kiss_fft_cpx * Fout_beg = Fout;
262 epi3 = st->twiddles[fstride*m];
265 Fout = Fout_beg + i*mm;
266 tw1=tw2=st->twiddles;
270 C_MULC(scratch[1],Fout[m] , *tw1);
271 C_MULC(scratch[2],Fout[m2] , *tw2);
273 C_ADD(scratch[3],scratch[1],scratch[2]);
274 C_SUB(scratch[0],scratch[1],scratch[2]);
278 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
279 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
281 C_MULBYSCALAR( scratch[0] , -epi3.i );
283 C_ADDTO(*Fout,scratch[3]);
285 Fout[m2].r = Fout[m].r + scratch[0].i;
286 Fout[m2].i = Fout[m].i - scratch[0].r;
288 Fout[m].r -= scratch[0].i;
289 Fout[m].i += scratch[0].r;
297 static void kf_bfly5(
299 const size_t fstride,
300 const kiss_fft_state *st,
306 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
308 kiss_fft_cpx scratch[13];
309 const kiss_twiddle_cpx * twiddles = st->twiddles;
310 const kiss_twiddle_cpx *tw;
311 kiss_twiddle_cpx ya,yb;
312 kiss_fft_cpx * Fout_beg = Fout;
314 ya = twiddles[fstride*m];
315 yb = twiddles[fstride*2*m];
320 Fout = Fout_beg + i*mm;
327 for ( u=0; u<m; ++u ) {
328 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
331 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
332 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
333 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
334 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
336 C_ADD( scratch[7],scratch[1],scratch[4]);
337 C_SUB( scratch[10],scratch[1],scratch[4]);
338 C_ADD( scratch[8],scratch[2],scratch[3]);
339 C_SUB( scratch[9],scratch[2],scratch[3]);
341 Fout0->r += scratch[7].r + scratch[8].r;
342 Fout0->i += scratch[7].i + scratch[8].i;
344 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
345 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
347 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
348 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
350 C_SUB(*Fout1,scratch[5],scratch[6]);
351 C_ADD(*Fout4,scratch[5],scratch[6]);
353 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
354 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
355 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
356 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
358 C_ADD(*Fout2,scratch[11],scratch[12]);
359 C_SUB(*Fout3,scratch[11],scratch[12]);
361 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
366 static void ki_bfly5(
368 const size_t fstride,
369 const kiss_fft_state *st,
375 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
377 kiss_fft_cpx scratch[13];
378 const kiss_twiddle_cpx * twiddles = st->twiddles;
379 const kiss_twiddle_cpx *tw;
380 kiss_twiddle_cpx ya,yb;
381 kiss_fft_cpx * Fout_beg = Fout;
383 ya = twiddles[fstride*m];
384 yb = twiddles[fstride*2*m];
389 Fout = Fout_beg + i*mm;
396 for ( u=0; u<m; ++u ) {
399 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
400 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
401 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
402 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
404 C_ADD( scratch[7],scratch[1],scratch[4]);
405 C_SUB( scratch[10],scratch[1],scratch[4]);
406 C_ADD( scratch[8],scratch[2],scratch[3]);
407 C_SUB( scratch[9],scratch[2],scratch[3]);
409 Fout0->r += scratch[7].r + scratch[8].r;
410 Fout0->i += scratch[7].i + scratch[8].i;
412 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
413 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
415 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
416 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
418 C_SUB(*Fout1,scratch[5],scratch[6]);
419 C_ADD(*Fout4,scratch[5],scratch[6]);
421 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
422 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
423 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
424 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
426 C_ADD(*Fout2,scratch[11],scratch[12]);
427 C_SUB(*Fout3,scratch[11],scratch[12]);
429 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
438 const kiss_fft_cpx * f,
441 const celt_int16 * factors,
442 const kiss_fft_state *st,
447 const int p=*factors++; /* the radix */
448 const int m=*factors++; /* stage's fft length/p */
449 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
451 kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, m);
453 /* Compensate for longer twiddles table (when sharing) */
455 fstride <<= st->shift;
457 case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break;
458 case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break;
459 #ifndef RADIX_TWO_ONLY
460 case 3: kf_bfly3(Fout,fstride,st,m, N, m2); break;
461 case 5: kf_bfly5(Fout,fstride,st,m, N, m2); break;
469 const kiss_fft_cpx * f,
472 const celt_int16 * factors,
473 const kiss_fft_state *st,
478 const int p=*factors++; /* the radix */
479 const int m=*factors++; /* stage's fft length/p */
480 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
482 ki_work( Fout , f, fstride*p, in_stride, factors,st, N*p, m);
484 /* Compensate for longer twiddles table (when sharing) */
486 fstride <<= st->shift;
488 case 2: ki_bfly2(Fout,fstride,st,m, N, m2); break;
489 case 4: ki_bfly4(Fout,fstride,st,m, N, m2); break;
490 #ifndef RADIX_TWO_ONLY
491 case 3: ki_bfly3(Fout,fstride,st,m, N, m2); break;
492 case 5: ki_bfly5(Fout,fstride,st,m, N, m2); break;
501 void compute_bitrev_table(
504 const size_t fstride,
506 celt_int16 * factors,
507 const kiss_fft_state *st
510 const int p=*factors++; /* the radix */
511 const int m=*factors++; /* stage's fft length/p */
513 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
520 f += fstride*in_stride;
526 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
527 f += fstride*in_stride;
534 /* facbuf is populated by p1,m1,p2,m2, ...
539 int kf_factor(int n,celt_int16 * facbuf)
543 /*factor out powers of 4, powers of 2, then any remaining primes */
547 case 4: p = 2; break;
548 case 2: p = 3; break;
549 default: p += 2; break;
551 if (p>32000 || (celt_int32)p*(celt_int32)p > n)
552 p = n; /* no more factors, skip to end */
555 #ifdef RADIX_TWO_ONLY
569 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
573 for (i=0;i<nfft;++i) {
574 celt_word32 phase = -i;
575 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
578 for (i=0;i<nfft;++i) {
579 const double pi=3.14159265358979323846264338327;
580 double phase = ( -2*pi /nfft ) * i;
581 kf_cexp(twiddles+i, phase );
589 * User-callable function to allocate all necessary storage space for the fft.
591 * The return value is a contiguous block of memory, allocated with malloc. As such,
592 * It can be freed with free(), rather than a kiss_fft-specific function.
594 kiss_fft_state *kiss_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base)
596 kiss_fft_state *st=NULL;
597 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
599 if ( lenmem==NULL ) {
600 st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
602 if (mem != NULL && *lenmem >= memneeded)
603 st = (kiss_fft_state*)mem;
608 kiss_twiddle_cpx *twiddles;
616 st->twiddles = base->twiddles;
618 while (nfft<<st->shift != base->nfft && st->shift < 32)
620 /* FIXME: Report error and do proper cleanup */
624 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
625 compute_twiddles(twiddles, nfft);
628 if (!kf_factor(nfft,st->factors))
635 st->bitrev = bitrev = (celt_int16*)KISS_FFT_MALLOC(sizeof(celt_int16)*nfft);
636 compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
641 kiss_fft_state *kiss_fft_alloc(int nfft,void * mem,size_t * lenmem )
643 return kiss_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
646 void kiss_fft_free(const kiss_fft_state *cfg)
648 celt_free((celt_int16*)cfg->bitrev);
650 celt_free((kiss_twiddle_cpx*)cfg->twiddles);
651 celt_free((kiss_fft_state*)cfg);
654 #endif /* CUSTOM_MODES */
656 static void kiss_fft_stride(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
659 celt_assert2 (fin != fout, "In-place FFT not supported");
660 /* Bit-reverse the input */
661 for (i=0;i<st->nfft;i++)
663 fout[st->bitrev[i]] = fin[i];
665 fout[st->bitrev[i]].r *= st->scale;
666 fout[st->bitrev[i]].i *= st->scale;
669 kf_work( fout, fin, 1,in_stride, st->factors,st, 1, 1);
672 void kiss_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
674 kiss_fft_stride(cfg,fin,fout,1);
677 static void kiss_ifft_stride(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
680 celt_assert2 (fin != fout, "In-place FFT not supported");
681 /* Bit-reverse the input */
682 for (i=0;i<st->nfft;i++)
683 fout[st->bitrev[i]] = fin[i];
684 ki_work( fout, fin, 1,in_stride, st->factors,st, 1, 1);
687 void kiss_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
689 kiss_ifft_stride(cfg,fin,fout,1);