2 Copyright (c) 2003-2004, Mark Borgerding
3 Lots of modifications by JMV
4 Copyright (c) 2005-2007, Jean-Marc Valin
5 Copyright (c) 2008, Jean-Marc Valin, CSIRO
9 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
11 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
13 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 #include "_kiss_fft_guts.h"
26 #include "os_support.h"
29 /* The guts header contains all the multiplication and addition macros that are defined for
30 complex numbers. It also delares the kf_ internal functions.
36 const kiss_fft_cfg st,
43 kiss_twiddle_cpx * tw1;
45 kiss_fft_cpx * Fout_beg = Fout;
48 Fout = Fout_beg + i*mm;
54 Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);
55 Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);
56 C_MUL (t, *Fout2 , *tw1);
58 C_SUB( *Fout2 , *Fout , t );
69 const kiss_fft_cfg st,
76 kiss_twiddle_cpx * tw1;
79 kiss_fft_cpx * Fout_beg = Fout;
82 Fout = Fout_beg + i*mm;
87 C_MULC (t, *Fout2 , *tw1);
89 C_SUB( *Fout2 , *Fout , t );
100 const kiss_fft_cfg st,
106 kiss_twiddle_cpx *tw1,*tw2,*tw3;
107 kiss_fft_cpx scratch[6];
112 kiss_fft_cpx * Fout_beg = Fout;
115 Fout = Fout_beg + i*mm;
116 tw3 = tw2 = tw1 = st->twiddles;
119 C_MUL4(scratch[0],Fout[m] , *tw1 );
120 C_MUL4(scratch[1],Fout[m2] , *tw2 );
121 C_MUL4(scratch[2],Fout[m3] , *tw3 );
123 Fout->r = PSHR(Fout->r, 2);
124 Fout->i = PSHR(Fout->i, 2);
125 C_SUB( scratch[5] , *Fout, scratch[1] );
126 C_ADDTO(*Fout, scratch[1]);
127 C_ADD( scratch[3] , scratch[0] , scratch[2] );
128 C_SUB( scratch[4] , scratch[0] , scratch[2] );
129 Fout[m2].r = PSHR(Fout[m2].r, 2);
130 Fout[m2].i = PSHR(Fout[m2].i, 2);
131 C_SUB( Fout[m2], *Fout, scratch[3] );
135 C_ADDTO( *Fout , scratch[3] );
137 Fout[m].r = scratch[5].r + scratch[4].i;
138 Fout[m].i = scratch[5].i - scratch[4].r;
139 Fout[m3].r = scratch[5].r - scratch[4].i;
140 Fout[m3].i = scratch[5].i + scratch[4].r;
146 static void ki_bfly4(
148 const size_t fstride,
149 const kiss_fft_cfg st,
155 kiss_twiddle_cpx *tw1,*tw2,*tw3;
156 kiss_fft_cpx scratch[6];
161 kiss_fft_cpx * Fout_beg = Fout;
164 Fout = Fout_beg + i*mm;
165 tw3 = tw2 = tw1 = st->twiddles;
168 C_MULC(scratch[0],Fout[m] , *tw1 );
169 C_MULC(scratch[1],Fout[m2] , *tw2 );
170 C_MULC(scratch[2],Fout[m3] , *tw3 );
172 C_SUB( scratch[5] , *Fout, scratch[1] );
173 C_ADDTO(*Fout, scratch[1]);
174 C_ADD( scratch[3] , scratch[0] , scratch[2] );
175 C_SUB( scratch[4] , scratch[0] , scratch[2] );
176 C_SUB( Fout[m2], *Fout, scratch[3] );
180 C_ADDTO( *Fout , scratch[3] );
182 Fout[m].r = scratch[5].r - scratch[4].i;
183 Fout[m].i = scratch[5].i + scratch[4].r;
184 Fout[m3].r = scratch[5].r + scratch[4].i;
185 Fout[m3].i = scratch[5].i - scratch[4].r;
191 #ifndef RADIX_TWO_ONLY
193 static void kf_bfly3(
195 const size_t fstride,
196 const kiss_fft_cfg st,
201 const size_t m2 = 2*m;
202 kiss_twiddle_cpx *tw1,*tw2;
203 kiss_fft_cpx scratch[5];
204 kiss_twiddle_cpx epi3;
205 epi3 = st->twiddles[fstride*m];
207 tw1=tw2=st->twiddles;
209 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
211 C_MUL(scratch[1],Fout[m] , *tw1);
212 C_MUL(scratch[2],Fout[m2] , *tw2);
214 C_ADD(scratch[3],scratch[1],scratch[2]);
215 C_SUB(scratch[0],scratch[1],scratch[2]);
219 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
220 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
222 C_MULBYSCALAR( scratch[0] , epi3.i );
224 C_ADDTO(*Fout,scratch[3]);
226 Fout[m2].r = Fout[m].r + scratch[0].i;
227 Fout[m2].i = Fout[m].i - scratch[0].r;
229 Fout[m].r -= scratch[0].i;
230 Fout[m].i += scratch[0].r;
236 static void ki_bfly3(
238 const size_t fstride,
239 const kiss_fft_cfg st,
244 const size_t m2 = 2*m;
245 kiss_twiddle_cpx *tw1,*tw2;
246 kiss_fft_cpx scratch[5];
247 kiss_twiddle_cpx epi3;
248 epi3 = st->twiddles[fstride*m];
250 tw1=tw2=st->twiddles;
253 C_MULC(scratch[1],Fout[m] , *tw1);
254 C_MULC(scratch[2],Fout[m2] , *tw2);
256 C_ADD(scratch[3],scratch[1],scratch[2]);
257 C_SUB(scratch[0],scratch[1],scratch[2]);
261 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
262 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
264 C_MULBYSCALAR( scratch[0] , -epi3.i );
266 C_ADDTO(*Fout,scratch[3]);
268 Fout[m2].r = Fout[m].r + scratch[0].i;
269 Fout[m2].i = Fout[m].i - scratch[0].r;
271 Fout[m].r -= scratch[0].i;
272 Fout[m].i += scratch[0].r;
279 static void kf_bfly5(
281 const size_t fstride,
282 const kiss_fft_cfg st,
286 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
288 kiss_fft_cpx scratch[13];
289 kiss_twiddle_cpx * twiddles = st->twiddles;
290 kiss_twiddle_cpx *tw;
291 kiss_twiddle_cpx ya,yb;
292 ya = twiddles[fstride*m];
293 yb = twiddles[fstride*2*m];
302 for ( u=0; u<m; ++u ) {
303 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
306 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
307 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
308 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
309 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
311 C_ADD( scratch[7],scratch[1],scratch[4]);
312 C_SUB( scratch[10],scratch[1],scratch[4]);
313 C_ADD( scratch[8],scratch[2],scratch[3]);
314 C_SUB( scratch[9],scratch[2],scratch[3]);
316 Fout0->r += scratch[7].r + scratch[8].r;
317 Fout0->i += scratch[7].i + scratch[8].i;
319 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
320 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
322 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
323 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
325 C_SUB(*Fout1,scratch[5],scratch[6]);
326 C_ADD(*Fout4,scratch[5],scratch[6]);
328 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
329 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
330 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
331 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
333 C_ADD(*Fout2,scratch[11],scratch[12]);
334 C_SUB(*Fout3,scratch[11],scratch[12]);
336 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
340 static void ki_bfly5(
342 const size_t fstride,
343 const kiss_fft_cfg st,
347 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
349 kiss_fft_cpx scratch[13];
350 kiss_twiddle_cpx * twiddles = st->twiddles;
351 kiss_twiddle_cpx *tw;
352 kiss_twiddle_cpx ya,yb;
353 ya = twiddles[fstride*m];
354 yb = twiddles[fstride*2*m];
363 for ( u=0; u<m; ++u ) {
366 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
367 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
368 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
369 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
371 C_ADD( scratch[7],scratch[1],scratch[4]);
372 C_SUB( scratch[10],scratch[1],scratch[4]);
373 C_ADD( scratch[8],scratch[2],scratch[3]);
374 C_SUB( scratch[9],scratch[2],scratch[3]);
376 Fout0->r += scratch[7].r + scratch[8].r;
377 Fout0->i += scratch[7].i + scratch[8].i;
379 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
380 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
382 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
383 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
385 C_SUB(*Fout1,scratch[5],scratch[6]);
386 C_ADD(*Fout4,scratch[5],scratch[6]);
388 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
389 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
390 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
391 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
393 C_ADD(*Fout2,scratch[11],scratch[12]);
394 C_SUB(*Fout3,scratch[11],scratch[12]);
396 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
400 /* perform the butterfly for one stage of a mixed radix FFT */
401 static void kf_bfly_generic(
403 const size_t fstride,
404 const kiss_fft_cfg st,
410 kiss_twiddle_cpx * twiddles = st->twiddles;
412 kiss_fft_cpx scratchbuf[17];
413 int Norig = st->nfft;
415 /*CHECKBUF(scratchbuf,nscratchbuf,p);*/
417 celt_fatal("KissFFT: max radix supported is 17");
419 for ( u=0; u<m; ++u ) {
421 for ( q1=0 ; q1<p ; ++q1 ) {
422 scratchbuf[q1] = Fout[ k ];
423 C_FIXDIV(scratchbuf[q1],p);
428 for ( q1=0 ; q1<p ; ++q1 ) {
430 Fout[ k ] = scratchbuf[0];
432 twidx += fstride * k;
433 if (twidx>=Norig) twidx-=Norig;
434 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
435 C_ADDTO( Fout[ k ] ,t);
442 static void ki_bfly_generic(
444 const size_t fstride,
445 const kiss_fft_cfg st,
451 kiss_twiddle_cpx * twiddles = st->twiddles;
453 kiss_fft_cpx scratchbuf[17];
454 int Norig = st->nfft;
456 /*CHECKBUF(scratchbuf,nscratchbuf,p);*/
458 celt_fatal("KissFFT: max radix supported is 17");
460 for ( u=0; u<m; ++u ) {
462 for ( q1=0 ; q1<p ; ++q1 ) {
463 scratchbuf[q1] = Fout[ k ];
468 for ( q1=0 ; q1<p ; ++q1 ) {
470 Fout[ k ] = scratchbuf[0];
472 twidx += fstride * k;
473 if (twidx>=Norig) twidx-=Norig;
474 C_MULC(t,scratchbuf[q] , twiddles[twidx] );
475 C_ADDTO( Fout[ k ] ,t);
484 void compute_bitrev_table(
487 const size_t fstride,
490 const kiss_fft_cfg st
493 const int p=*factors++; /* the radix */
494 const int m=*factors++; /* stage's fft length/p */
496 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
503 f += fstride*in_stride;
509 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
510 f += fstride*in_stride;
519 const kiss_fft_cpx * f,
520 const size_t fstride,
523 const kiss_fft_cfg st,
529 #ifndef RADIX_TWO_ONLY
531 kiss_fft_cpx * Fout_beg=Fout;
533 const int p=*factors++; /* the radix */
534 const int m=*factors++; /* stage's fft length/p */
535 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
537 kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
540 case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break;
541 case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break;
542 #ifndef RADIX_TWO_ONLY
543 case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly3(Fout,fstride,st,m);} break;
544 case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly5(Fout,fstride,st,m);} break;
545 default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly_generic(Fout,fstride,st,m,p);} break;
547 default: celt_fatal("kiss_fft: only powers of two enabled");
555 const kiss_fft_cpx * f,
556 const size_t fstride,
559 const kiss_fft_cfg st,
565 #ifndef RADIX_TWO_ONLY
567 kiss_fft_cpx * Fout_beg=Fout;
569 const int p=*factors++; /* the radix */
570 const int m=*factors++; /* stage's fft length/p */
571 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
573 ki_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
576 case 2: ki_bfly2(Fout,fstride,st,m, N, m2); break;
577 case 4: ki_bfly4(Fout,fstride,st,m, N, m2); break;
578 #ifndef RADIX_TWO_ONLY
579 case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly3(Fout,fstride,st,m);} break;
580 case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly5(Fout,fstride,st,m);} break;
581 default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly_generic(Fout,fstride,st,m,p);} break;
583 default: celt_fatal("kiss_fft: only powers of two enabled");
588 /* facbuf is populated by p1,m1,p2,m2, ...
593 void kf_factor(int n,int * facbuf)
597 /*factor out powers of 4, powers of 2, then any remaining primes */
601 case 4: p = 2; break;
602 case 2: p = 3; break;
603 default: p += 2; break;
605 if (p>32000 || (celt_int32_t)p*(celt_int32_t)p > n)
606 p = n; /* no more factors, skip to end */
615 * User-callable function to allocate all necessary storage space for the fft.
617 * The return value is a contiguous block of memory, allocated with malloc. As such,
618 * It can be freed with free(), rather than a kiss_fft-specific function.
620 kiss_fft_cfg kiss_fft_alloc(int nfft,void * mem,size_t * lenmem )
622 kiss_fft_cfg st=NULL;
623 size_t memneeded = sizeof(struct kiss_fft_state)
624 + sizeof(kiss_twiddle_cpx)*(nfft-1) + sizeof(int)*nfft; /* twiddle factors*/
626 if ( lenmem==NULL ) {
627 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
629 if (mem != NULL && *lenmem >= memneeded)
630 st = (kiss_fft_cfg)mem;
639 #if defined(FIXED_POINT) && (!defined(DOUBLE_PRECISION) || defined(MIXED_PRECISION))
640 for (i=0;i<nfft;++i) {
641 celt_word32_t phase = -i;
642 kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));
645 for (i=0;i<nfft;++i) {
646 const double pi=3.14159265358979323846264338327;
647 double phase = ( -2*pi /nfft ) * i;
648 kf_cexp(st->twiddles+i, phase );
651 kf_factor(nfft,st->factors);
654 st->bitrev = (int*)((char*)st + memneeded - sizeof(int)*nfft);
655 compute_bitrev_table(0, st->bitrev, 1,1, st->factors,st);
663 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
667 celt_fatal("In-place FFT not supported");
669 /* Bit-reverse the input */
671 for (i=0;i<st->nfft;i++)
673 fout[st->bitrev[i]] = fin[i];
675 fout[st->bitrev[i]].r *= st->scale;
676 fout[st->bitrev[i]].i *= st->scale;
679 kf_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
683 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
685 kiss_fft_stride(cfg,fin,fout,1);
688 void kiss_ifft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
692 celt_fatal("In-place FFT not supported");
694 /* Bit-reverse the input */
696 for (i=0;i<st->nfft;i++)
697 fout[st->bitrev[i]] = fin[i];
698 ki_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
702 void kiss_ifft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
704 kiss_ifft_stride(cfg,fin,fout,1);