1 /*Copyright (c) 2003-2004, Mark Borgerding
2 Lots of modifications by Jean-Marc Valin
3 Copyright (c) 2005-2007, Xiph.Org Foundation
4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
11 * Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above copyright notice,
14 this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.*/
29 /* This code is originally from Mark Borgerding's KISS-FFT but has been
30 heavily modified to better suit Opus */
38 #include "_kiss_fft_guts.h"
40 #include "os_support.h"
42 #include "stack_alloc.h"
43 #include "os_support.h"
45 /* The guts header contains all the multiplication and addition macros that are defined for
46 complex numbers. It also delares the kf_ internal functions.
52 const kiss_fft_state *st,
59 const kiss_twiddle_cpx * tw1;
61 kiss_fft_cpx * Fout_beg = Fout;
64 Fout = Fout_beg + i*mm;
70 Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);
71 Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);
72 C_MUL (t, *Fout2 , *tw1);
74 C_SUB( *Fout2 , *Fout , t );
85 const kiss_fft_state *st,
92 const kiss_twiddle_cpx * tw1;
95 kiss_fft_cpx * Fout_beg = Fout;
98 Fout = Fout_beg + i*mm;
103 C_MULC (t, *Fout2 , *tw1);
105 C_SUB( *Fout2 , *Fout , t );
106 C_ADDTO( *Fout , t );
113 static void kf_bfly4(
115 const size_t fstride,
116 const kiss_fft_state *st,
122 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
123 kiss_fft_cpx scratch[6];
128 kiss_fft_cpx * Fout_beg = Fout;
131 Fout = Fout_beg + i*mm;
132 tw3 = tw2 = tw1 = st->twiddles;
135 C_MUL4(scratch[0],Fout[m] , *tw1 );
136 C_MUL4(scratch[1],Fout[m2] , *tw2 );
137 C_MUL4(scratch[2],Fout[m3] , *tw3 );
139 Fout->r = PSHR(Fout->r, 2);
140 Fout->i = PSHR(Fout->i, 2);
141 C_SUB( scratch[5] , *Fout, scratch[1] );
142 C_ADDTO(*Fout, scratch[1]);
143 C_ADD( scratch[3] , scratch[0] , scratch[2] );
144 C_SUB( scratch[4] , scratch[0] , scratch[2] );
145 Fout[m2].r = PSHR(Fout[m2].r, 2);
146 Fout[m2].i = PSHR(Fout[m2].i, 2);
147 C_SUB( Fout[m2], *Fout, scratch[3] );
151 C_ADDTO( *Fout , scratch[3] );
153 Fout[m].r = scratch[5].r + scratch[4].i;
154 Fout[m].i = scratch[5].i - scratch[4].r;
155 Fout[m3].r = scratch[5].r - scratch[4].i;
156 Fout[m3].i = scratch[5].i + scratch[4].r;
162 static void ki_bfly4(
164 const size_t fstride,
165 const kiss_fft_state *st,
171 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
172 kiss_fft_cpx scratch[6];
177 kiss_fft_cpx * Fout_beg = Fout;
180 Fout = Fout_beg + i*mm;
181 tw3 = tw2 = tw1 = st->twiddles;
184 C_MULC(scratch[0],Fout[m] , *tw1 );
185 C_MULC(scratch[1],Fout[m2] , *tw2 );
186 C_MULC(scratch[2],Fout[m3] , *tw3 );
188 C_SUB( scratch[5] , *Fout, scratch[1] );
189 C_ADDTO(*Fout, scratch[1]);
190 C_ADD( scratch[3] , scratch[0] , scratch[2] );
191 C_SUB( scratch[4] , scratch[0] , scratch[2] );
192 C_SUB( Fout[m2], *Fout, scratch[3] );
196 C_ADDTO( *Fout , scratch[3] );
198 Fout[m].r = scratch[5].r - scratch[4].i;
199 Fout[m].i = scratch[5].i + scratch[4].r;
200 Fout[m3].r = scratch[5].r + scratch[4].i;
201 Fout[m3].i = scratch[5].i - scratch[4].r;
207 #ifndef RADIX_TWO_ONLY
209 static void kf_bfly3(
211 const size_t fstride,
212 const kiss_fft_state *st,
220 const size_t m2 = 2*m;
221 const kiss_twiddle_cpx *tw1,*tw2;
222 kiss_fft_cpx scratch[5];
223 kiss_twiddle_cpx epi3;
225 kiss_fft_cpx * Fout_beg = Fout;
226 epi3 = st->twiddles[fstride*m];
229 Fout = Fout_beg + i*mm;
230 tw1=tw2=st->twiddles;
233 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
235 C_MUL(scratch[1],Fout[m] , *tw1);
236 C_MUL(scratch[2],Fout[m2] , *tw2);
238 C_ADD(scratch[3],scratch[1],scratch[2]);
239 C_SUB(scratch[0],scratch[1],scratch[2]);
243 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
244 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
246 C_MULBYSCALAR( scratch[0] , epi3.i );
248 C_ADDTO(*Fout,scratch[3]);
250 Fout[m2].r = Fout[m].r + scratch[0].i;
251 Fout[m2].i = Fout[m].i - scratch[0].r;
253 Fout[m].r -= scratch[0].i;
254 Fout[m].i += scratch[0].r;
261 static void ki_bfly3(
263 const size_t fstride,
264 const kiss_fft_state *st,
271 const size_t m2 = 2*m;
272 const kiss_twiddle_cpx *tw1,*tw2;
273 kiss_fft_cpx scratch[5];
274 kiss_twiddle_cpx epi3;
276 kiss_fft_cpx * Fout_beg = Fout;
277 epi3 = st->twiddles[fstride*m];
280 Fout = Fout_beg + i*mm;
281 tw1=tw2=st->twiddles;
285 C_MULC(scratch[1],Fout[m] , *tw1);
286 C_MULC(scratch[2],Fout[m2] , *tw2);
288 C_ADD(scratch[3],scratch[1],scratch[2]);
289 C_SUB(scratch[0],scratch[1],scratch[2]);
293 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
294 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
296 C_MULBYSCALAR( scratch[0] , -epi3.i );
298 C_ADDTO(*Fout,scratch[3]);
300 Fout[m2].r = Fout[m].r + scratch[0].i;
301 Fout[m2].i = Fout[m].i - scratch[0].r;
303 Fout[m].r -= scratch[0].i;
304 Fout[m].i += scratch[0].r;
311 static void kf_bfly5(
313 const size_t fstride,
314 const kiss_fft_state *st,
320 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
322 kiss_fft_cpx scratch[13];
323 const kiss_twiddle_cpx * twiddles = st->twiddles;
324 const kiss_twiddle_cpx *tw;
325 kiss_twiddle_cpx ya,yb;
326 kiss_fft_cpx * Fout_beg = Fout;
328 ya = twiddles[fstride*m];
329 yb = twiddles[fstride*2*m];
334 Fout = Fout_beg + i*mm;
341 for ( u=0; u<m; ++u ) {
342 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
345 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
346 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
347 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
348 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
350 C_ADD( scratch[7],scratch[1],scratch[4]);
351 C_SUB( scratch[10],scratch[1],scratch[4]);
352 C_ADD( scratch[8],scratch[2],scratch[3]);
353 C_SUB( scratch[9],scratch[2],scratch[3]);
355 Fout0->r += scratch[7].r + scratch[8].r;
356 Fout0->i += scratch[7].i + scratch[8].i;
358 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
359 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
361 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
362 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
364 C_SUB(*Fout1,scratch[5],scratch[6]);
365 C_ADD(*Fout4,scratch[5],scratch[6]);
367 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
368 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
369 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
370 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
372 C_ADD(*Fout2,scratch[11],scratch[12]);
373 C_SUB(*Fout3,scratch[11],scratch[12]);
375 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
380 static void ki_bfly5(
382 const size_t fstride,
383 const kiss_fft_state *st,
389 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
391 kiss_fft_cpx scratch[13];
392 const kiss_twiddle_cpx * twiddles = st->twiddles;
393 const kiss_twiddle_cpx *tw;
394 kiss_twiddle_cpx ya,yb;
395 kiss_fft_cpx * Fout_beg = Fout;
397 ya = twiddles[fstride*m];
398 yb = twiddles[fstride*2*m];
403 Fout = Fout_beg + i*mm;
410 for ( u=0; u<m; ++u ) {
413 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
414 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
415 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
416 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
418 C_ADD( scratch[7],scratch[1],scratch[4]);
419 C_SUB( scratch[10],scratch[1],scratch[4]);
420 C_ADD( scratch[8],scratch[2],scratch[3]);
421 C_SUB( scratch[9],scratch[2],scratch[3]);
423 Fout0->r += scratch[7].r + scratch[8].r;
424 Fout0->i += scratch[7].i + scratch[8].i;
426 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
427 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
429 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
430 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
432 C_SUB(*Fout1,scratch[5],scratch[6]);
433 C_ADD(*Fout4,scratch[5],scratch[6]);
435 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
436 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
437 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
438 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
440 C_ADD(*Fout2,scratch[11],scratch[12]);
441 C_SUB(*Fout3,scratch[11],scratch[12]);
443 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
454 void compute_bitrev_table(
457 const size_t fstride,
459 opus_int16 * factors,
460 const kiss_fft_state *st
463 const int p=*factors++; /* the radix */
464 const int m=*factors++; /* stage's fft length/p */
466 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
473 f += fstride*in_stride;
479 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
480 f += fstride*in_stride;
486 /* facbuf is populated by p1,m1,p2,m2, ...
491 int kf_factor(int n,opus_int16 * facbuf)
495 /*factor out powers of 4, powers of 2, then any remaining primes */
499 case 4: p = 2; break;
500 case 2: p = 3; break;
501 default: p += 2; break;
503 if (p>32000 || (opus_int32)p*(opus_int32)p > n)
504 p = n; /* no more factors, skip to end */
507 #ifdef RADIX_TWO_ONLY
521 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
525 for (i=0;i<nfft;++i) {
526 opus_val32 phase = -i;
527 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
530 for (i=0;i<nfft;++i) {
531 const double pi=3.14159265358979323846264338327;
532 double phase = ( -2*pi /nfft ) * i;
533 kf_cexp(twiddles+i, phase );
540 * Allocates all necessary storage space for the fft and ifft.
541 * The return value is a contiguous block of memory. As such,
542 * It can be freed with free().
544 kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base)
546 kiss_fft_state *st=NULL;
547 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
549 if ( lenmem==NULL ) {
550 st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
552 if (mem != NULL && *lenmem >= memneeded)
553 st = (kiss_fft_state*)mem;
558 kiss_twiddle_cpx *twiddles;
566 st->twiddles = base->twiddles;
568 while (nfft<<st->shift != base->nfft && st->shift < 32)
573 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
574 compute_twiddles(twiddles, nfft);
577 if (!kf_factor(nfft,st->factors))
583 st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
584 if (st->bitrev==NULL)
586 compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
594 kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
596 return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
599 void opus_fft_free(const kiss_fft_state *cfg)
603 opus_free((opus_int16*)cfg->bitrev);
605 opus_free((kiss_twiddle_cpx*)cfg->twiddles);
606 opus_free((kiss_fft_state*)cfg);
610 #endif /* CUSTOM_MODES */
612 void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
617 int fstride[MAXFACTORS];
621 /* st->shift can be -1 */
622 shift = st->shift>0 ? st->shift : 0;
624 celt_assert2 (fin != fout, "In-place FFT not supported");
625 /* Bit-reverse the input */
626 for (i=0;i<st->nfft;i++)
628 fout[st->bitrev[i]] = fin[i];
630 fout[st->bitrev[i]].r *= st->scale;
631 fout[st->bitrev[i]].i *= st->scale;
638 p = st->factors[2*L];
639 m = st->factors[2*L+1];
640 fstride[L+1] = fstride[L]*p;
643 m = st->factors[2*L-1];
647 m2 = st->factors[2*i-1];
650 switch (st->factors[2*i])
653 kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
656 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
658 #ifndef RADIX_TWO_ONLY
660 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
663 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
671 void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
676 int fstride[MAXFACTORS];
680 /* st->shift can be -1 */
681 shift = st->shift>0 ? st->shift : 0;
682 celt_assert2 (fin != fout, "In-place FFT not supported");
683 /* Bit-reverse the input */
684 for (i=0;i<st->nfft;i++)
685 fout[st->bitrev[i]] = fin[i];
690 p = st->factors[2*L];
691 m = st->factors[2*L+1];
692 fstride[L+1] = fstride[L]*p;
695 m = st->factors[2*L-1];
699 m2 = st->factors[2*i-1];
702 switch (st->factors[2*i])
705 ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
708 ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
710 #ifndef RADIX_TWO_ONLY
712 ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
715 ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);