1 /*Copyright (c) 2003-2004, Mark Borgerding
2 Lots of modifications by Jean-Marc Valin
3 Copyright (c) 2005-2007, Xiph.Org Foundation
4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
11 * Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above copyright notice,
14 this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.*/
29 /* This code is originally from Mark Borgerding's KISS-FFT but has been
30 heavily modified to better suit Opus */
38 #include "_kiss_fft_guts.h"
40 #include "os_support.h"
42 #include "stack_alloc.h"
44 /* The guts header contains all the multiplication and addition macros that are defined for
45 complex numbers. It also delares the kf_ internal functions.
51 const kiss_fft_state *st,
58 const kiss_twiddle_cpx * tw1;
60 kiss_fft_cpx * Fout_beg = Fout;
63 Fout = Fout_beg + i*mm;
69 Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
70 Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
71 C_MUL (t, *Fout2 , *tw1);
73 C_SUB( *Fout2 , *Fout , t );
84 const kiss_fft_state *st,
91 const kiss_twiddle_cpx * tw1;
94 kiss_fft_cpx * Fout_beg = Fout;
97 Fout = Fout_beg + i*mm;
102 C_MULC (t, *Fout2 , *tw1);
104 C_SUB( *Fout2 , *Fout , t );
105 C_ADDTO( *Fout , t );
112 static void kf_bfly4(
114 const size_t fstride,
115 const kiss_fft_state *st,
121 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
122 kiss_fft_cpx scratch[6];
127 kiss_fft_cpx * Fout_beg = Fout;
130 Fout = Fout_beg + i*mm;
131 tw3 = tw2 = tw1 = st->twiddles;
134 C_MUL4(scratch[0],Fout[m] , *tw1 );
135 C_MUL4(scratch[1],Fout[m2] , *tw2 );
136 C_MUL4(scratch[2],Fout[m3] , *tw3 );
138 Fout->r = PSHR32(Fout->r, 2);
139 Fout->i = PSHR32(Fout->i, 2);
140 C_SUB( scratch[5] , *Fout, scratch[1] );
141 C_ADDTO(*Fout, scratch[1]);
142 C_ADD( scratch[3] , scratch[0] , scratch[2] );
143 C_SUB( scratch[4] , scratch[0] , scratch[2] );
144 C_SUB( Fout[m2], *Fout, scratch[3] );
148 C_ADDTO( *Fout , scratch[3] );
150 Fout[m].r = scratch[5].r + scratch[4].i;
151 Fout[m].i = scratch[5].i - scratch[4].r;
152 Fout[m3].r = scratch[5].r - scratch[4].i;
153 Fout[m3].i = scratch[5].i + scratch[4].r;
159 static void ki_bfly4(
161 const size_t fstride,
162 const kiss_fft_state *st,
168 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
169 kiss_fft_cpx scratch[6];
174 kiss_fft_cpx * Fout_beg = Fout;
177 Fout = Fout_beg + i*mm;
178 tw3 = tw2 = tw1 = st->twiddles;
181 C_MULC(scratch[0],Fout[m] , *tw1 );
182 C_MULC(scratch[1],Fout[m2] , *tw2 );
183 C_MULC(scratch[2],Fout[m3] , *tw3 );
185 C_SUB( scratch[5] , *Fout, scratch[1] );
186 C_ADDTO(*Fout, scratch[1]);
187 C_ADD( scratch[3] , scratch[0] , scratch[2] );
188 C_SUB( scratch[4] , scratch[0] , scratch[2] );
189 C_SUB( Fout[m2], *Fout, scratch[3] );
193 C_ADDTO( *Fout , scratch[3] );
195 Fout[m].r = scratch[5].r - scratch[4].i;
196 Fout[m].i = scratch[5].i + scratch[4].r;
197 Fout[m3].r = scratch[5].r + scratch[4].i;
198 Fout[m3].i = scratch[5].i - scratch[4].r;
204 #ifndef RADIX_TWO_ONLY
206 static void kf_bfly3(
208 const size_t fstride,
209 const kiss_fft_state *st,
217 const size_t m2 = 2*m;
218 const kiss_twiddle_cpx *tw1,*tw2;
219 kiss_fft_cpx scratch[5];
220 kiss_twiddle_cpx epi3;
222 kiss_fft_cpx * Fout_beg = Fout;
223 epi3 = st->twiddles[fstride*m];
226 Fout = Fout_beg + i*mm;
227 tw1=tw2=st->twiddles;
230 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
232 C_MUL(scratch[1],Fout[m] , *tw1);
233 C_MUL(scratch[2],Fout[m2] , *tw2);
235 C_ADD(scratch[3],scratch[1],scratch[2]);
236 C_SUB(scratch[0],scratch[1],scratch[2]);
240 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
241 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
243 C_MULBYSCALAR( scratch[0] , epi3.i );
245 C_ADDTO(*Fout,scratch[3]);
247 Fout[m2].r = Fout[m].r + scratch[0].i;
248 Fout[m2].i = Fout[m].i - scratch[0].r;
250 Fout[m].r -= scratch[0].i;
251 Fout[m].i += scratch[0].r;
258 static void ki_bfly3(
260 const size_t fstride,
261 const kiss_fft_state *st,
268 const size_t m2 = 2*m;
269 const kiss_twiddle_cpx *tw1,*tw2;
270 kiss_fft_cpx scratch[5];
271 kiss_twiddle_cpx epi3;
273 kiss_fft_cpx * Fout_beg = Fout;
274 epi3 = st->twiddles[fstride*m];
277 Fout = Fout_beg + i*mm;
278 tw1=tw2=st->twiddles;
282 C_MULC(scratch[1],Fout[m] , *tw1);
283 C_MULC(scratch[2],Fout[m2] , *tw2);
285 C_ADD(scratch[3],scratch[1],scratch[2]);
286 C_SUB(scratch[0],scratch[1],scratch[2]);
290 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
291 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
293 C_MULBYSCALAR( scratch[0] , -epi3.i );
295 C_ADDTO(*Fout,scratch[3]);
297 Fout[m2].r = Fout[m].r + scratch[0].i;
298 Fout[m2].i = Fout[m].i - scratch[0].r;
300 Fout[m].r -= scratch[0].i;
301 Fout[m].i += scratch[0].r;
308 static void kf_bfly5(
310 const size_t fstride,
311 const kiss_fft_state *st,
317 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
319 kiss_fft_cpx scratch[13];
320 const kiss_twiddle_cpx * twiddles = st->twiddles;
321 const kiss_twiddle_cpx *tw;
322 kiss_twiddle_cpx ya,yb;
323 kiss_fft_cpx * Fout_beg = Fout;
325 ya = twiddles[fstride*m];
326 yb = twiddles[fstride*2*m];
331 Fout = Fout_beg + i*mm;
338 for ( u=0; u<m; ++u ) {
339 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
342 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
343 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
344 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
345 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
347 C_ADD( scratch[7],scratch[1],scratch[4]);
348 C_SUB( scratch[10],scratch[1],scratch[4]);
349 C_ADD( scratch[8],scratch[2],scratch[3]);
350 C_SUB( scratch[9],scratch[2],scratch[3]);
352 Fout0->r += scratch[7].r + scratch[8].r;
353 Fout0->i += scratch[7].i + scratch[8].i;
355 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
356 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
358 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
359 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
361 C_SUB(*Fout1,scratch[5],scratch[6]);
362 C_ADD(*Fout4,scratch[5],scratch[6]);
364 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
365 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
366 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
367 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
369 C_ADD(*Fout2,scratch[11],scratch[12]);
370 C_SUB(*Fout3,scratch[11],scratch[12]);
372 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
377 static void ki_bfly5(
379 const size_t fstride,
380 const kiss_fft_state *st,
386 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
388 kiss_fft_cpx scratch[13];
389 const kiss_twiddle_cpx * twiddles = st->twiddles;
390 const kiss_twiddle_cpx *tw;
391 kiss_twiddle_cpx ya,yb;
392 kiss_fft_cpx * Fout_beg = Fout;
394 ya = twiddles[fstride*m];
395 yb = twiddles[fstride*2*m];
400 Fout = Fout_beg + i*mm;
407 for ( u=0; u<m; ++u ) {
410 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
411 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
412 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
413 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
415 C_ADD( scratch[7],scratch[1],scratch[4]);
416 C_SUB( scratch[10],scratch[1],scratch[4]);
417 C_ADD( scratch[8],scratch[2],scratch[3]);
418 C_SUB( scratch[9],scratch[2],scratch[3]);
420 Fout0->r += scratch[7].r + scratch[8].r;
421 Fout0->i += scratch[7].i + scratch[8].i;
423 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
424 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
426 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
427 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
429 C_SUB(*Fout1,scratch[5],scratch[6]);
430 C_ADD(*Fout4,scratch[5],scratch[6]);
432 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
433 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
434 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
435 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
437 C_ADD(*Fout2,scratch[11],scratch[12]);
438 C_SUB(*Fout3,scratch[11],scratch[12]);
440 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
451 void compute_bitrev_table(
454 const size_t fstride,
456 opus_int16 * factors,
457 const kiss_fft_state *st
460 const int p=*factors++; /* the radix */
461 const int m=*factors++; /* stage's fft length/p */
463 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
470 f += fstride*in_stride;
476 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
477 f += fstride*in_stride;
483 /* facbuf is populated by p1,m1,p2,m2, ...
488 int kf_factor(int n,opus_int16 * facbuf)
492 /*factor out powers of 4, powers of 2, then any remaining primes */
496 case 4: p = 2; break;
497 case 2: p = 3; break;
498 default: p += 2; break;
500 if (p>32000 || (opus_int32)p*(opus_int32)p > n)
501 p = n; /* no more factors, skip to end */
504 #ifdef RADIX_TWO_ONLY
518 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
522 for (i=0;i<nfft;++i) {
523 opus_val32 phase = -i;
524 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
527 for (i=0;i<nfft;++i) {
528 const double pi=3.14159265358979323846264338327;
529 double phase = ( -2*pi /nfft ) * i;
530 kf_cexp(twiddles+i, phase );
537 * Allocates all necessary storage space for the fft and ifft.
538 * The return value is a contiguous block of memory. As such,
539 * It can be freed with free().
541 kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base)
543 kiss_fft_state *st=NULL;
544 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
546 if ( lenmem==NULL ) {
547 st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
549 if (mem != NULL && *lenmem >= memneeded)
550 st = (kiss_fft_state*)mem;
555 kiss_twiddle_cpx *twiddles;
559 st->scale = 1.f/nfft;
563 st->twiddles = base->twiddles;
565 while (nfft<<st->shift != base->nfft && st->shift < 32)
570 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
571 compute_twiddles(twiddles, nfft);
574 if (!kf_factor(nfft,st->factors))
580 st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
581 if (st->bitrev==NULL)
583 compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
591 kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
593 return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
596 void opus_fft_free(const kiss_fft_state *cfg)
600 opus_free((opus_int16*)cfg->bitrev);
602 opus_free((kiss_twiddle_cpx*)cfg->twiddles);
603 opus_free((kiss_fft_state*)cfg);
607 #endif /* CUSTOM_MODES */
609 void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
614 int fstride[MAXFACTORS];
618 /* st->shift can be -1 */
619 shift = st->shift>0 ? st->shift : 0;
621 celt_assert2 (fin != fout, "In-place FFT not supported");
622 /* Bit-reverse the input */
623 for (i=0;i<st->nfft;i++)
625 fout[st->bitrev[i]] = fin[i];
627 fout[st->bitrev[i]].r *= st->scale;
628 fout[st->bitrev[i]].i *= st->scale;
635 p = st->factors[2*L];
636 m = st->factors[2*L+1];
637 fstride[L+1] = fstride[L]*p;
640 m = st->factors[2*L-1];
644 m2 = st->factors[2*i-1];
647 switch (st->factors[2*i])
650 kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
653 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
655 #ifndef RADIX_TWO_ONLY
657 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
660 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
668 void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
673 int fstride[MAXFACTORS];
677 /* st->shift can be -1 */
678 shift = st->shift>0 ? st->shift : 0;
679 celt_assert2 (fin != fout, "In-place FFT not supported");
680 /* Bit-reverse the input */
681 for (i=0;i<st->nfft;i++)
682 fout[st->bitrev[i]] = fin[i];
687 p = st->factors[2*L];
688 m = st->factors[2*L+1];
689 fstride[L+1] = fstride[L]*p;
692 m = st->factors[2*L-1];
696 m2 = st->factors[2*i-1];
699 switch (st->factors[2*i])
702 ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
705 ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
707 #ifndef RADIX_TWO_ONLY
709 ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
712 ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);