1 /* (C) 2008 Jean-Marc Valin, CSIRO
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 /* This is a simple MDCT implementation that uses a N/4 complex FFT
33 to do most of the work. It should be relatively straightforward to
34 plug in pretty much any FFT here.
36 This replaces the Vorbis FFT (and uses the exact same API), which
37 was a bit too messy and that was ending up duplicating code
38 (might as well use the same FFT everywhere).
40 The algorithm is similar to (and inspired from) Fabrice Bellard's
41 MDCT implementation in FFMPEG, but has differences in signs, ordering
42 and scaling in many places.
50 #include "kfft_double.h"
52 #include "os_support.h"
54 #include "stack_alloc.h"
/* Value of pi used by mdct_init() when it evaluates cos(2*M_PI*(i+1/8)/N)
   for the twiddle table. Only 10 significant digits are given here.
   NOTE(review): any #ifndef guard around this define is not visible in this
   excerpt -- confirm it does not clash with a definition from <math.h>. */
57 #define M_PI 3.141592653
/* Initialise the MDCT lookup structure *l for a transform of size N.
 *
 * l : lookup to fill in; its kfft and trig members are assigned here.
 * N : transform size; the FFT state is allocated for N>>2 points.
 *
 * NOTE(review): the declarations of N2 and i, and the loop header(s) that
 * drive the l->trig[i] assignments below, are on lines not visible in this
 * excerpt -- presumably N2 is N>>1 and i runs over 0..N2-1; confirm. */
60 void mdct_init(mdct_lookup *l,int N)
/* FFT state for the N/4-point complex transform used by the forward and
   backward MDCT. */
66 l->kfft = cpx32_fft_alloc(N>>2);
/* Twiddle table with N2 entries; filled by one of the three variants below. */
67 l->trig = (kiss_twiddle_scalar*)celt_alloc(N2*sizeof(kiss_twiddle_scalar));
68 /* We have enough points that sine isn't necessary: the rotations in
   mdct_forward()/mdct_backward() read t[N4] next to t[0] from this single
   cosine table (presumably t[N4] plays the role of the sine term). */
69 #if defined(FIXED_POINT)
/* Bitwise & is applied to two 0/1 preprocessor values here, so the condition
   evaluates the same as it would with &&. */
70 #if defined(DOUBLE_PRECISION) & !defined(MIXED_PRECISION)
/* Cosine computed with the C library and scaled by SAMP_MAX. */
72 l->trig[i] = SAMP_MAX*cos(2*M_PI*(i+1./8.)/N);
/* Presumably the #else branch of the inner #if (directive not shown):
   cosine obtained from celt_cos_norm() on an integer argument
   ((i<<17) + 16386)/N, scaled by TRIG_UPSCALE. 16386 is close to
   2^17/8 = 16384, i.e. the same 1/8 offset as the floating-point formula;
   the reason for the extra 2 is not visible here. */
75 l->trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),16386),N));
/* Presumably the non-FIXED_POINT branch (directive not shown): unscaled
   cosine of 2*pi*(i+1/8)/N. */
79 l->trig[i] = cos(2*M_PI*(i+1./8.)/N);
/* Release the resources held by the MDCT lookup *l.
 *
 * The visible statement frees the FFT state that mdct_init() obtained from
 * cpx32_fft_alloc().
 * NOTE(review): mdct_init() also allocates l->trig with celt_alloc(); the
 * matching release is not visible in this excerpt -- confirm it is freed. */
83 void mdct_clear(mdct_lookup *l)
85 cpx32_fft_free(l->kfft);
/* Forward MDCT.
 *
 * l       : lookup prepared by mdct_init() (FFT state and twiddle table).
 * in      : input samples; read through xp1/xp2 at offsets of up to +/-N2.
 * out     : output buffer; also used as working storage for the folded and
 *           pre-rotated data before the FFT.
 * window  : window coefficients, applied with MULT16_32_Q15.
 * overlap : window overlap length; overlap>>1 and overlap>>2 set the pointer
 *           offsets and loop bounds of the folding stage.
 *
 * Stages visible below: window/fold into out, pre-rotation in place in out,
 * N4-point complex FFT from out into the scratch buffer f, and post-rotation
 * from f back into out.
 * NOTE(review): the definitions of N, N2, N4 and i, several loop headers and
 * loop bodies, and the pointer updates between statements are on lines not
 * visible in this excerpt. */
89 void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16_t *window, int overlap)
/* Scratch buffer of N2 scalars that receives the FFT output. */
93 VARDECL(kiss_fft_scalar, f);
98 ALLOC(f, N2, kiss_fft_scalar);
100 /* Consider the input to be composed of four blocks: [a, b, c, d] */
101 /* Window, shuffle, fold */
103 /* Temp pointers to make it really clear to the compiler what we're doing */
/* xp1 and xp2 start N2-1 samples apart, both offset by overlap>>1 into in. */
104 const kiss_fft_scalar * restrict xp1 = in+(overlap>>1);
105 const kiss_fft_scalar * restrict xp2 = in+N2-1+(overlap>>1);
/* Folded samples are written sequentially into out. */
106 kiss_fft_scalar * restrict yp = out;
/* wp1 and wp2 start on adjacent coefficients around the middle of the window. */
107 const celt_word16_t * restrict wp1 = window+(overlap>>1);
108 const celt_word16_t * restrict wp2 = window+(overlap>>1)-1;
/* First overlap>>2 iterations: each output is a sum or difference of two
   window-weighted input samples. */
109 for(i=0;i<(overlap>>2);i++)
111 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
112 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
113 *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
/* Point wp2 at the last window coefficient for the loops that follow. */
120 wp2 = window+overlap-1;
/* Middle section: i continues up to N4-(overlap>>2). The body of this loop is
   not visible in this excerpt. */
121 for(;i<N4-(overlap>>2);i++)
123 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
/* NOTE(review): the statements below presumably belong to a final loop whose
   header is not visible; they again combine two window-weighted samples. */
131 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
132 *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
133 *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
/* Pre-rotation: walks out in place, replacing each (re, im) pair with its
   product against the twiddle pair (t[0], t[N4]). The loads of re/im and the
   advance of t are on lines not visible here. */
142 kiss_fft_scalar * restrict yp = out;
143 kiss_fft_scalar *t = &l->trig[0];
146 kiss_fft_scalar re, im;
149 *yp++ = -S_MUL(re,t[0]) + S_MUL(im,t[N4]);
150 *yp++ = -S_MUL(im,t[0]) - S_MUL(re,t[N4]);
155 /* N/4 complex FFT, down-scales by 4/N */
/* Input is the pre-rotated data in out; the result lands in f. */
156 cpx32_fft(l->kfft, out, f, N4);
160 /* Temp pointers to make it really clear to the compiler what we're doing */
/* Post-rotation: reads complex pairs from f and writes one value at the front
   of out (yp1) and one at the back (yp2, starting at out+N2-1). The pointer
   updates between iterations are not visible in this excerpt. */
161 const kiss_fft_scalar * restrict fp = f;
162 kiss_fft_scalar * restrict yp1 = out;
163 kiss_fft_scalar * restrict yp2 = out+N2-1;
164 kiss_fft_scalar *t = &l->trig[0];
165 /* Temp pointers to make it really clear to the compiler what we're doing */
168 *yp1 = -S_MUL(fp[1],t[N4]) + S_MUL(fp[0],t[0]);
169 *yp2 = -S_MUL(fp[0],t[N4]) - S_MUL(fp[1],t[0]);
/* Backward (inverse) MDCT.
 *
 * l       : lookup prepared by mdct_init() (FFT state and twiddle table).
 * in      : input coefficients; read from both ends (in and in+N2-1).
 * out     : output buffer. The first mirrored half is accumulated into it
 *           with +=, the second half is stored with =.
 * window  : window coefficients, applied with MULT16_32_Q15 while mirroring.
 * overlap : window overlap length; overlap/2 sets pointer offsets and loop
 *           bounds of the mirroring stage.
 *
 * Stages visible below: pre-rotation of in into f2, inverse N4-point complex
 * FFT from f2 into f, post-rotation in place in f, de-shuffle from f into f2,
 * then windowed mirroring of f2 into out.
 * NOTE(review): the definitions of N, N2, N4 and i, several loop headers and
 * bodies, the loads of re/im/x1/x2, and the end of the function are on lines
 * not visible in this excerpt. */
180 void mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16_t * restrict window, int overlap)
/* Two scratch buffers of N2 scalars each; they alternate as source and
   destination of the stages below. */
184 VARDECL(kiss_fft_scalar, f);
185 VARDECL(kiss_fft_scalar, f2);
190 ALLOC(f, N2, kiss_fft_scalar);
191 ALLOC(f2, N2, kiss_fft_scalar);
195 /* Temp pointers to make it really clear to the compiler what we're doing */
/* Pre-rotation: pairs one sample from the front of in (xp1) with one from the
   back (xp2), multiplies by the twiddle pair (t[0], t[N4]) and writes the
   result sequentially into f2. Pointer updates are not visible here. */
196 const kiss_fft_scalar * restrict xp1 = in;
197 const kiss_fft_scalar * restrict xp2 = in+N2-1;
198 kiss_fft_scalar * restrict yp = f2;
199 kiss_fft_scalar *t = &l->trig[0];
202 *yp++ = -S_MUL(*xp2, t[0]) - S_MUL(*xp1,t[N4]);
203 *yp++ = S_MUL(*xp2, t[N4]) - S_MUL(*xp1,t[0]);
210 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
/* Input is f2; the result lands in f. */
211 cpx32_ifft(l->kfft, f2, f, N4);
/* Post-rotation: walks f in place, replacing each (re, im) pair with its
   product against (*t, t[N4]). The loads of re/im and the advance of t are
   on lines not visible here. */
215 kiss_fft_scalar * restrict fp = f;
216 kiss_fft_scalar *t = &l->trig[0];
220 kiss_fft_scalar re, im;
223 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
224 *fp++ = S_MUL(re,*t) + S_MUL(im,t[N4]);
225 *fp++ = S_MUL(im,*t) - S_MUL(re,t[N4]);
229 /* De-shuffle the components for the middle of the window only */
/* Reads f from both ends (fp1 forwards from f, fp2 backwards from f+N2-1) and
   writes into f2 over N4 iterations; the loop body is not visible here. */
231 const kiss_fft_scalar * restrict fp1 = f;
232 const kiss_fft_scalar * restrict fp2 = f+N2-1;
233 kiss_fft_scalar * restrict yp = f2;
234 for(i = 0; i < N4; i++)
243 /* Mirror on both sides for TDAC */
/* First half: source pointer starts at f2+N4-1; the two destination pointers
   start at out+N4-overlap/2 (yp1, moving up) and out+N2-1 (xp1, moving down). */
245 kiss_fft_scalar * restrict fp1 = f2+N4-1;
246 kiss_fft_scalar * restrict xp1 = out+N2-1;
247 kiss_fft_scalar * restrict yp1 = out+N4-overlap/2;
/* wp1 walks from the first window coefficient, wp2 from the last. */
248 const celt_word16_t * restrict wp1 = window;
249 const celt_word16_t * restrict wp2 = window+overlap-1;
/* Loop over the first N4-overlap/2 samples; its body is not visible here.
   NOTE(review): the window-weighted statements below presumably belong to a
   following loop whose header is not shown. */
250 for(i = 0; i< N4-overlap/2; i++)
/* These use +=, so the windowed values are added to whatever out already
   contains at those positions. */
260 *yp1++ +=-MULT16_32_Q15(*wp1, x1);
261 *xp1-- += MULT16_32_Q15(*wp2, x1);
/* Second half: source pointer starts at f2+N4; the two destination pointers
   start at out+N2 (xp2, moving up) and out+N-1-(N4-overlap/2) (yp2, moving
   down). */
267 kiss_fft_scalar * restrict fp2 = f2+N4;
268 kiss_fft_scalar * restrict xp2 = out+N2;
269 kiss_fft_scalar * restrict yp2 = out+N-1-(N4-overlap/2);
270 const celt_word16_t * restrict wp1 = window;
271 const celt_word16_t * restrict wp2 = window+overlap-1;
/* Loop over the first N4-overlap/2 samples; its body is not visible here. */
272 for(i = 0; i< N4-overlap/2; i++)
/* Unlike the first half, these use plain assignment and overwrite out. */
282 *yp2-- = MULT16_32_Q15(*wp1, x2);
283 *xp2++ = MULT16_32_Q15(*wp2, x2);