1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2008 Xiph.Org Foundation
3 Written by Jean-Marc Valin */
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /* This is a simple MDCT implementation that uses a N/4 complex FFT
30 to do most of the work. It should be relatively straightforward to
31 plug in pretty much and FFT here.
33 This replaces the Vorbis FFT (and uses the exact same API), which
34 was a bit too messy and that was ending up duplicating code
35 (might as well use the same FFT everywhere).
37 The algorithm is similar to (and inspired from) Fabrice Bellard's
38 MDCT implementation in FFMPEG, but has differences in signs, ordering
39 and scaling in many places.
50 #include "_kiss_fft_guts.h"
52 #include "os_support.h"
54 #include "stack_alloc.h"
58 int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
62 kiss_twiddle_scalar *trig;
63 #if defined(FIXED_POINT)
68 l->maxshift = maxshift;
69 for (i=0;i<=maxshift;i++)
72 l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
74 l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
75 #ifndef ENABLE_TI_DSPLIB55
80 l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
83 /* We have enough points that sine isn't necessary */
84 #if defined(FIXED_POINT)
86 trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
89 trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);
94 void clt_mdct_clear(mdct_lookup *l)
97 for (i=0;i<=l->maxshift;i++)
98 opus_fft_free(l->kfft[i]);
99 opus_free((kiss_twiddle_scalar*)l->trig);
102 #endif /* CUSTOM_MODES */
104 /* Forward MDCT trashes the input array */
105 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out,
106 const opus_val16 *window, int overlap, int shift, int stride)
110 kiss_twiddle_scalar sine;
111 VARDECL(kiss_fft_scalar, f);
112 VARDECL(kiss_fft_scalar, f2);
118 ALLOC(f, N2, kiss_fft_scalar);
119 ALLOC(f2, N2, kiss_fft_scalar);
120 /* sin(x) ~= x here */
122 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
124 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
127 /* Consider the input to be composed of four blocks: [a, b, c, d] */
128 /* Window, shuffle, fold */
130 /* Temp pointers to make it really clear to the compiler what we're doing */
131 const kiss_fft_scalar * restrict xp1 = in+(overlap>>1);
132 const kiss_fft_scalar * restrict xp2 = in+N2-1+(overlap>>1);
133 kiss_fft_scalar * restrict yp = f;
134 const opus_val16 * restrict wp1 = window+(overlap>>1);
135 const opus_val16 * restrict wp2 = window+(overlap>>1)-1;
136 for(i=0;i<(overlap>>2);i++)
138 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
139 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
140 *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
147 wp2 = window+overlap-1;
148 for(;i<N4-(overlap>>2);i++)
150 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
158 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
159 *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
160 *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
169 kiss_fft_scalar * restrict yp = f;
170 const kiss_twiddle_scalar *t = &l->trig[0];
173 kiss_fft_scalar re, im, yr, yi;
176 yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
177 yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
178 /* works because the cos is nearly one */
179 *yp++ = yr + S_MUL(yi,sine);
180 *yp++ = yi - S_MUL(yr,sine);
184 /* N/4 complex FFT, down-scales by 4/N */
185 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
189 /* Temp pointers to make it really clear to the compiler what we're doing */
190 const kiss_fft_scalar * restrict fp = f2;
191 kiss_fft_scalar * restrict yp1 = out;
192 kiss_fft_scalar * restrict yp2 = out+stride*(N2-1);
193 const kiss_twiddle_scalar *t = &l->trig[0];
194 /* Temp pointers to make it really clear to the compiler what we're doing */
197 kiss_fft_scalar yr, yi;
198 yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
199 yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
200 /* works because the cos is nearly one */
201 *yp1 = yr - S_MUL(yi,sine);
202 *yp2 = yi + S_MUL(yr,sine);;
211 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out,
212 const opus_val16 * restrict window, int overlap, int shift, int stride)
216 kiss_twiddle_scalar sine;
217 VARDECL(kiss_fft_scalar, f);
218 VARDECL(kiss_fft_scalar, f2);
224 ALLOC(f, N2, kiss_fft_scalar);
225 ALLOC(f2, N2, kiss_fft_scalar);
226 /* sin(x) ~= x here */
228 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
230 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
235 /* Temp pointers to make it really clear to the compiler what we're doing */
236 const kiss_fft_scalar * restrict xp1 = in;
237 const kiss_fft_scalar * restrict xp2 = in+stride*(N2-1);
238 kiss_fft_scalar * restrict yp = f2;
239 const kiss_twiddle_scalar *t = &l->trig[0];
242 kiss_fft_scalar yr, yi;
243 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
244 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
245 /* works because the cos is nearly one */
246 *yp++ = yr - S_MUL(yi,sine);
247 *yp++ = yi + S_MUL(yr,sine);
253 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
254 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f);
258 kiss_fft_scalar * restrict fp = f;
259 const kiss_twiddle_scalar *t = &l->trig[0];
263 kiss_fft_scalar re, im, yr, yi;
266 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
267 yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
268 yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
269 /* works because the cos is nearly one */
270 *fp++ = yr - S_MUL(yi,sine);
271 *fp++ = yi + S_MUL(yr,sine);
274 /* De-shuffle the components for the middle of the window only */
276 const kiss_fft_scalar * restrict fp1 = f;
277 const kiss_fft_scalar * restrict fp2 = f+N2-1;
278 kiss_fft_scalar * restrict yp = f2;
279 for(i = 0; i < N4; i++)
287 out -= (N2-overlap)>>1;
288 /* Mirror on both sides for TDAC */
290 kiss_fft_scalar * restrict fp1 = f2+N4-1;
291 kiss_fft_scalar * restrict xp1 = out+N2-1;
292 kiss_fft_scalar * restrict yp1 = out+N4-overlap/2;
293 const opus_val16 * restrict wp1 = window;
294 const opus_val16 * restrict wp2 = window+overlap-1;
295 for(i = 0; i< N4-overlap/2; i++)
305 *yp1++ +=-MULT16_32_Q15(*wp1, x1);
306 *xp1-- += MULT16_32_Q15(*wp2, x1);
312 kiss_fft_scalar * restrict fp2 = f2+N4;
313 kiss_fft_scalar * restrict xp2 = out+N2;
314 kiss_fft_scalar * restrict yp2 = out+N-1-(N4-overlap/2);
315 const opus_val16 * restrict wp1 = window;
316 const opus_val16 * restrict wp2 = window+overlap-1;
317 for(i = 0; i< N4-overlap/2; i++)
327 *yp2-- = MULT16_32_Q15(*wp1, x2);
328 *xp2++ = MULT16_32_Q15(*wp2, x2);