1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2008 Xiph.Org Foundation
3 Written by Jean-Marc Valin */
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 - Neither the name of the Xiph.org Foundation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
24 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 /* This is a simple MDCT implementation that uses a N/4 complex FFT
34 to do most of the work. It should be relatively straightforward to
35 plug in pretty much and FFT here.
37 This replaces the Vorbis FFT (and uses the exact same API), which
38 was a bit too messy and that was ending up duplicating code
39 (might as well use the same FFT everywhere).
41 The algorithm is similar to (and inspired from) Fabrice Bellard's
42 MDCT implementation in FFMPEG, but has differences in signs, ordering
43 and scaling in many places.
52 #include "_kiss_fft_guts.h"
54 #include "os_support.h"
56 #include "stack_alloc.h"
59 #define M_PI 3.141592653
62 void clt_mdct_init(mdct_lookup *l,int N, int maxshift)
66 kiss_twiddle_scalar *trig;
70 l->maxshift = maxshift;
71 for (i=0;i<=maxshift;i++)
74 l->kfft[i] = kiss_fft_alloc(N>>2>>i, 0, 0);
76 l->kfft[i] = kiss_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
77 #ifndef ENABLE_TI_DSPLIB55
82 l->trig = trig = (kiss_twiddle_scalar*)celt_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
85 /* We have enough points that sine isn't necessary */
86 #if defined(FIXED_POINT)
88 l->trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
91 trig[i] = cos(2*M_PI*i/N);
95 void clt_mdct_clear(mdct_lookup *l)
98 for (i=0;i<=l->maxshift;i++)
99 kiss_fft_free(l->kfft[i]);
100 celt_free((kiss_twiddle_scalar*)l->trig);
103 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 *window, int overlap, int shift)
107 kiss_twiddle_scalar sine;
108 VARDECL(kiss_fft_scalar, f);
114 ALLOC(f, N2, kiss_fft_scalar);
115 /* sin(x) ~= x here */
117 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
119 sine = 2*M_PI*(.125f)/N;
122 /* Consider the input to be composed of four blocks: [a, b, c, d] */
123 /* Window, shuffle, fold */
125 /* Temp pointers to make it really clear to the compiler what we're doing */
126 const kiss_fft_scalar * restrict xp1 = in+(overlap>>1);
127 const kiss_fft_scalar * restrict xp2 = in+N2-1+(overlap>>1);
128 kiss_fft_scalar * restrict yp = out;
129 const celt_word16 * restrict wp1 = window+(overlap>>1);
130 const celt_word16 * restrict wp2 = window+(overlap>>1)-1;
131 for(i=0;i<(overlap>>2);i++)
133 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
134 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
135 *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
142 wp2 = window+overlap-1;
143 for(;i<N4-(overlap>>2);i++)
145 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
153 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
154 *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
155 *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
164 kiss_fft_scalar * restrict yp = out;
165 const kiss_fft_scalar *t = &l->trig[0];
168 kiss_fft_scalar re, im, yr, yi;
171 yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
172 yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
173 /* works because the cos is nearly one */
174 *yp++ = yr + S_MUL(yi,sine);
175 *yp++ = yi - S_MUL(yr,sine);
179 /* N/4 complex FFT, down-scales by 4/N */
180 kiss_fft(l->kfft[shift], (kiss_fft_cpx *)out, (kiss_fft_cpx *)f);
184 /* Temp pointers to make it really clear to the compiler what we're doing */
185 const kiss_fft_scalar * restrict fp = f;
186 kiss_fft_scalar * restrict yp1 = out;
187 kiss_fft_scalar * restrict yp2 = out+N2-1;
188 const kiss_fft_scalar *t = &l->trig[0];
189 /* Temp pointers to make it really clear to the compiler what we're doing */
192 kiss_fft_scalar yr, yi;
193 yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
194 yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
195 /* works because the cos is nearly one */
196 *yp1 = yr - S_MUL(yi,sine);
197 *yp2 = yi + S_MUL(yr,sine);;
207 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 * restrict window, int overlap, int shift)
211 kiss_twiddle_scalar sine;
212 VARDECL(kiss_fft_scalar, f);
213 VARDECL(kiss_fft_scalar, f2);
219 ALLOC(f, N2, kiss_fft_scalar);
220 ALLOC(f2, N2, kiss_fft_scalar);
221 /* sin(x) ~= x here */
223 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
225 sine = 2*M_PI*(.125f)/N;
230 /* Temp pointers to make it really clear to the compiler what we're doing */
231 const kiss_fft_scalar * restrict xp1 = in;
232 const kiss_fft_scalar * restrict xp2 = in+N2-1;
233 kiss_fft_scalar * restrict yp = f2;
234 const kiss_fft_scalar *t = &l->trig[0];
237 kiss_fft_scalar yr, yi;
238 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
239 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
240 /* works because the cos is nearly one */
241 *yp++ = yr - S_MUL(yi,sine);
242 *yp++ = yi + S_MUL(yr,sine);
248 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
249 kiss_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f);
253 kiss_fft_scalar * restrict fp = f;
254 const kiss_fft_scalar *t = &l->trig[0];
258 kiss_fft_scalar re, im, yr, yi;
261 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
262 yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
263 yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
264 /* works because the cos is nearly one */
265 *fp++ = yr - S_MUL(yi,sine);
266 *fp++ = yi + S_MUL(yr,sine);
269 /* De-shuffle the components for the middle of the window only */
271 const kiss_fft_scalar * restrict fp1 = f;
272 const kiss_fft_scalar * restrict fp2 = f+N2-1;
273 kiss_fft_scalar * restrict yp = f2;
274 for(i = 0; i < N4; i++)
283 /* Mirror on both sides for TDAC */
285 kiss_fft_scalar * restrict fp1 = f2+N4-1;
286 kiss_fft_scalar * restrict xp1 = out+N2-1;
287 kiss_fft_scalar * restrict yp1 = out+N4-overlap/2;
288 const celt_word16 * restrict wp1 = window;
289 const celt_word16 * restrict wp2 = window+overlap-1;
290 for(i = 0; i< N4-overlap/2; i++)
300 *yp1++ +=-MULT16_32_Q15(*wp1, x1);
301 *xp1-- += MULT16_32_Q15(*wp2, x1);
307 kiss_fft_scalar * restrict fp2 = f2+N4;
308 kiss_fft_scalar * restrict xp2 = out+N2;
309 kiss_fft_scalar * restrict yp2 = out+N-1-(N4-overlap/2);
310 const celt_word16 * restrict wp1 = window;
311 const celt_word16 * restrict wp2 = window+overlap-1;
312 for(i = 0; i< N4-overlap/2; i++)
322 *yp2-- = MULT16_32_Q15(*wp1, x2);
323 *xp2++ = MULT16_32_Q15(*wp2, x2);