1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Written by Jean-Marc Valin */
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 #include "os_support.h"
/* In-place pairwise rotation of the vector X: each pair (X[i], X[i+stride])
   is rotated by the Q15 cosine/sine pair (c, s); a forward pass is followed
   by a backward pass over the trailing pairs.
   NOTE(review): this excerpt omits interior lines (opening brace and the
   declarations of i, Xptr, x1, x2, ms) -- ms is presumably the negated sine
   term; confirm against the complete file. */
42 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
/* Forward pass: rotate pairs starting from the beginning of the vector. */
49 for (i=0;i<len-stride;i++)
54 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
55 *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
/* Backward pass: the same rotation applied from the end toward the start. */
57 Xptr = &X[len-2*stride-1];
58 for (i=len-2*stride-1;i>=0;i--)
63 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
64 *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
/* Apply (dir=+1) or undo (dir=-1, via the negated sine calls below) a
   spreading rotation on X before/after PVQ coding.  The rotation angle
   shrinks as the pulse count K grows relative to len, scaled by the
   spread setting.
   NOTE(review): this excerpt omits several lines (braces; declarations of
   i, factor, c, s, stride2; the early return; the dir<0 dispatch) --
   verify the ordering of the four exp_rotation1() calls against the
   complete file. */
68 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
70 static const int SPREAD_FACTOR[3]={15,10,5};
73 opus_val16 gain, theta;
/* No rotation when there are enough pulses per sample or spreading is off. */
77 if (2*K>=len || spread==SPREAD_NONE)
79 factor = SPREAD_FACTOR[spread-1];
/* gain = len/(len+factor*K) in Q15; theta = gain^2/2. */
81 gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
82 theta = HALF16(MULT16_16_Q15(gain,gain));
84 c = celt_cos_norm(EXTEND32(theta));
85 s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */
90 /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
91 It's basically incrementing stride2 as long as (stride2+0.5)^2 < len/stride. */
92 while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
95 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
96 extract_collapse_mask().*/
97 len = celt_udiv(len, stride);
/* Rotate each of the `stride` interleaved sub-vectors independently. */
98 for (i=0;i<stride;i++)
103 exp_rotation1(X+i*len, len, stride2, s, c);
104 exp_rotation1(X+i*len, len, 1, c, s);
/* Inverse direction: -s undoes the forward rotation. */
106 exp_rotation1(X+i*len, len, 1, c, -s);
108 exp_rotation1(X+i*len, len, stride2, s, -c);
113 /** Takes the pitch vector and the decoded residual vector, computes the gain
114 that will give ||p+g*y||=1 and mixes the residual with the pitch. */
/* Fixed-point normalisation: k is roughly log2(sqrt(Ryy)); t shifts Ryy
   into the range expected by celt_rsqrt_norm(); g = gain/sqrt(Ryy) in the
   appropriate Q format.
   NOTE(review): this excerpt omits the declarations of i, k, t, g and the
   loop header around the X[i] store -- confirm against the complete file. */
115 static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
116 int N, opus_val32 Ryy, opus_val16 gain)
126 k = celt_ilog2(Ryy)>>1;
128 t = VSHR32(Ryy, 2*(k-7));
129 g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
/* Scale each integer pulse by g and shift back down to the celt_norm range. */
133 X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
/* Build a B-bit mask with bit i set iff sub-block i of iy contains at
   least one non-zero pulse.
   NOTE(review): this excerpt omits the loop headers, the collapse_mask
   initialisation, N0's declaration, and any B<=1 early-out -- confirm
   against the complete file. */
137 static unsigned extract_collapse_mask(int *iy, int N, int B)
139 unsigned collapse_mask;
144 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
/* N0 = number of samples per interleaved sub-block. */
146 N0 = celt_udiv(N, B);
151 collapse_mask |= (iy[i*N0+j]!=0)<<i;
154 return collapse_mask;
/** PVQ quantisation: search for the K-pulse integer vector iy best matching
    the direction of X, entropy-code it, then overwrite X with the scaled
    reconstruction so encoder and decoder stay in sync.  Returns the
    collapse mask from extract_collapse_mask().
    NOTE(review): this excerpt is heavily elided (brace structure, loop
    headers, the sign-stripping and pre-search loops, the best_id update);
    the comments below describe only what the visible lines establish. */
157 unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
163 VARDECL(celt_norm, y);
165 VARDECL(opus_val16, signx);
172 unsigned collapse_mask;
175 celt_assert2(K>0, "alg_quant() needs at least one pulse");
176 celt_assert2(N>1, "alg_quant() needs at least two dimensions");
178 ALLOC(y, N, celt_norm);
180 ALLOC(signx, N, opus_val16);
/* Spread X's energy across samples before the pulse search. */
182 exp_rotation(X, N, 1, B, K, spread);
184 /* Get rid of the sign */
201 /* Do a pre-search by projecting on the pyramid */
209 /* If X is too small, just replace it with a pulse at 0 */
213 /* Prevents infinities and NaNs from causing too many pulses
214 to be allocated. 64 is an approximation of infinity here. */
215 if (!(sum > EPSILON && sum < 64))
218 X[0] = QCONST16(1.f,14);
222 sum = QCONST16(1.f,14);
/* rcp ~= (K-1)/sum: projection factor onto the L1 "pyramid" of K-1 pulses. */
224 rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
227 /* It's really important to round *towards zero* here */
228 iy[j] = MULT16_16_Q15(X[j],rcp);
230 iy[j] = (int)floor(rcp*X[j]);
/* Accumulate pulse energy (yy) and correlation with the target (xy). */
232 y[j] = (celt_norm)iy[j];
233 yy = MAC16_16(yy, y[j],y[j]);
234 xy = MAC16_16(xy, X[j],y[j]);
239 celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
241 /* This should never happen, but just in case it does (e.g. on silence)
242 we fill the first bin with pulses. */
243 #ifdef FIXED_POINT_DEBUG
244 celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
246 if (pulsesLeft > N+3)
248 opus_val16 tmp = (opus_val16)pulsesLeft;
249 yy = MAC16_16(yy, tmp, tmp);
250 yy = MAC16_16(yy, tmp, y[0]);
/* Greedy refinement: place the remaining pulses one at a time, each time
   picking the position j that maximises (xy+X[j])^2 / (yy+y[j]) -- i.e.
   the largest gain in correlation per unit of added energy. */
256 for (i=0;i<pulsesLeft;i++)
259 opus_val32 best_num = -VERY_LARGE16;
260 opus_val16 best_den = 0;
/* Shift keeps Rxy within 16 bits as the placed-pulse count grows. */
265 rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
268 /* The squared magnitude term gets added anyway, so we might as well
269 add it outside the loop */
274 /* Temporary sums of the new pulse(s) */
275 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
276 /* We're multiplying y[j] by two so we don't have to do it here */
277 Ryy = ADD16(yy, y[j]);
279 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
280 Rxy is positive because the sign is pre-computed) */
281 Rxy = MULT16_16_Q15(Rxy,Rxy);
282 /* The idea is to check for num/den >= best_num/best_den, but that way
283 we can do it without any division */
284 /* OPT: Make sure to use conditional moves here */
285 if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
293 /* Updating the sums of the new pulse(s) */
294 xy = ADD32(xy, EXTEND32(X[best_id]));
295 /* We're multiplying y[j] by two so we don't have to do it here */
296 yy = ADD16(yy, y[best_id]);
298 /* Only now that we've made the final choice, update y/iy */
299 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
304 /* Put the original sign back */
307 X[j] = MULT16_16(signx[j],X[j]);
/* Entropy-code the pulse vector, then rebuild X from it exactly as the
   decoder will, and undo the spreading rotation. */
311 encode_pulses(iy, N, K, enc);
314 normalise_residual(iy, X, N, yy, gain);
315 exp_rotation(X, N, -1, B, K, spread);
318 collapse_mask = extract_collapse_mask(iy, N, B);
320 return collapse_mask;
323 /** Decode pulse vector and combine the result with the pitch vector to produce
324 the final normalised signal in the current band. */
/* Decoder-side mirror of alg_quant(): read iy from the range decoder,
   compute its energy Ryy, scale it into X via normalise_residual(), and
   undo the spreading rotation.  Returns the collapse mask.
   NOTE(review): this excerpt omits iy's declaration/ALLOC, the Ryy
   initialisation and the energy-loop header -- confirm against the
   complete file. */
325 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
326 ec_dec *dec, opus_val16 gain)
330 unsigned collapse_mask;
334 celt_assert2(K>0, "alg_unquant() needs at least one pulse");
335 celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
337 decode_pulses(iy, N, K, dec);
/* Ryy = sum of squared pulses, needed for the 1/sqrt(Ryy) normalisation. */
341 Ryy = MAC16_16(Ryy, iy[i], iy[i]);
343 normalise_residual(iy, X, N, Ryy, gain);
344 exp_rotation(X, N, -1, B, K, spread);
345 collapse_mask = extract_collapse_mask(iy, N, B);
347 return collapse_mask;
/* Rescale X in place so that its L2 norm becomes `gain`; EPSILON guards
   against zero-energy input.  Uses the same fixed-point reciprocal-sqrt
   scheme as normalise_residual().
   NOTE(review): this excerpt omits the declarations of k, E, t, g, xptr
   and the loop header around the store -- confirm against the complete
   file. */
350 void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
360 E = EPSILON + celt_inner_prod(X, X, N);
362 k = celt_ilog2(E)>>1;
364 t = VSHR32(E, 2*(k-7));
365 g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
/* Multiply each sample by g = gain/sqrt(E), shifting back to celt_norm. */
370 *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
373 /*return celt_sqrt(E);*/
/* Compute the stereo angle itheta = atan2(side, mid) scaled so that pi/2
   maps to 16384 (0.63662 ~= 2/pi).  With stereo set, mid/side are formed
   from (X+Y)/2 and (X-Y)/2; otherwise the energies of X and Y are used
   directly as the mid/side energies.
   NOTE(review): this excerpt is cut off before the function's return and
   omits declarations/loop headers; the two itheta assignments are
   presumably the fixed-point and floating-point variants of the same
   expression -- confirm against the complete file. */
376 int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N)
380 opus_val16 mid, side;
381 opus_val32 Emid, Eside;
383 Emid = Eside = EPSILON;
/* Stereo path: accumulate mid/side energies sample by sample; inputs are
   halved via SHR16 before summing, which avoids 16-bit overflow. */
389 m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
390 s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
391 Emid = MAC16_16(Emid, m, m);
392 Eside = MAC16_16(Eside, s, s);
/* Non-stereo path: per-channel energies stand in for mid/side energies. */
395 Emid += celt_inner_prod(X, X, N);
396 Eside += celt_inner_prod(Y, Y, N);
398 mid = celt_sqrt(Emid);
399 side = celt_sqrt(Eside);
/* 0.63662 = 2/pi: maps the atan2p/atan2 result onto [0, 16384]. */
402 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
404 itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));