1 /* (C) 2007-2008 Jean-Marc Valin, CSIRO
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
15 - Neither the name of the Xiph.org Foundation nor the names of its
16 contributors may be used to endorse or promote products derived from
17 this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include "os_support.h"
42 /** Takes the pitch vector and the decoded residual vector, computes the gain
43 that will give ||p+g*y||=1 and mixes the residual with the pitch. */
44 static void mix_pitch_and_residual(int * restrict iy, celt_norm_t * restrict X, int N, int K, const celt_norm_t * restrict P)
47 celt_word32_t Ryp, Ryy, Rpp;
49 VARDECL(celt_norm_t, y);
55 yshift = 13-celt_ilog2(K);
57 ALLOC(y, N, celt_norm_t);
60 printf ("%d ", iy[i]);*/
64 Rpp = MAC16_16(Rpp,P[i],P[i]);
65 y[i] = SHL16(iy[i],yshift);
70 /* If this doesn't generate a dual MAC (on supported archs), fire the compiler guy */
73 Ryp = MAC16_16(Ryp, y[i], P[i]);
74 Ryy = MAC16_16(Ryy, y[i], y[i]);
77 /* g = (sqrt(Ryp^2 + Ryy - Rpp*Ryy)-Ryp)/Ryy */
79 celt_sqrt(MULT16_16(ROUND16(Ryp,14),ROUND16(Ryp,14)) + Ryy -
80 MULT16_16(ROUND16(Ryy,14),ROUND16(Rpp,14)))
82 celt_rcp(SHR32(Ryy,9)));
86 X[i] = P[i] + ROUND16(MULT16_16(y[i], g),11);
93 void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *P, ec_enc *enc)
95 VARDECL(celt_norm_t, y);
102 celt_word32_t xy, yy, yp;
104 int N_1; /* Inverse of N, in Q14 format (even for float) */
111 yshift = 13-celt_ilog2(K);
114 ALLOC(y, N, celt_norm_t);
116 ALLOC(signx, N, int);
128 sum = MAC16_16(sum, P[j],P[j]);
130 Rpp = ROUND16(sum, NORM_SHIFT);
132 celt_assert2(Rpp<=NORM_SCALING, "Rpp should never have a norm greater than unity");
137 while (pulsesLeft > 0)
141 celt_word16_t magnitude;
145 /* Decide on how many pulses to find at once */
146 pulsesAtOnce = (pulsesLeft*N_1)>>9; /* pulsesLeft/N */
150 rshift = yshift+1+celt_ilog2(K-pulsesLeft+pulsesAtOnce);
152 magnitude = SHL16(pulsesAtOnce, yshift);
155 /* The squared magnitude term gets added anyway, so we might as well
156 add it outside the loop */
157 yy = ADD32(yy, MULT16_16(magnitude,magnitude));
158 /* Choose between fast and accurate strategy depending on where we are in the search */
161 /* This should ensure that anything we can process will have a better score */
162 celt_word32_t best_num = -VERY_LARGE16;
163 celt_word16_t best_den = 0;
166 celt_word16_t Rxy, Ryy;
167 /* Select sign based on X[j] alone */
168 s = signx[j]*magnitude;
169 /* Temporary sums of the new pulse(s) */
170 Rxy = EXTRACT16(SHR32(xy + MULT16_16(s,X[j]),rshift));
171 /* We're multiplying y[j] by two so we don't have to do it here */
172 Ryy = EXTRACT16(SHR32(yy + MULT16_16(s,y[j]),rshift));
174 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
175 Rxy is positive because the sign is pre-computed) */
176 Rxy = MULT16_16_Q15(Rxy,Rxy);
177 /* The idea is to check for num/den >= best_num/best_den, but that way
178 we can do it without any division */
179 /* OPT: Make sure to use conditional moves here */
180 if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
189 celt_word16_t best_num = -VERY_LARGE16;
190 celt_word16_t best_den = 0;
193 celt_word16_t Rxy, Ryy, Ryp;
195 /* Select sign based on X[j] alone */
196 s = signx[j]*magnitude;
197 /* Temporary sums of the new pulse(s) */
198 /* We only shift by 13 so we don't have to multiply by 2 later */
199 Rxy = ROUND16(xy + MULT16_16(s,X[j]), 13);
200 /* We're multiplying y[j] by two so we don't have to do it here */
201 Ryy = ROUND16(yy + MULT16_16(s,y[j]), 14);
202 Ryp = ROUND16(yp + MULT16_16(s,P[j]), 14);
204 /* Compute the gain such that ||p + g*y|| = 1
205 ...but instead, we compute g*Ryy to avoid dividing */
206 g = celt_psqrt(MULT16_16(Ryp,Ryp) + MULT16_16(Ryy,QCONST16(1.f,14)-Rpp)) - Ryp;
207 /* Knowing that gain, what's the error: (x-g*y)^2
208 (result is negated and we discard x^2 because it's constant) */
209 /* score = 2*g*Rxy - g*g*Ryy;*/
211 /* No need to multiply Rxy by 2 because we did it earlier */
212 num = EXTRACT16(SHR32(MULT16_16(SUB16(Rxy,g),g),15));
216 if (MULT16_16(best_den, num) > MULT16_16(Ryy, best_num))
226 is = signx[j]*pulsesAtOnce;
227 s = SHL16(is, yshift);
229 /* Updating the sums of the new pulse(s) */
230 xy = xy + MULT16_16(s,X[j]);
231 /* We're multiplying y[j] by two so we don't have to do it here */
232 yy = yy + MULT16_16(s,y[j]);
233 yp = yp + MULT16_16(s, P[j]);
235 /* Only now that we've made the final choice, update y/iy */
236 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
239 pulsesLeft -= pulsesAtOnce;
242 encode_pulses(iy, N, K, enc);
244 /* Recompute the gain in one pass to reduce the encoder-decoder mismatch
245 due to the recursive computation used in quantisation. */
246 mix_pitch_and_residual(iy, X, N, K, P);
251 /** Decode pulse vector and combine the result with the pitch vector to produce
252 the final normalised signal in the current band. */
253 void alg_unquant(celt_norm_t *X, int N, int K, celt_norm_t *P, ec_dec *dec)
258 decode_pulses(iy, N, K, dec);
259 mix_pitch_and_residual(iy, X, N, K, P);
264 static const celt_word16_t pg[11] = {32767, 24576, 21299, 19661, 19661, 19661, 18022, 18022, 16384, 16384, 16384};
266 static const celt_word16_t pg[11] = {1.f, .75f, .65f, 0.6f, 0.6f, .6f, .55f, .55f, .5f, .5f, .5f};
270 #define LOG_MAX_INTRA 5
272 void intra_prediction(celt_norm_t *x, celt_mask_t *W, int N, int K, celt_norm_t *Y, celt_norm_t * restrict P, int B, int N0, ec_enc *enc)
276 celt_word16_t best_num=-VERY_LARGE16;
277 celt_word16_t best_den=0;
281 celt_word16_t pred_gain;
284 VARDECL(celt_norm_t, Xr);
287 ALLOC(Xr, B*N, celt_norm_t);
289 if (max_pos > MAX_INTRA)
292 /* Reverse the samples of x without reversing the channels */
295 Xr[B*N-B*j-B+c] = x[B*j+c];
297 /* Compute yy for i=0 */
300 yy = MAC16_16(yy, Y[j], Y[j]);
301 } while (++j<B*N); /* Promises we loop at least once */
303 for (i=0;i<max_pos;i++)
306 celt_word16_t num, den;
307 const celt_word16_t * restrict xp = Xr;
308 const celt_word16_t * restrict yp = Y+B*i;
311 xy = MAC16_16(xy, *xp++, *yp++);
312 } while (++j<B*N); /* Promises we loop at least once */
313 /* Using xy^2/yy as the score but without having to do the division */
314 num = MULT16_16_Q15(ROUND16(xy,14),ROUND16(xy,14));
315 den = ROUND16(yy,14);
316 /* If you're really desperate for speed, just use xy as the score */
317 /* OPT: Make sure to use a conditional move here */
318 if (MULT16_16(best_den, num) > MULT16_16(den, best_num))
323 /* Store xy as the sign. We'll normalise it to +/- 1 later. */
326 /* Update yy for the next iteration */
330 yy = yy - MULT16_16(*yp, *yp) + MULT16_16(yp[B*N], yp[B*N]);
342 /*printf ("%d %d ", sign, best);*/
343 ec_enc_bits(enc,sign,1);
344 if (max_pos == MAX_INTRA)
345 ec_enc_bits(enc,best,LOG_MAX_INTRA);
347 ec_enc_uint(enc,best,max_pos);
349 /*printf ("%d %f\n", best, best_score);*/
360 P[B*j+c] = s*Y[B*best+B*(N-j-1)+c];
361 E = MAC16_16(E, P[B*j+c],P[B*j+c]);
364 /*pred_gain = pred_gain/sqrt(E);*/
365 pred_gain = MULT16_16_Q15(pred_gain,celt_rcp(SHL32(celt_sqrt(E),9)));
367 P[j] = PSHR32(MULT16_16(pred_gain, P[j]),8);
376 /*printf ("quant ");*/
377 /*for (j=0;j<N;j++) printf ("%f ", P[j]);*/
381 void intra_unquant(celt_norm_t *x, int N, int K, celt_norm_t *Y, celt_norm_t * restrict P, int B, int N0, ec_dec *dec)
388 celt_word16_t pred_gain;
390 if (max_pos > MAX_INTRA)
393 sign = ec_dec_bits(dec, 1);
399 if (max_pos == MAX_INTRA)
400 best = B*ec_dec_bits(dec, LOG_MAX_INTRA);
402 best = B*ec_dec_uint(dec, max_pos);
403 /*printf ("%d %d ", sign, best);*/
414 P[B*j+c] = s*Y[best+B*(N-j-1)+c];
415 E = MAC16_16(E, P[B*j+c],P[B*j+c]);
418 /*pred_gain = pred_gain/sqrt(E);*/
419 pred_gain = MULT16_16_Q15(pred_gain,celt_rcp(SHL32(celt_sqrt(E),9)));
421 P[j] = PSHR32(MULT16_16(pred_gain, P[j]),8);
429 void intra_fold(celt_norm_t *x, int N, celt_norm_t *Y, celt_norm_t * restrict P, int B, int N0, int Nmax)
442 P[j*B+i] = Y[(Nmax-N0-j-1)*B+i];
443 E += P[j*B+i]*P[j*B+i];
450 E = MAC16_16(E, P[j],P[j]);
453 g = celt_rcp(SHL32(celt_sqrt(E),9));
455 P[j] = PSHR32(MULT16_16(g, P[j]),8);