1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Written by Jean-Marc Valin */
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
14 - Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
17 - Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 #include "os_support.h"
41 #include "stack_alloc.h"
45 static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
46 int max_pitch, int *best_pitch
48 , int yshift, opus_val32 maxcorr
54 opus_val16 best_num[2];
55 opus_val32 best_den[2];
59 xshift = celt_ilog2(maxcorr)-14;
69 Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
70 for (i=0;i<max_pitch;i++)
76 xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
78 /* Considering the range of xcorr16, this should avoid both underflows
79 and overflows (inf) when squaring xcorr16 */
82 num = MULT16_16_Q15(xcorr16,xcorr16);
83 if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
85 if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
87 best_num[1] = best_num[0];
88 best_den[1] = best_den[0];
89 best_pitch[1] = best_pitch[0];
100 Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
105 static void celt_fir5(const opus_val16 *x,
106 const opus_val16 *num,
112 opus_val16 num0, num1, num2, num3, num4;
113 opus_val32 mem0, mem1, mem2, mem3, mem4;
126 opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
127 sum = MAC16_16(sum,num0,mem0);
128 sum = MAC16_16(sum,num1,mem1);
129 sum = MAC16_16(sum,num2,mem2);
130 sum = MAC16_16(sum,num3,mem3);
131 sum = MAC16_16(sum,num4,mem4);
137 y[i] = ROUND16(sum, SIG_SHIFT);
147 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
152 opus_val16 tmp=Q15ONE;
153 opus_val16 lpc[4], mem[5]={0,0,0,0,0};
155 opus_val16 c1 = QCONST16(.8f,15);
158 opus_val32 maxabs = celt_maxabs32(x[0], len);
161 opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
162 maxabs = MAX32(maxabs, maxabs_1);
166 shift = celt_ilog2(maxabs)-10;
172 for (i=1;i<len>>1;i++)
173 x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift);
174 x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift);
177 for (i=1;i<len>>1;i++)
178 x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift);
179 x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift);
182 _celt_autocorr(x_lp, ac, NULL, 0,
185 /* Noise floor -40 dB */
187 ac[0] += SHR32(ac[0],13);
194 /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
196 ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
198 ac[i] -= ac[i]*(.008f*i)*(.008f*i);
202 _celt_lpc(lpc, ac, 4);
205 tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
206 lpc[i] = MULT16_16_Q15(lpc[i], tmp);
209 lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
210 lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
211 lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
212 lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
213 lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
214 celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
217 #if 0 /* This is a simple version of the pitch correlation that should work
218 well on DSPs like Blackfin and TI C5x/C6x */
225 pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
229 opus_val32 maxcorr=1;
231 for (i=0;i<max_pitch;i++)
235 sum = MAC16_16(sum, x[j],y[i+j]);
238 maxcorr = MAX32(maxcorr, sum);
246 #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
253 pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
257 opus_val32 maxcorr=1;
259 for (i=0;i<max_pitch-3;i+=4)
261 /* Compute correlation*/
262 /*corr[nb_pitch-1-i]=inner_prod(x, _y+i, len);*/
267 const opus_val16 *y = _y+i;
268 const opus_val16 *x = _x;
269 opus_val16 y_0, y_1, y_2, y_3;
270 y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
274 for (j=0;j<len-3;j+=4)
279 sum1 = MAC16_16(sum1,tmp,y_0);
280 sum2 = MAC16_16(sum2,tmp,y_1);
281 sum3 = MAC16_16(sum3,tmp,y_2);
282 sum4 = MAC16_16(sum4,tmp,y_3);
285 sum1 = MAC16_16(sum1,tmp,y_1);
286 sum2 = MAC16_16(sum2,tmp,y_2);
287 sum3 = MAC16_16(sum3,tmp,y_3);
288 sum4 = MAC16_16(sum4,tmp,y_0);
291 sum1 = MAC16_16(sum1,tmp,y_2);
292 sum2 = MAC16_16(sum2,tmp,y_3);
293 sum3 = MAC16_16(sum3,tmp,y_0);
294 sum4 = MAC16_16(sum4,tmp,y_1);
297 sum1 = MAC16_16(sum1,tmp,y_3);
298 sum2 = MAC16_16(sum2,tmp,y_0);
299 sum3 = MAC16_16(sum3,tmp,y_1);
300 sum4 = MAC16_16(sum4,tmp,y_2);
304 opus_val16 tmp = *x++;
306 sum1 = MAC16_16(sum1,tmp,y_0);
307 sum2 = MAC16_16(sum2,tmp,y_1);
308 sum3 = MAC16_16(sum3,tmp,y_2);
309 sum4 = MAC16_16(sum4,tmp,y_3);
315 sum1 = MAC16_16(sum1,tmp,y_1);
316 sum2 = MAC16_16(sum2,tmp,y_2);
317 sum3 = MAC16_16(sum3,tmp,y_3);
318 sum4 = MAC16_16(sum4,tmp,y_0);
324 sum1 = MAC16_16(sum1,tmp,y_2);
325 sum2 = MAC16_16(sum2,tmp,y_3);
326 sum3 = MAC16_16(sum3,tmp,y_0);
327 sum4 = MAC16_16(sum4,tmp,y_1);
334 sum1 = MAX32(sum1, sum2);
335 sum3 = MAX32(sum3, sum4);
336 sum1 = MAX32(sum1, sum3);
337 maxcorr = MAX32(maxcorr, sum1);
340 /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
341 for (;i<max_pitch;i++)
345 sum = MAC16_16(sum, _x[j],_y[i+j]);
348 maxcorr = MAX32(maxcorr, sum);
357 void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
358 int len, int max_pitch, int *pitch)
362 int best_pitch[2]={0,0};
363 VARDECL(opus_val16, x_lp4);
364 VARDECL(opus_val16, y_lp4);
365 VARDECL(opus_val32, xcorr);
368 opus_val32 xmax, ymax;
376 celt_assert(max_pitch>0);
379 ALLOC(x_lp4, len>>2, opus_val16);
380 ALLOC(y_lp4, lag>>2, opus_val16);
381 ALLOC(xcorr, max_pitch>>1, opus_val32);
383 /* Downsample by 2 again */
384 for (j=0;j<len>>2;j++)
385 x_lp4[j] = x_lp[2*j];
386 for (j=0;j<lag>>2;j++)
390 xmax = celt_maxabs16(x_lp4, len>>2);
391 ymax = celt_maxabs16(y_lp4, lag>>2);
392 shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
395 for (j=0;j<len>>2;j++)
396 x_lp4[j] = SHR16(x_lp4[j], shift);
397 for (j=0;j<lag>>2;j++)
398 y_lp4[j] = SHR16(y_lp4[j], shift);
399 /* Use double the shift for a MAC */
406 /* Coarse search with 4x decimation */
411 pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
413 find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
419 /* Finer search with 2x decimation */
423 for (i=0;i<max_pitch>>1;i++)
427 if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
429 for (j=0;j<len>>1;j++)
430 sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
431 xcorr[i] = MAX32(-1, sum);
433 maxcorr = MAX32(maxcorr, sum);
436 find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
442 /* Refine by pseudo-interpolation */
443 if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
446 a = xcorr[best_pitch[0]-1];
447 b = xcorr[best_pitch[0]];
448 c = xcorr[best_pitch[0]+1];
449 if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
451 else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
458 *pitch = 2*best_pitch[0]-offset;
463 static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
464 opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
465 int N, int *T0_, int prev_period, opus_val16 prev_gain)
472 opus_val32 best_xy, best_yy;
476 VARDECL(opus_val32, yy_lookup);
479 minperiod0 = minperiod;
490 ALLOC(yy_lookup, maxperiod+1, opus_val32);
494 xx = MAC16_16(xx, x[i], x[i]);
495 xy = MAC16_16(xy, x[i], x[i-T0]);
499 for (i=1;i<=maxperiod;i++)
501 yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
502 yy_lookup[i] = MAX32(0, yy);
511 x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
512 sh = celt_ilog2(x2y2)>>1;
513 t = VSHR32(x2y2, 2*(sh-7));
514 g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
517 g = g0 = xy/celt_sqrt(1+xx*yy);
519 /* Look for any pitch at T/k */
529 /* Look for another strong correlation at T1b */
538 T1b = (2*second_check[k]*T0+k)/(2*k);
543 xy = MAC16_16(xy, x[i], x[i-T1]);
544 xy = MAC16_16(xy, x[i], x[i-T1b]);
546 yy = yy_lookup[T1] + yy_lookup[T1b];
551 x2y2 = 1+MULT32_32_Q31(xx,yy);
552 sh = celt_ilog2(x2y2)>>1;
553 t = VSHR32(x2y2, 2*(sh-7));
554 g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
557 g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
559 if (abs(T1-prev_period)<=1)
561 else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
562 cont = HALF32(prev_gain);
565 thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
566 /* Bias against very high pitch (very short period) to avoid false-positives
567 due to short-term correlation */
569 thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
570 else if (T1<2*minperiod)
571 thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
580 best_xy = MAX32(0, best_xy);
581 if (best_yy <= best_xy)
584 pg = SHR32(frac_div32(best_xy,best_yy+1),16);
591 xy = MAC16_16(xy, x[i], x[i-T1]);
594 if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
596 else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))