}
}
+/* 5-tap FIR filter with caller-held state.
+ *
+ * For each of the N input samples this computes
+ *    y[i] = ROUND16(SHL32(x[i], SIG_SHIFT) + sum_k num[k]*h[k], SIG_SHIFT)
+ * where h[0]..h[4] are the five inputs that preceded x[i], h[0] being the
+ * most recent one.
+ *
+ * x    input samples (N of them)
+ * num  the five filter coefficients
+ * y    output samples (N of them); x[i] is read before y[i] is written, so
+ *      y may be the same buffer as x
+ * N    number of samples to process
+ * mem  five-entry history: supplies h[] on entry and receives the last five
+ *      inputs (newest first) on return
+ */
+static void celt_fir5(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         opus_val16 *mem)
+{
+   opus_val16 taps[5];
+   opus_val32 hist[5];
+   int n, k;
+
+   /* Work on local copies of the coefficients and of the delay line. */
+   for (k=0;k<5;k++)
+   {
+      taps[k] = num[k];
+      hist[k] = mem[k];
+   }
+
+   for (n=0;n<N;n++)
+   {
+      const opus_val16 xn = x[n];
+      opus_val32 acc = SHL32(EXTEND32(xn), SIG_SHIFT);
+
+      /* Accumulate from the newest history sample towards the oldest. */
+      for (k=0;k<5;k++)
+         acc = MAC16_16(acc, taps[k], hist[k]);
+
+      /* Age the delay line by one sample, then insert the current input. */
+      for (k=4;k>0;k--)
+         hist[k] = hist[k-1];
+      hist[0] = xn;
+
+      y[n] = ROUND16(acc, SIG_SHIFT);
+   }
+
+   /* Hand the updated history back to the caller. */
+   for (k=0;k<5;k++)
+      mem[k] = hist[k];
+}
+
+
void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
- int len, int C)
+ int len, int C, int arch)
{
int i;
opus_val32 ac[5];
opus_val16 tmp=Q15ONE;
- opus_val16 lpc[4], mem[4]={0,0,0,0};
+ opus_val16 lpc[4], mem[5]={0,0,0,0,0};
+ opus_val16 lpc2[5];
+ opus_val16 c1 = QCONST16(.8f,15);
#ifdef FIXED_POINT
int shift;
opus_val32 maxabs = celt_maxabs32(x[0], len);
}
_celt_autocorr(x_lp, ac, NULL, 0,
- 4, len>>1);
+ 4, len>>1, arch);
/* Noise floor -40 dB */
#ifdef FIXED_POINT
tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
lpc[i] = MULT16_16_Q15(lpc[i], tmp);
}
- celt_fir(x_lp, lpc, x_lp, len>>1, 4, mem);
-
- mem[0]=0;
- lpc[0]=QCONST16(.8f,12);
- celt_fir(x_lp, lpc, x_lp, len>>1, 1, mem);
-
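+ /* Add a zero: multiply the 4-tap LPC polynomial by (1 + .8*z^-1) so that
+ a single 5-tap FIR pass applies both stages at once */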
+ lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
+ lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
+ lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
+ lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
+ lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
+ celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
}
#if 0 /* This is a simple version of the pitch correlation that should work
well on DSPs like Blackfin and TI C5x/C6x */
-static void pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch
#ifdef FIXED_POINT
- ,opus_val32 *maxval
+opus_val32
+#else
+void
#endif
- )
+celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
{
int i, j;
#ifdef FIXED_POINT
opus_val32 sum = 0;
for (j=0;j<len;j++)
sum = MAC16_16(sum, x[j],y[i+j]);
- xcorr[i] = MAX32(-1, sum);
+ xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
#endif
}
#ifdef FIXED_POINT
- *maxval = maxcorr;
+ return maxcorr;
#endif
}
#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
-static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch
#ifdef FIXED_POINT
- ,opus_val32 *maxval
+opus_val32
+#else
+void
#endif
- )
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
{
int i,j;
#ifdef FIXED_POINT
opus_val32 maxcorr=1;
#endif
- /* Truncate slightly if len is not a multiple of 4. */
- len -= len&3;
for (i=0;i<max_pitch-3;i+=4)
{
- /* Compute correlation*/
- /*corr[nb_pitch-1-i]=inner_prod(x, _y+i, len);*/
- opus_val32 sum1=0;
- opus_val32 sum2=0;
- opus_val32 sum3=0;
- opus_val32 sum4=0;
- const opus_val16 *y = _y+i;
- const opus_val16 *x = _x;
- opus_val16 y0, y1, y2, y3;
- /*y0=y[0];y1=y[1];y2=y[2];y3=y[3];*/
- y0=*y++;
- y1=*y++;
- y2=*y++;
- for (j=0;j<len;j+=4)
- {
- opus_val16 tmp;
- tmp = *x++;
- y3=*y++;
- sum1 = MAC16_16(sum1,tmp,y0);
- sum2 = MAC16_16(sum2,tmp,y1);
- sum3 = MAC16_16(sum3,tmp,y2);
- sum4 = MAC16_16(sum4,tmp,y3);
- tmp=*x++;
- y0=*y++;
- sum1 = MAC16_16(sum1,tmp,y1);
- sum2 = MAC16_16(sum2,tmp,y2);
- sum3 = MAC16_16(sum3,tmp,y3);
- sum4 = MAC16_16(sum4,tmp,y0);
- tmp=*x++;
- y1=*y++;
- sum1 = MAC16_16(sum1,tmp,y2);
- sum2 = MAC16_16(sum2,tmp,y3);
- sum3 = MAC16_16(sum3,tmp,y0);
- sum4 = MAC16_16(sum4,tmp,y1);
- tmp=*x++;
- y2=*y++;
- sum1 = MAC16_16(sum1,tmp,y3);
- sum2 = MAC16_16(sum2,tmp,y0);
- sum3 = MAC16_16(sum3,tmp,y1);
- sum4 = MAC16_16(sum4,tmp,y2);
- }
- xcorr[i]=MAX32(-1, sum1);
- xcorr[i+1]=MAX32(-1, sum2);
- xcorr[i+2]=MAX32(-1, sum3);
- xcorr[i+3]=MAX32(-1, sum4);
+ opus_val32 sum[4]={0,0,0,0};
+ xcorr_kernel(_x, _y+i, sum, len);
+ xcorr[i]=sum[0];
+ xcorr[i+1]=sum[1];
+ xcorr[i+2]=sum[2];
+ xcorr[i+3]=sum[3];
#ifdef FIXED_POINT
- sum1 = MAX32(sum1, sum2);
- sum3 = MAX32(sum3, sum4);
- sum1 = MAX32(sum1, sum3);
- maxcorr = MAX32(maxcorr, sum1);
+ sum[0] = MAX32(sum[0], sum[1]);
+ sum[2] = MAX32(sum[2], sum[3]);
+ sum[0] = MAX32(sum[0], sum[2]);
+ maxcorr = MAX32(maxcorr, sum[0]);
#endif
}
/* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
opus_val32 sum = 0;
for (j=0;j<len;j++)
sum = MAC16_16(sum, _x[j],_y[i+j]);
- xcorr[i] = MAX32(-1, sum);
+ xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
#endif
}
#ifdef FIXED_POINT
- *maxval = maxcorr;
+ return maxcorr;
#endif
}
#endif
void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
- int len, int max_pitch, int *pitch)
+ int len, int max_pitch, int *pitch, int arch)
{
int i, j;
int lag;
/* Coarse search with 4x decimation */
- pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2
#ifdef FIXED_POINT
- ,&maxcorr
+ maxcorr =
#endif
- );
+ celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch);
find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
#ifdef FIXED_POINT
int k, i, T, T0;
opus_val16 g, g0;
opus_val16 pg;
- opus_val32 xy,xx,yy;
+ opus_val32 xy,xx,yy,xy2;
opus_val32 xcorr[3];
opus_val32 best_xy, best_yy;
int offset;
int minperiod0;
+ VARDECL(opus_val32, yy_lookup);
+ SAVE_STACK;
minperiod0 = minperiod;
maxperiod /= 2;
*T0_=maxperiod-1;
T = T0 = *T0_;
- xx=xy=yy=0;
- for (i=0;i<N;i++)
+ ALLOC(yy_lookup, maxperiod+1, opus_val32);
+ dual_inner_prod(x, x, x-T0, N, &xx, &xy);
+ yy_lookup[0] = xx;
+ yy=xx;
+ for (i=1;i<=maxperiod;i++)
{
- xy = MAC16_16(xy, x[i], x[i-T0]);
- xx = MAC16_16(xx, x[i], x[i]);
- yy = MAC16_16(yy, x[i-T0],x[i-T0]);
+ yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
+ yy_lookup[i] = MAX32(0, yy);
}
+ yy = yy_lookup[T0];
best_xy = xy;
best_yy = yy;
#ifdef FIXED_POINT
{
T1b = (2*second_check[k]*T0+k)/(2*k);
}
- xy=yy=0;
- for (i=0;i<N;i++)
- {
- xy = MAC16_16(xy, x[i], x[i-T1]);
- yy = MAC16_16(yy, x[i-T1], x[i-T1]);
-
- xy = MAC16_16(xy, x[i], x[i-T1b]);
- yy = MAC16_16(yy, x[i-T1b], x[i-T1b]);
- }
+ dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
+ xy += xy2;
+ yy = yy_lookup[T1] + yy_lookup[T1b];
#ifdef FIXED_POINT
{
opus_val32 x2y2;
if (*T0_<minperiod0)
*T0_=minperiod0;
+ RESTORE_STACK;
return pg;
}