Temporal pitch search
authorJean-Marc Valin <jean-marc.valin@usherbrooke.ca>
Sun, 8 Nov 2009 13:29:54 +0000 (22:29 +0900)
committerJean-Marc Valin <jean-marc.valin@usherbrooke.ca>
Sun, 8 Nov 2009 13:36:51 +0000 (22:36 +0900)
libcelt/celt.c
libcelt/pitch.c
libcelt/pitch.h

index c7d1c09..33f8a30 100644 (file)
@@ -104,6 +104,8 @@ struct CELTEncoder {
 
    celt_sig *in_mem;
    celt_sig *out_mem;
+   celt_word16 *pitch_buf;
+   celt_sig xmem;
 
    celt_word16 *oldBandE;
 #ifdef EXP_PSY
@@ -185,6 +187,7 @@ CELTEncoder *celt_encoder_create(const CELTMode *mode, int channels, int *error)
 
    st->in_mem = celt_alloc(st->overlap*C*sizeof(celt_sig));
    st->out_mem = celt_alloc((MAX_PERIOD+st->overlap)*C*sizeof(celt_sig));
+   st->pitch_buf = celt_alloc((MAX_PERIOD>>1)*sizeof(celt_word16));
 
    st->oldBandE = (celt_word16*)celt_alloc(C*mode->nbEBands*sizeof(celt_word16));
 
@@ -657,7 +660,8 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig * pcm, celt_sig
             && norm_rate < 50;
    if (has_pitch)
    {
-      find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, NULL, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index, C);
+      /*find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, NULL, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index, C);*/
+      find_temporal_pitch(st->mode, in, st->pitch_buf, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index, C, &st->xmem);
    }
 
    /* Deferred allocation after find_spectral_pitch() to reduce 
@@ -667,7 +671,6 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig * pcm, celt_sig
    ALLOC(pitch_freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
    if (has_pitch)
    {
-      
       compute_mdcts(st->mode, 0, st->out_mem+pitch_index*C, pitch_freq, C);
       has_pitch = compute_pitch_gain(st->mode, freq, pitch_freq, norm_rate, &gain_id, C, &st->gain_prod);
    }
@@ -1247,7 +1250,14 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p
    
    if (st->loss_count == 0)
    {
-      find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, st->out_mem+MAX_PERIOD-len, st->out_mem, st->mode->window, NULL, len, MAX_PERIOD-len-100, &pitch_index, C);
+      celt_word16 pitch_buf[MAX_PERIOD];
+      celt_word32 tmp=0;
+      /*find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, st->out_mem+MAX_PERIOD-len, st->out_mem, st->mode->window, NULL, len, MAX_PERIOD-len-100, &pitch_index, C);*/
+      /* FIXME: Should do a bit of interpolation while decimating */
+      for (i=0;i<MAX_PERIOD;i++)
+         pitch_buf[i] = EXTRACT16(SHR32(st->out_mem[2*i], SIG_SHIFT));
+      find_temporal_pitch(st->mode, st->out_mem+MAX_PERIOD-len, pitch_buf, len, MAX_PERIOD-len-100, &pitch_index, C, &tmp);
+
       pitch_index = MAX_PERIOD-len-pitch_index;
       st->last_pitch_index = pitch_index;
    } else {
index e18323f..fc68d38 100644 (file)
@@ -245,5 +245,158 @@ void find_spectral_pitch(const CELTMode *m, kiss_fftr_cfg fft, const struct PsyD
    
    /* The peak in the correlation gives us the pitch */
    *pitch = find_max16(Y, max_pitch);
+   /*printf ("%d ", *pitch);*/
    RESTORE_STACK;
 }
+
+void find_best_pitch(celt_word32 *xcorr, celt_word32 maxcorr, celt_word16 *y, int yshift, int len, int max_pitch, int best_pitch[2])
+{
+   int i, j;
+   celt_word32 Syy=1;
+   celt_word16 best_num[2];
+   celt_word32 best_den[2];
+#ifdef FIXED_POINT
+   int xshift;
+
+   xshift = celt_ilog2(maxcorr)-14;
+#endif
+
+   best_num[0] = -1;
+   best_num[1] = -1;
+   best_den[0] = 0;
+   best_den[1] = 0;
+   best_pitch[0] = 0;
+   best_pitch[1] = 1;
+   for (j=0;j<len;j++)
+      Syy = MAC16_16(Syy, y[j],y[j]);
+   for (i=0;i<max_pitch;i++)
+   {
+      float score;
+      if (xcorr[i]>0)
+      {
+         celt_word16 num;
+         celt_word32 xcorr16;
+         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
+         num = MULT16_16_Q15(xcorr16,xcorr16);
+         score = num*1./Syy;
+         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
+         {
+            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
+            {
+               best_num[1] = best_num[0];
+               best_den[1] = best_den[0];
+               best_pitch[1] = best_pitch[0];
+               best_num[0] = num;
+               best_den[0] = Syy;
+               best_pitch[0] = i;
+            } else {
+               best_num[1] = num;
+               best_den[1] = Syy;
+               best_pitch[1] = i;
+            }
+         }
+      }
+      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
+      Syy = MAX32(1, Syy);
+   }
+}
+
+void find_temporal_pitch(const CELTMode *m, const celt_sig * restrict x, celt_word16 * restrict y, int len, int max_pitch, int *pitch, int _C, celt_sig *xmem)
+{
+   int i, j;
+   const int C = CHANNELS(_C);
+   const int lag = MAX_PERIOD;
+   const int N = FRAMESIZE(m);
+   int best_pitch[2]={0};
+   celt_word16 x_lp[len>>1];
+   celt_word16 x_lp4[len>>2];
+   celt_word16 y_lp4[lag>>2];
+   celt_word32 xcorr[max_pitch>>1];
+   celt_word32 maxcorr=1;
+   int offset;
+   int shift=0;
+
+   /* Down-sample by two and downmix to mono */
+   for (i=1;i<len>>1;i++)
+      x_lp[i] = SHR32(HALF32(HALF32(x[(2*i-1)*C]+x[(2*i+1)*C])+x[2*i*C]), SIG_SHIFT);
+   x_lp[0] = SHR32(HALF32(HALF32(*xmem+x[C])+x[0]), SIG_SHIFT);
+   *xmem = x[N-C];
+   if (C==2)
+   {
+      for (i=1;i<len>>1;i++)
+      x_lp[i] = SHR32(HALF32(HALF32(x[(2*i-1)*C+1]+x[(2*i+1)*C+1])+x[2*i*C+1]), SIG_SHIFT);
+      x_lp[0] += SHR32(HALF32(HALF32(x[C+1])+x[1]), SIG_SHIFT);
+      *xmem += x[N-C+1];
+   }
+
+   /* Downsample by 2 again */
+   for (j=0;j<len>>2;j++)
+      x_lp4[j] = x_lp[2*j];
+   for (j=0;j<lag>>2;j++)
+      y_lp4[j] = y[2*j];
+
+#ifdef FIXED_POINT
+   shift = celt_ilog2(MAX16(celt_maxabs16(x_lp4, len>>2), celt_maxabs16(y_lp4, lag>>2)))-11;
+   if (shift>0)
+   {
+      for (j=0;j<len>>2;j++)
+         x_lp4[j] = SHR16(x_lp4[j], shift);
+      for (j=0;j<lag>>2;j++)
+         y_lp4[j] = SHR16(y_lp4[j], shift);
+      /* Use double the shift for a MAC */
+      shift *= 2;
+   } else {
+      shift = 0;
+   }
+#endif
+
+   /* Coarse search with 4x decimation */
+
+   for (i=0;i<max_pitch>>2;i++)
+   {
+      celt_word32 sum = 0;
+      for (j=0;j<len>>2;j++)
+         sum = MAC16_16(sum, x_lp4[j],y_lp4[i+j]);
+      xcorr[i] = MAX32(-1, sum);
+      maxcorr = MAX32(maxcorr, sum);
+   }
+   find_best_pitch(xcorr, maxcorr, y_lp4, 0, len>>2, max_pitch>>2, best_pitch);
+
+   /* Finer search with 2x decimation */
+   maxcorr=1;
+   for (i=0;i<max_pitch>>1;i++)
+   {
+      celt_word32 sum=0;
+      xcorr[i] = 0;
+      if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
+         continue;
+      for (j=0;j<len>>1;j++)
+         sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
+      xcorr[i] = MAX32(-1, sum);
+      maxcorr = MAX32(maxcorr, sum);
+   }
+   find_best_pitch(xcorr, maxcorr, y, shift, len>>1, max_pitch>>1, best_pitch);
+
+   /* Refine by pseudo-interpolation */
+   if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
+   {
+      celt_word32 a, b, c;
+      a = xcorr[best_pitch[0]-1];
+      b = xcorr[best_pitch[0]];
+      c = xcorr[best_pitch[0]+1];
+      if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
+         offset = 1;
+      else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
+         offset = -1;
+      else 
+         offset = 0;
+   } else {
+      offset = 0;
+   }
+   *pitch = 2*best_pitch[0]-offset;
+
+   CELT_COPY(y, y+(N>>1), (lag-N)>>1);
+   CELT_COPY(y+((lag-N)>>1), x_lp, N>>1);
+
+   /*printf ("%d\n", *pitch);*/
+}
index 129f69a..51cc8f3 100644 (file)
@@ -51,4 +51,6 @@ void pitch_state_free(kiss_fftr_cfg st);
     easier to apply psychoacoustic weighting */
 void find_spectral_pitch(const CELTMode *m, kiss_fftr_cfg fft, const struct PsyDecay *decay, const celt_sig *x, const celt_sig *y, const celt_word16 *window, celt_word16 * restrict X, int len, int max_pitch, int *pitch, int _C);
 
+void find_temporal_pitch(const CELTMode *m, const celt_sig * restrict x, celt_word16 * restrict y, int len, int max_pitch, int *pitch, int _C, celt_sig *xmem);
+
 #endif