Tonality and pitch tuning
author Jean-Marc Valin <jmvalin@jmvalin.ca>
Thu, 12 Jan 2012 08:44:49 +0000 (03:44 -0500)
committer Jean-Marc Valin <jmvalin@jmvalin.ca>
Fri, 13 Jul 2012 18:50:36 +0000 (14:50 -0400)
Tuned the tonality estimator to trigger on signals where only part of the
spectrum is tonal. Also tuned the pitch detector not to be confused
by short-term correlation.

celt/celt.c
celt/pitch.c
src/analysis.c

index a8a1943..a079702 100644 (file)
@@ -442,7 +442,7 @@ static int transient_analysis(const opus_val32 * restrict in, int len, int C,
 #ifdef FUZZING
    is_transient = rand()&0x1;
 #endif
-   /*printf("%d %d %d %f %f\n", is_transient, *tf_estimate, tf_max, 0., 1.);*/
+   /*printf("%d %f %d %f %f ", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);*/
    return is_transient;
 }
 
@@ -1206,8 +1206,10 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
          ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
 
          pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
+         /* Don't search the last 1.5 octaves of the range because
+            there are too many false positives due to short-term correlation */
          pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
-               COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index);
+               COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index);
          pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
 
          gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
@@ -1619,11 +1621,11 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
      if (st->analysis.valid) {
         int tonal_target;
         float tonal;
-        tonal = MAX16(0,st->analysis.tonality-.2)*(.5+st->analysis.tonality);
-        tonal_target = target + (coded_bins<<BITRES)*1.6f*tonal;
+        tonal = MAX16(0,st->analysis.tonality-.2);
+        tonal_target = new_target + (coded_bins<<BITRES)*2.0f*tonal;
         if (pitch_change)
            tonal_target +=  (coded_bins<<BITRES)*.8;
-        /*printf("%f %d\n", tonal, tonal_target);*/
+        /*printf("%f %f ", st->analysis.tonality, tonal);*/
         new_target = IMAX(tonal_target,new_target);
      }
 #endif
index 8e90687..beea61f 100644 (file)
@@ -331,6 +331,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
       int T1, T1b;
       opus_val16 g1;
       opus_val16 cont=0;
+      opus_val16 thresh;
       T1 = (2*T0+k)/(2*k);
       if (T1 < minperiod)
          break;
@@ -372,7 +373,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
          cont = HALF32(prev_gain);
       else
          cont = 0;
-      if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont)
+      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7,15),g0)-cont);
+      /* Bias against very high pitch (very short period) to avoid false-positives
+         due to short-term correlation */
+      if (T1<3*minperiod)
+         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85,15),g0)-cont);
+      else if (T1<2*minperiod)
+         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9,15),g0)-cont);
+      if (g1 > thresh)
       {
          best_xy = xy;
          best_yy = yy;
index 764b370..1336628 100644 (file)
@@ -74,7 +74,7 @@ static const float tweight[NB_TBANDS+1] = {
       .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
 };
 
-#define NB_TONAL_SKIP_BANDS 0
+#define NB_TONAL_SKIP_BANDS 9
 
 typedef struct {
    float angle[240];
@@ -265,8 +265,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
        frame_stationarity += stationarity;
        /*band_tonality[b] = tE/(1e-15+E)*/;
        band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
-       //printf("%f ", band_tonality[b]);
-#if 1
+#if 0
        if (b>=NB_TONAL_SKIP_BANDS)
        {
           frame_tonality += tweight[b]*band_tonality[b];
@@ -277,7 +276,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
        if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
           frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
 #endif
-       max_frame_tonality = MAX16(max_frame_tonality, frame_tonality);
+       max_frame_tonality = MAX16(max_frame_tonality, (1+.03*(b-NB_TBANDS))*frame_tonality);
        slope += band_tonality[b]*(b-8);
        /*printf("%f %f ", band_tonality[b], stationarity);*/
        if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1)
@@ -295,7 +294,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
        }
        tonal->prev_band_tonality[b] = band_tonality[b];
     }
-    //printf("\n");
+
     frame_loudness = 20*log10(frame_loudness);
     tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);
     tonal->lowECount *= (1-alphaE);
@@ -320,7 +319,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
 #else
     info->activity = .5*(1+frame_noisiness-frame_stationarity);
 #endif
-    frame_tonality = (max_frame_tonality/(tw_sum));
+    frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
     frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8);
     tonal->prev_tonality = frame_tonality;
     info->boost_amount[0] -= frame_tonality+.2;