Add a SET_LTP call to allow on-the-fly activation/deactivation of the long-term
author Gregory Maxwell <greg@xiph.org>
Sun, 14 Dec 2008 01:42:03 +0000 (20:42 -0500)
committer Gregory Maxwell <greg@xiph.org>
Sun, 14 Dec 2008 02:31:38 +0000 (21:31 -0500)
predictor even in high-complexity mode (LTP off is more robust to loss). Also fix
the case where turning on LTP on a running stream can cause prediction against
bogus data, by delaying the use of the LTP until enough data has been encoded.

libcelt/celt.c
libcelt/celt.h

index 34d4f38..9ebd8df 100644 (file)
@@ -77,6 +77,7 @@ struct CELTEncoder {
    int channels;
    
    int pitch_enabled;
+   int pitch_available;
 
    celt_word16_t * restrict preemph_memE; /* Input is 16-bit, so why bother with 32 */
    celt_sig_t    * restrict preemph_memD;
@@ -109,6 +110,7 @@ CELTEncoder *celt_encoder_create(const CELTMode *mode)
    st->overlap = mode->overlap;
 
    st->pitch_enabled = 1;
+   st->pitch_available = 1;
 
    st->in_mem = celt_alloc(st->overlap*C*sizeof(celt_sig_t));
    st->out_mem = celt_alloc((MAX_PERIOD+st->overlap)*C*sizeof(celt_sig_t));
@@ -378,7 +380,6 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
    int has_fold=1;
    ec_byte_buffer buf;
    ec_enc         enc;
-   celt_word32_t curr_power, pitch_power=0;
    VARDECL(celt_sig_t, in);
    VARDECL(celt_sig_t, freq);
    VARDECL(celt_norm_t, X);
@@ -477,6 +478,8 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
    }
 
    /* Pitch analysis: we do it early to save on the peak stack space */
+   /* Don't use pitch if there isn't enough data available yet, or if we're using shortBlocks */
+   has_pitch = st->pitch_enabled && (st->pitch_available >= MAX_PERIOD) && (!shortBlocks);
 #ifdef EXP_PSY
    ALLOC(tonality, MAX_PERIOD/4, celt_word16_t);
    {
@@ -486,7 +489,7 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
       compute_tonality(st->mode, X, st->psy_mem, MAX_PERIOD, tonality, MAX_PERIOD/4);
    }
 #else
-   if (st->pitch_enabled && !shortBlocks)
+   if (has_pitch)
    {
       find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, NULL, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);
    }
@@ -541,8 +544,9 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
 #endif
 
    /* Compute MDCTs of the pitch part */
-   if (st->pitch_enabled && !shortBlocks)
+   if (has_pitch)
    {
+      celt_word32_t curr_power, pitch_power=0;
       /* Normalise the pitch vector as well (discard the energies) */
       VARDECL(celt_ener_t, bandEp);
       
@@ -551,18 +555,17 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
       compute_band_energies(st->mode, freq, bandEp);
       normalise_bands(st->mode, freq, P, bandEp);
       pitch_power = bandEp[0]+bandEp[1]+bandEp[2];
-   }
-
-   /* Check if we can safely use the pitch (i.e. effective gain isn't too high) */
-   curr_power = bandE[0]+bandE[1]+bandE[2];
-   has_pitch = 0;
-   if (st->pitch_enabled && !shortBlocks && (MULT16_32_Q15(QCONST16(.1f, 15),curr_power) + QCONST32(10.f,ENER_SHIFT) < pitch_power))
-   {
-      /* Pitch prediction */
-      compute_pitch_gain(st->mode, X, P, gains);
-      id = quant_pitch(gains, st->mode->nbPBands);
-      if (id > -1)
-         has_pitch = 1;
+      /* Check if we can safely use the pitch (i.e. effective gain isn't too high) */
+      curr_power = bandE[0]+bandE[1]+bandE[2];
+      id=-1;
+      if ((MULT16_32_Q15(QCONST16(.1f, 15),curr_power) + QCONST32(10.f,ENER_SHIFT) < pitch_power))
+      {
+         /* Pitch prediction */
+         compute_pitch_gain(st->mode, X, P, gains);
+         id = quant_pitch(gains, st->mode->nbPBands);
+      } 
+      if (id == -1)
+         has_pitch = 0;
    }
    
    if (has_pitch) 
@@ -626,8 +629,11 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
    quant_bands(st->mode, X, P, NULL, bandE, stereo_mode, pulses, shortBlocks, has_fold, nbCompressedBytes*8, &enc);
 
    /* Re-synthesis of the coded audio if required */
-   if (st->pitch_enabled || optional_synthesis!=NULL)
+   if (st->pitch_available>0 || optional_synthesis!=NULL)
    {
+      if (st->pitch_available>0 && st->pitch_available<MAX_PERIOD)
+        st->pitch_available+=st->frame_size;
+
       if (C==2)
          renormalise_bands(st->mode, X);
       /* Synthesis */
@@ -743,7 +749,22 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
          int value = va_arg(ap, int);
          if (value<0 || value>10)
             goto bad_arg;
-         if (value<=2)
+         if (value<=2) {
+            st->pitch_enabled = 0; 
+            st->pitch_available = 0;
+         } else {
+              st->pitch_enabled = 1;
+              if (st->pitch_available<1)
+                st->pitch_available = 1;
+         }   
+      }
+      break;
+      case CELT_SET_LTP_REQUEST:
+      {
+         int value = va_arg(ap, int);
+         if (value<0 || value>1 || (value==1 && st->pitch_available==0))
+            goto bad_arg;
+         if (value==0)
             st->pitch_enabled = 0;
          else
             st->pitch_enabled = 1;
index d731d1a..9e1a561 100644 (file)
@@ -71,6 +71,9 @@ extern "C" {
 #define CELT_SET_COMPLEXITY_REQUEST    2
 /** Controls the complexity from 0-10 (int) */
 #define CELT_SET_COMPLEXITY(x) CELT_SET_COMPLEXITY_REQUEST, _celt_check_int(x)
+#define CELT_SET_LTP_REQUEST    3
+/** Activate or deactivate the use of the long term predictor (PITCH) from 0 or 1 (int) */
+#define CELT_SET_LTP(x) CELT_SET_LTP_REQUEST, _celt_check_int(x)
 
 /** GET the frame size used in the current mode */
 #define CELT_GET_FRAME_SIZE   1000