Completed VBR for 0.5.0 release
[speexdsp.git] / libspeex / nb_celp.c
index 22d81e2..76ecb98 100644 (file)
@@ -31,6 +31,7 @@
 #include "vq.h"
 #include "speex_bits.h"
 #include "post_filter.h"
+#include "vbr.h"
 
 #ifndef M_PI
 #define M_PI           3.14159265358979323846  /* pi */
@@ -120,13 +121,23 @@ void *nb_encoder_init(SpeexMode *m)
    st->interp_qlsp = malloc(st->lpcSize*sizeof(float));
    st->rc = malloc(st->lpcSize*sizeof(float));
    st->first = 1;
-   
+
    st->mem_sp = calloc(st->lpcSize, sizeof(float));
    st->mem_sw = calloc(st->lpcSize, sizeof(float));
 
    st->pi_gain = calloc(st->nbSubframes, sizeof(float));
 
    st->pitch = calloc(st->nbSubframes, sizeof(int));
+
+   if (1) {
+      st->vbr = malloc(sizeof(VBRState));
+      vbr_init(st->vbr);
+      st->vbr_quality = 8;
+      st->vbr_enabled = 0;
+   } else {
+      st->vbr = 0;
+   }
+
    return st;
 }
 
@@ -162,7 +173,11 @@ void nb_encoder_destroy(void *state)
    free(st->mem_sw);
    free(st->pi_gain);
    free(st->pitch);
-   
+
+   vbr_destroy(st->vbr);
+   free(st->vbr);
+
+   /*Free state memory... should be last*/
    free(st);
 }
 
@@ -172,12 +187,12 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
    int i, sub, roots;
    float error;
    int ol_pitch;
+   float ol_pitch_coef;
    float ol_gain;
+   float delta_qual=0;
 
    st=state;
-
-   speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS);
-
+   
    /* Copy new data in input buffer */
    memmove(st->inBuf, st->inBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));
    st->inBuf[st->bufSize-st->frameSize] = in[0] - st->preemph*st->pre_mem;
@@ -219,38 +234,21 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
       st->lsp[i] = acos(st->lsp[i]);
    /*print_vec(st->lsp, 10, "LSP:");*/
    /* LSP Quantization */
-#if 1
-   SUBMODE(lsp_quant)(st->lsp, st->qlsp, st->lpcSize, bits);
-#else
-   for (i=0;i<st->lpcSize;i++)
-     st->qlsp[i]=st->lsp[i];
-#endif
-   /*printf ("LSP ");
-   for (i=0;i<st->lpcSize;i++)
-      printf ("%f ", st->lsp[i]);
-   printf ("\n");
-   printf ("QLSP ");
-   for (i=0;i<st->lpcSize;i++)
-      printf ("%f ", st->qlsp[i]);
-   printf ("\n");*/
-   /* Special case for first frame */
    if (st->first)
    {
       for (i=0;i<st->lpcSize;i++)
          st->old_lsp[i] = st->lsp[i];
-      for (i=0;i<st->lpcSize;i++)
-         st->old_qlsp[i] = st->qlsp[i];
    }
 
 
-   /* Whole frame analysis */
+   /* Whole frame analysis (open-loop estimation of pitch and excitation gain) */
    {
       for (i=0;i<st->lpcSize;i++)
          st->interp_lsp[i] = .5*st->old_lsp[i] + .5*st->lsp[i];
 
       lsp_enforce_margin(st->interp_lsp, st->lpcSize, .002);
 
-      /* Compute interpolated LPCs (unquantized) */
+      /* Compute interpolated LPCs (unquantized) for the whole frame */
       for (i=0;i<st->lpcSize;i++)
          st->interp_lsp[i] = cos(st->interp_lsp[i]);
       lsp_to_lpc(st->interp_lsp, st->interp_lpc, st->lpcSize,st->stack);
@@ -261,32 +259,83 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
       residue(st->frame, st->bw_lpc1, st->exc, st->frameSize, st->lpcSize);
       syn_filt(st->exc, st->bw_lpc2, st->sw, st->frameSize, st->lpcSize);
       
-      if (SUBMODE(lbr_pitch) && SUBMODE(ltp_params))
-      {
-         open_loop_nbest_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize, &ol_pitch, 1, st->stack);
-         speex_bits_pack(bits, ol_pitch-st->min_pitch, 7);
-      } else 
-         ol_pitch = 0;
+      /*Open-loop pitch*/
+      open_loop_nbest_pitch(st->sw, st->min_pitch, st->max_pitch, st->frameSize, 
+                            &ol_pitch, &ol_pitch_coef, 1, st->stack);
 
+      /*Compute "real" excitation*/
       residue(st->frame, st->interp_lpc, st->exc, st->frameSize, st->lpcSize);
-      
+
+      /* Compute open-loop excitation gain */
       ol_gain=0;
       for (i=0;i<st->frameSize;i++)
          ol_gain += st->exc[i]*st->exc[i];
       
       ol_gain=sqrt(1+ol_gain/st->frameSize);
+   }
 
-      /*printf ("ol_gain: %f\n", ol_gain);*/
-      if (1) {
-         int qe = (int)(floor(3.5*log(ol_gain)));
-         if (qe<0)
-            qe=0;
-         if (qe>31)
-            qe=31;
-         ol_gain = exp(qe/3.5);
-         speex_bits_pack(bits, qe, 5);
+   /*Experimental VBR stuff*/
+   if (st->vbr)
+   {
+      delta_qual = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, ol_pitch_coef);
+      if (st->vbr_enabled) 
+      {
+         int qual = (int)floor(st->vbr_quality+delta_qual+.5);
+         if (qual<0)
+            qual=0;
+         if (qual>10)
+            qual=10;
+         speex_encoder_ctl(state, SPEEX_SET_QUALITY, &qual);
       }
+   }
+   /*printf ("VBR quality delta = %f\n", delta_qual);*/
+
+   /* First, transmit the sub-mode we use for this frame */
+   speex_bits_pack(bits, st->submodeID, NB_SUBMODE_BITS);
 
+
+   /*Quantize LSPs*/
+#if 1 /*0 for unquantized*/
+   SUBMODE(lsp_quant)(st->lsp, st->qlsp, st->lpcSize, bits);
+#else
+   for (i=0;i<st->lpcSize;i++)
+     st->qlsp[i]=st->lsp[i];
+#endif
+
+   /*If we use low bit-rate pitch mode, transmit open-loop pitch (7 bits)*/
+   if (SUBMODE(lbr_pitch)!=-1 && SUBMODE(ltp_params))
+   {
+      speex_bits_pack(bits, ol_pitch-st->min_pitch, 7);
+   } else if (SUBMODE(lbr_pitch)==0)
+   {
+      int quant; /* 4-bit index of the open-loop pitch coefficient, in steps of 1/15 */
+      speex_bits_pack(bits, ol_pitch-st->min_pitch, 7);
+      quant = (int)floor(.5+15*ol_pitch_coef);
+      if (quant>15)
+         quant=15; /* saturate at the top index; 0 would discard a strong pitch gain */
+      if (quant<0)
+         quant=0;
+      speex_bits_pack(bits, quant, 4);
+      ol_pitch_coef=0.066667*quant; /* continue with the value the decoder will reconstruct */
+   }
+   
+   
+   /*Quantize and transmit open-loop excitation gain*/
+   {
+      int qe = (int)(floor(3.5*log(ol_gain)));
+      if (qe<0)
+         qe=0;
+      if (qe>31)
+         qe=31;
+      ol_gain = exp(qe/3.5);
+      speex_bits_pack(bits, qe, 5);
+   }
+
+   /* Special case for first frame */
+   if (st->first)
+   {
+      for (i=0;i<st->lpcSize;i++)
+         st->old_qlsp[i] = st->qlsp[i];
    }
 
    /* Loop on sub-frames */
@@ -321,22 +370,10 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
       for (i=0;i<st->lpcSize;i++)
          st->interp_qlsp[i] = (1-tmp)*st->old_qlsp[i] + tmp*st->qlsp[i];
 
+      /* Make sure the filters are stable */
       lsp_enforce_margin(st->interp_lsp, st->lpcSize, .002);
       lsp_enforce_margin(st->interp_qlsp, st->lpcSize, .002);
 
-      if (0) {
-         float *h=PUSH(st->stack, 8);
-         for (i=0;i<8;i++)
-            h[i]=0;
-         h[0]=1;
-         
-         residue_zero(h, st->bw_lpc1, h, 8, st->lpcSize);
-         syn_filt_zero(h, st->interp_qlpc, h, 8, st->lpcSize);
-         syn_filt_zero(h, st->bw_lpc2, h, 8, st->lpcSize);
-         print_vec(h, 8, "lpc_resp");
-         POP(st->stack);
-      }
-      
       /* Compute interpolated LPCs (quantized and unquantized) */
       for (i=0;i<st->lpcSize;i++)
          st->interp_lsp[i] = cos(st->interp_lsp[i]);
@@ -346,6 +383,7 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
          st->interp_qlsp[i] = cos(st->interp_qlsp[i]);
       lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, st->stack);
 
+      /* Compute analysis filter gain at w=pi (for use in SB-CELP) */
       tmp=1;
       st->pi_gain[sub]=0;
       for (i=0;i<=st->lpcSize;i++)
@@ -366,18 +404,7 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
          for (i=2;i<=st->lpcSize;i++)
             st->bw_lpc2[i]=0;
       }
-#ifdef DEBUG
-      printf ("\nlpc0 ");
-      for (i=0;i<=st->lpcSize;i++)
-         printf ("%f ", st->interp_lpc[i]);
-      printf ("\nlpc1 ");
-      for (i=0;i<=st->lpcSize;i++)
-         printf ("%f ", st->bw_lpc1[i]);
-      printf ("\nlpc2 ");
-      for (i=0;i<=st->lpcSize;i++)
-         printf ("%f ", st->bw_lpc2[i]);
-      printf ("\n\n");
-#endif
+
       /* Reset excitation */
       for (i=0;i<st->subframeSize;i++)
          exc[i]=0;
@@ -414,11 +441,13 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
       for (i=0;i<st->subframeSize;i++)
          exc[i]=exc2[i]=0;
 
+      /* If we have a long-term predictor (not all sub-modes have one) */
       if (SUBMODE(ltp_params))
       {
          /* Long-term prediction */
          if (SUBMODE(lbr_pitch) != -1)
          {
+            /* Low bit-rate pitch handling */
             int pit_min, pit_max;
             int margin;
             margin = SUBMODE(lbr_pitch);
@@ -431,12 +460,19 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
             pitch = SUBMODE(ltp_quant)(target, sw, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
                                        exc, SUBMODE(ltp_params), pit_min, pit_max, 
                                        st->lpcSize, st->subframeSize, bits, st->stack, exc2);
-         } else
+         } else {
+            /* Normal pitch handling */
             pitch = SUBMODE(ltp_quant)(target, sw, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
                                        exc, SUBMODE(ltp_params), st->min_pitch, st->max_pitch, 
                                        st->lpcSize, st->subframeSize, bits, st->stack, exc2);
+         }
          /*printf ("cl_pitch: %d\n", pitch);*/
          st->pitch[sub]=pitch;
+      } else if (SUBMODE(lbr_pitch==0)) {
+         for (i=0;i<st->subframeSize;i++)
+         {
+            exc[i]=exc[i-ol_pitch]*ol_pitch_coef;
+         }
       }
 
       /* Update target for adaptive codebook contribution */
@@ -497,14 +533,7 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
       for (i=0;i<st->subframeSize;i++)
          exc[i]+=st->buf2[i];
 #else
-      if (0)
-      {
-      /* Perform innovation search */
-      SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2,
-                           SUBMODE(innovation_params), st->lpcSize,
-                           st->subframeSize, exc, bits, st->stack);
-      }
-      else
+      /* Quantization of innovation */
       {
          float *innov;
          float ener=0, ener_1;
@@ -540,6 +569,7 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
          
          if (SUBMODE(innovation_quant))
          {
+            /* Normal quantization */
             SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, 
                                       SUBMODE(innovation_params), st->lpcSize, st->subframeSize, 
                                       innov, bits, st->stack);
@@ -547,6 +577,8 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
             for (i=0;i<st->subframeSize;i++)
                exc[i] += innov[i]*ener;
          } else {
+            /* This is the "real" (cheating) excitation in the encoder but the decoder will
+               use white noise */
             for (i=0;i<st->subframeSize;i++)
                exc[i] += st->buf2[i];
          }
@@ -704,27 +736,43 @@ void nb_decode(void *state, SpeexBits *bits, float *out, int lost)
    float pitch_gain[3];
    float ol_gain;
    int ol_pitch=0;
+   float ol_pitch_coef=0;
    int best_pitch=40;
    float best_pitch_gain=-1;
    st=state;
 
+   /* Get the sub-mode that was used */
    st->submodeID = speex_bits_unpack_unsigned(bits, NB_SUBMODE_BITS);
 
+   /* Shift all buffers by one frame */
    memmove(st->inBuf, st->inBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));
    memmove(st->excBuf, st->excBuf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));
    memmove(st->exc2Buf, st->exc2Buf+st->frameSize, (st->bufSize-st->frameSize)*sizeof(float));
 
-
+   /* Unquantize LSPs */
    SUBMODE(lsp_unquant)(st->qlsp, st->lpcSize, bits);
+
+   /* Handle first frame and lost-packet case */
    if (st->first || st->count_lost)
    {
       for (i=0;i<st->lpcSize;i++)
          st->old_qlsp[i] = st->qlsp[i];
    }
 
-   if (SUBMODE(lbr_pitch) && SUBMODE(ltp_params))
+   /* Get open-loop pitch estimation for low bit-rate pitch coding */
+   if (SUBMODE(lbr_pitch)!=-1 && SUBMODE(ltp_params))
+   {
+      ol_pitch = st->min_pitch+speex_bits_unpack_unsigned(bits, 7);
+      /* Read-only here: the decoder must not pack the pitch back into the bit-stream */
+   } else if (SUBMODE(lbr_pitch)==0)
+   {
+      int quant; /* 4-bit open-loop pitch coefficient index */
       ol_pitch = st->min_pitch+speex_bits_unpack_unsigned(bits, 7);
+      quant = speex_bits_unpack_unsigned(bits, 4);
+      ol_pitch_coef=0.066667*quant; /* same 1/15 step the encoder applies after quantizing */
+   }
    
+   /* Get global excitation gain */
    {
       int qe;
       qe = speex_bits_unpack_unsigned(bits, 5);
@@ -761,6 +809,7 @@ void nb_decode(void *state, SpeexBits *bits, float *out, int lost)
       lsp_to_lpc(st->interp_qlsp, st->interp_qlpc, st->lpcSize, st->stack);
 
 
+      /* Compute analysis filter gain at w=pi */
       tmp=1;
       st->pi_gain[sub]=0;
       for (i=0;i<=st->lpcSize;i++)
@@ -794,6 +843,7 @@ void nb_decode(void *state, SpeexBits *bits, float *out, int lost)
          
          if (!lost)
          {
+            /* If the frame was not lost... */
             tmp = fabs(pitch_gain[0])+fabs(pitch_gain[1])+fabs(pitch_gain[2]);
             tmp = fabs(pitch_gain[0]+pitch_gain[1]+pitch_gain[2]);
             if (tmp>best_pitch_gain)
@@ -808,14 +858,21 @@ void nb_decode(void *state, SpeexBits *bits, float *out, int lost)
                   best_pitch_gain=.85;
             }
          } else {
+            /* What to do with pitch if we lost the frame */
             for (i=0;i<st->subframeSize;i++)
                exc[i]=0;
             /*printf ("best_pitch: %d %f\n", st->last_pitch, st->last_pitch_gain);*/
             for (i=0;i<st->subframeSize;i++)
                exc[i]=st->last_pitch_gain*exc[i-st->last_pitch];
          }
+      } else if (SUBMODE(lbr_pitch==0)) {
+         for (i=0;i<st->subframeSize;i++)
+         {
+            exc[i]=exc[i-ol_pitch]*ol_pitch_coef;
+         }
       }
       
+      /* Unquantize the innovation */
       {
          int q_energy;
          float ener;
@@ -840,8 +897,10 @@ void nb_decode(void *state, SpeexBits *bits, float *out, int lost)
             /*Fixed codebook contribution*/
             SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), st->subframeSize, bits, st->stack);
          } else {
+            float scale;
+            scale = 3*sqrt(1.2-ol_pitch_coef);
             for (i=0;i<st->subframeSize;i++)
-               innov[i] = 3*((((float)rand())/RAND_MAX)-.5);
+               innov[i] = scale*((((float)rand())/RAND_MAX)-.5);
             
          }
 
@@ -856,24 +915,16 @@ void nb_decode(void *state, SpeexBits *bits, float *out, int lost)
 
       for (i=0;i<st->subframeSize;i++)
          exc2[i]=exc[i];
-#if 0
-      /*Compute decoded signal*/
-      syn_filt_mem(exc, st->interp_qlpc, exc2, st->subframeSize, st->lpcSize, st->mem_sp);
 
-      if (st->pf_enabled)
-         st->post_filter_func(exc2, sp, st->interp_qlpc, st->lpcSize, st->subframeSize,
-                              pitch, pitch_gain, st->post_filter_params, st->mem_pf, st->stack);
-#else
+      /* Apply post-filter */
       if (st->pf_enabled && SUBMODE(post_filter_func))
          SUBMODE(post_filter_func)(exc, exc2, st->interp_qlpc, st->lpcSize, st->subframeSize,
                               pitch, pitch_gain, SUBMODE(post_filter_params), st->mem_pf, 
                               st->mem_pf2, st->stack);
       
+      /* Apply synthesis filter */
       syn_filt_mem(exc2, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, st->mem_sp);
 
-
-#endif
-
    }
    
    /*Copy output signal*/
@@ -915,21 +966,38 @@ void nb_encoder_ctl(void *state, int request, void *ptr)
    case SPEEX_SET_MODE:
       st->submodeID = (*(int*)ptr);
       break;
+   case SPEEX_GET_MODE:
+      (*(int*)ptr) = st->submodeID;
+      break;
+   case SPEEX_SET_VBR:
+      st->vbr_enabled = (*(int*)ptr);
+      break;
+   case SPEEX_GET_VBR:
+      (*(int*)ptr) = st->vbr_enabled;
+      break;
+   case SPEEX_SET_VBR_QUALITY:
+      st->vbr_quality = (*(int*)ptr);
+      break;
+   case SPEEX_GET_VBR_QUALITY:
+      (*(int*)ptr) = st->vbr_quality;
+      break;
    case SPEEX_SET_QUALITY:
       {
          int quality = (*(int*)ptr);
          if (quality<=0)
             st->submodeID = 1;
-         else if (quality<=2)
+         else if (quality<=1)
             st->submodeID = 1;
-         else if (quality<=4)
+         else if (quality<=2)
             st->submodeID = 2;
-         else if (quality<=6)
+         else if (quality<=4)
             st->submodeID = 3;
-         else if (quality<=8)
+         else if (quality<=6)
             st->submodeID = 4;
-         else if (quality<=10)
+         else if (quality<=8)
             st->submodeID = 5;
+         else if (quality<=10)
+            st->submodeID = 6;
          else
             fprintf(stderr, "Unknown nb_ctl quality: %d\n", quality);
       }