Wideband VBR seems to (almost) work. Need to adapt it to work on ultra-wideband.
[speexdsp.git] / libspeex / nb_celp.c
index 53f1cba..e3ff6a2 100644 (file)
@@ -171,6 +171,7 @@ void *nb_encoder_init(SpeexMode *m)
       st->vbr = 0;
    }
    st->complexity=2;
+   st->sampling_rate=8000;
 
    return st;
 }
@@ -225,7 +226,6 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
    int ol_pitch;
    float ol_pitch_coef;
    float ol_gain;
-   float delta_qual=0;
    float *res, *target, *mem;
    void *stack;
    float *syn_resp;
@@ -251,7 +251,7 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
       st->buf2[i] = st->frame[i] * st->window[i];
 
    /* Compute auto-correlation */
-   autocorr(st->buf2, st->autocorr, st->lpcSize+1, st->windowSize);
+   _spx_autocorr(st->buf2, st->autocorr, st->lpcSize+1, st->windowSize);
 
    st->autocorr[0] += 10;        /* prevents NANs */
    st->autocorr[0] *= st->lpc_floor; /* Noise floor in auto-correlation domain */
@@ -364,21 +364,34 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
    /*Experimental VBR stuff*/
    if (st->vbr)
    {
-      delta_qual = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, ol_pitch_coef);
+      st->relative_quality = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, ol_pitch_coef);
       /*if (delta_qual<0)*/
-         delta_qual*=.1*(3+st->vbr_quality);
+      /*  delta_qual*=.1*(3+st->vbr_quality);*/
       if (st->vbr_enabled) 
       {
-         int qual = (int)floor(st->vbr_quality+delta_qual+.5);
-         if (qual<1 && delta_qual>-3.5)
-            qual=1;
-         if (qual<0)
-            qual=0;
-         if (qual>10)
-            qual=10;
-         if (qual==10 && st->vbr_quality<10)
-            qual=9;
-         speex_encoder_ctl(state, SPEEX_SET_QUALITY, &qual);
+         int mode;
+         mode = 7;
+         while (mode)
+         {
+            int v1;
+            float thresh;
+            v1=(int)floor(st->vbr_quality);
+            if (v1==10)
+               thresh = vbr_nb_thresh[mode][v1];
+            else
+               thresh = (st->vbr_quality-v1)*vbr_nb_thresh[mode][v1+1] + (1+v1-st->vbr_quality)*vbr_nb_thresh[mode][v1];
+            if (st->relative_quality > thresh)
+               break;
+            mode--;
+         }
+         /*fprintf(stderr, "");
+         fprintf (stderr, "encode %f %d\n", st->relative_quality, mode);
+         fprintf(stderr, "encode: %d %d\n",st->submodeID, mode);*/
+
+         speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
+         /*fprintf(stderr, "encode: %d %d\n",st->submodeID, mode);*/
+      } else {
+         st->relative_quality = -1;
       }
    }
    /*printf ("VBR quality = %f\n", vbr_qual);*/
@@ -401,7 +414,7 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
       st->first=1;
 
       /* Final signal synthesis from excitation */
-      iir_mem2(st->exc, st->interp_qlpc, st->frame, st->subframeSize, st->lpcSize, st->mem_sp);
+      iir_mem2(st->exc, st->interp_qlpc, st->frame, st->frameSize, st->lpcSize, st->mem_sp);
 
       in[0] = st->frame[0] + st->preemph*st->pre_mem2;
       for (i=1;i<st->frameSize;i++)
@@ -512,7 +525,6 @@ void nb_encode(void *state, float *in, SpeexBits *bits)
          st->pi_gain[sub] += tmp*st->interp_qlpc[i];
          tmp = -tmp;
       }
-     
 
       /* Compute bandwidth-expanded (unquantized) LPCs for perceptual weighting */
       bw_lpc(st->gamma1, st->interp_lpc, st->bw_lpc1, st->lpcSize);
@@ -783,7 +795,7 @@ void *nb_decoder_init(SpeexMode *m)
    st->pi_gain = (float*)speex_alloc(st->nbSubframes*sizeof(float));
    st->last_pitch = 40;
    st->count_lost=0;
-
+   st->sampling_rate=8000;
 
    st->user_callback.func = &speex_default_user_handler;
    st->user_callback.data = NULL;
@@ -971,9 +983,9 @@ int nb_decode(void *state, SpeexBits *bits, float *out)
       for (i=0;i<st->frameSize;i++)
          st->exc[i]=0;
       st->first=1;
-      
+
       /* Final signal synthesis from excitation */
-      iir_mem2(st->exc, st->interp_qlpc, st->frame, st->subframeSize, st->lpcSize, st->mem_sp);
+      iir_mem2(st->exc, st->interp_qlpc, st->frame, st->frameSize, st->lpcSize, st->mem_sp);
 
       out[0] = st->frame[0] + st->preemph*st->pre_mem;
       for (i=1;i<st->frameSize;i++)
@@ -1251,6 +1263,7 @@ void nb_encoder_ctl(void *state, int request, void *ptr)
    case SPEEX_SET_QUALITY:
       {
          int quality = (*(int*)ptr);
+         /*
          if (quality<=0)
             st->submodeID = 0;
          else if (quality<=1)
@@ -1268,7 +1281,12 @@ void nb_encoder_ctl(void *state, int request, void *ptr)
          else if (quality<=10)
             st->submodeID = 7;
          else
-            fprintf(stderr, "Unknown nb_ctl quality: %d\n", quality);
+         fprintf(stderr, "Unknown nb_ctl quality: %d\n", quality);*/
+         if (quality < 0)
+            quality = 0;
+         if (quality > 10)
+            quality = 10;
+         st->submodeID = ((SpeexNBMode*)(st->mode->mode))->quality_map[quality];
       }
       break;
    case SPEEX_SET_COMPLEXITY:
@@ -1279,11 +1297,31 @@ void nb_encoder_ctl(void *state, int request, void *ptr)
    case SPEEX_GET_COMPLEXITY:
       (*(int*)ptr) = st->complexity;
       break;
+   case SPEEX_SET_BITRATE:
+      {
+         int i=10, rate, target;
+         target = (*(int*)ptr);
+         while (i>=1)
+         {
+            speex_encoder_ctl(st, SPEEX_SET_QUALITY, &i);
+            speex_encoder_ctl(st, SPEEX_GET_BITRATE, &rate);
+            if (rate <= target)
+               break;
+            i--;
+         }
+      }
+      break;
    case SPEEX_GET_BITRATE:
       if (st->submodes[st->submodeID])
-         (*(int*)ptr) = 50*SUBMODE(bits_per_frame);
+         (*(int*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
       else
-         (*(int*)ptr) = 50*(NB_SUBMODE_BITS+1);
+         (*(int*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
+      break;
+   case SPEEX_SET_SAMPLING_RATE:
+      st->sampling_rate = (*(int*)ptr);
+      break;
+   case SPEEX_GET_SAMPLING_RATE:
+      (*(int*)ptr)=st->sampling_rate;
       break;
    case SPEEX_GET_PI_GAIN:
       {
@@ -1309,6 +1347,9 @@ void nb_encoder_ctl(void *state, int request, void *ptr)
             e[i]=st->innov[i];
       }
       break;
+   case SPEEX_GET_RELATIVE_QUALITY:
+      (*(float*)ptr)=st->relative_quality;
+      break;
    default:
       fprintf(stderr, "Unknown nb_ctl request: %d\n", request);
    }
@@ -1331,9 +1372,15 @@ void nb_decoder_ctl(void *state, int request, void *ptr)
       break;
    case SPEEX_GET_BITRATE:
       if (st->submodes[st->submodeID])
-         (*(int*)ptr) = 50*SUBMODE(bits_per_frame);
+         (*(int*)ptr) = st->sampling_rate*SUBMODE(bits_per_frame)/st->frameSize;
       else
-         (*(int*)ptr) = 50*(NB_SUBMODE_BITS+1);
+         (*(int*)ptr) = st->sampling_rate*(NB_SUBMODE_BITS+1)/st->frameSize;
+      break;
+   case SPEEX_SET_SAMPLING_RATE:
+      st->sampling_rate = (*(int*)ptr);
+      break;
+   case SPEEX_GET_SAMPLING_RATE:
+      (*(int*)ptr)=st->sampling_rate;
       break;
    case SPEEX_SET_HANDLER:
       {