Move tf_select before the tf_res bits.
[opus.git] / libcelt / celt.c
index f9ce003..dcce270 100644 (file)
 #include <stdarg.h>
 #include "plc.h"
 
-static const int trim_cdf[7] = {0, 4, 10, 23, 119, 125, 128};
-static const int trim_coef[6] = {4, 6, 7, 8, 10, 12};
+static const unsigned trim_cdf[12] = {0, 2, 4, 9, 19, 41, 87, 109, 119, 124, 126, 128}; /* cumulative counts (total 128) for the 11 alloc_trim values 0..10 */
+static const unsigned spread_cdf[5] = {0, 7, 9, 30, 32}; /* cumulative counts (total 32) for the 4 spread decisions */
+
+#define COMBFILTER_MAXPERIOD 1024 /* also the per-channel length of the encoder's prefilter_mem */
+#define COMBFILTER_MINPERIOD 16 /* lower clamp for prefilter_period and initial value of pitch_index */
 
 /** Encoder state 
  @brief Encoder state
@@ -68,15 +71,24 @@ struct CELTEncoder {
    int complexity;
    int start, end;
 
-   celt_int32 vbr_rate_norm; /* Target number of 16th bits per frame */
+   celt_int32 vbr_rate_norm; /* Target number of 8th bits per frame */
+   int constrained_vbr;      /* If zero, VBR can do whatever it likes with the rate */
 
    /* Everything beyond this point gets cleared on a reset */
 #define ENCODER_RESET_START frame_max
 
    celt_word32 frame_max;
-   int fold_decision;
+   int spread_decision;
    int delayedIntra;
    int tonal_average;
+   int lastCodedBands;
+
+   int prefilter_period;
+   celt_word16 prefilter_gain;
+#ifdef RESYNTH
+   int prefilter_period_old;
+   celt_word16 prefilter_gain_old;
+#endif
 
    /* VBR-related parameters */
    celt_int32 vbr_reservoir;
@@ -87,7 +99,12 @@ struct CELTEncoder {
    celt_word32 preemph_memE[2];
    celt_word32 preemph_memD[2];
 
+#ifdef RESYNTH
+   celt_sig syn_mem[2][2*MAX_PERIOD];
+#endif
+
    celt_sig in_mem[1]; /* Size = channels*mode->overlap */
+   /* celt_sig prefilter_mem[],  Size = channels*COMBFILTER_MAXPERIOD */
    /* celt_sig overlap_mem[],  Size = channels*mode->overlap */
    /* celt_word16 oldEBands[], Size = channels*mode->nbEBands */
 };
@@ -96,6 +113,7 @@ int celt_encoder_get_size(const CELTMode *mode, int channels)
 {
    int size = sizeof(struct CELTEncoder)
          + (2*channels*mode->overlap-1)*sizeof(celt_sig)
+         + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig)
          + channels*mode->nbEBands*sizeof(celt_word16);
    return size;
 }
@@ -131,12 +149,14 @@ CELTEncoder *celt_encoder_init(CELTEncoder *st, const CELTMode *mode, int channe
 
    st->start = 0;
    st->end = st->mode->effEBands;
+   st->constrained_vbr = 1;
 
    st->vbr_rate_norm = 0;
+   st->vbr_offset = 0;
    st->force_intra  = 0;
    st->delayedIntra = 1;
    st->tonal_average = 256;
-   st->fold_decision = 1;
+   st->spread_decision = SPREAD_NORMAL;
    st->complexity = 5;
 
    if (error)
@@ -172,22 +192,26 @@ static inline celt_word16 SIG2WORD16(celt_sig x)
 static int transient_analysis(const celt_word32 * restrict in, int len, int C,
                               celt_word32 *frame_max, int overlap)
 {
-   int i, n;
-   celt_word32 threshold;
-   VARDECL(celt_word32, begin);
+   int i;
    VARDECL(celt_word16, tmp);
    celt_word32 mem0=0,mem1=0;
+   int is_transient = 0;
+   int block;
+   int N;
+   /* FIXME: Make that smaller */
+   celt_word16 bins[50];
    SAVE_STACK;
    ALLOC(tmp, len, celt_word16);
-   ALLOC(begin, len+1, celt_word32);
 
+   block = overlap/2;
+   N=len/block;
    if (C==1)
    {
       for (i=0;i<len;i++)
          tmp[i] = SHR32(in[i],SIG_SHIFT);
    } else {
       for (i=0;i<len;i++)
-         tmp[i] = SHR32(ADD32(in[C*i],in[C*i+1]), SIG_SHIFT+1);
+         tmp[i] = SHR32(ADD32(in[i],in[i+len]), SIG_SHIFT+1);
    }
 
    /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
@@ -201,43 +225,55 @@ static int transient_analysis(const celt_word32 * restrict in, int len, int C,
       mem1 = x - SHR32(y,1);
 #else
       mem0 = mem1 + y - 2*x;
-      mem1 = x - .5*y;
+      mem1 = x - .5f*y;
 #endif
       tmp[i] = EXTRACT16(SHR(y,2));
    }
    /* First few samples are bad because we don't propagate the memory */
-   for (i=0;i<24;i++)
+   for (i=0;i<12;i++)
       tmp[i] = 0;
 
-   begin[0] = 0;
-   for (i=0;i<len;i++)
-      begin[i+1] = MAX32(begin[i], ABS32(tmp[i]));
-
-   n = -1;
-
-   threshold = MULT16_32_Q15(QCONST16(.4f,15),begin[len]);
-   /* If the following condition isn't met, there's just no way
-      we'll have a transient*/
-   if (*frame_max < threshold)
+   for (i=0;i<N;i++)
    {
-      /* It's likely we have a transient, now find it */
-      for (i=8;i<len-8;i++)
-      {
-         if (begin[i+1] < threshold)
-            n=i;
-      }
+      int j;
+      float max_abs=0;
+      for (j=0;j<block;j++)
+         max_abs = MAX32(max_abs, tmp[i*block+j]);
+      bins[i] = max_abs;
    }
-
-   *frame_max = begin[len-overlap];
-   /* Only consider the last 7.5 ms for the next transient */
-   if (len>360+overlap)
+   for (i=0;i<N;i++)
    {
-      *frame_max = 0;
-      for (i=len-360-overlap;i<len-overlap;i++)
-         *frame_max = MAX32(*frame_max, ABS32(tmp[i]));
+      int j;
+      int conseq=0;
+      celt_word16 t1, t2, t3;
+
+      t1 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
+      t2 = MULT16_16_Q15(QCONST16(.4f, 15), bins[i]);
+      t3 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
+      for (j=0;j<i;j++)
+      {
+         if (bins[j] < t1)
+            conseq++;
+         if (bins[j] < t2)
+            conseq++;
+         else
+            conseq = 0;
+      }
+      if (conseq>=3)
+         is_transient=1;
+      conseq = 0;
+      for (j=i+1;j<N;j++)
+      {
+         if (bins[j] < t3)
+            conseq++;
+         else
+            conseq = 0;
+      }
+      if (conseq>=7)
+         is_transient=1;
    }
    RESTORE_STACK;
-   return n>=32;
+   return is_transient;
 }
 
 /** Apply window and compute the MDCT for all sub-frames and 
@@ -254,7 +290,6 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * rest
       int N = mode->shortMdctSize<<LM;
       int B = 1;
       int b, c;
-      VARDECL(celt_word32, x);
       VARDECL(celt_word32, tmp);
       SAVE_STACK;
       if (shortBlocks)
@@ -263,21 +298,17 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * rest
          N = mode->shortMdctSize;
          B = shortBlocks;
       }
-      ALLOC(x, N+overlap, celt_word32);
       ALLOC(tmp, N, celt_word32);
-      for (c=0;c<C;c++)
-      {
+      c=0; do {
          for (b=0;b<B;b++)
          {
             int j;
-            for (j=0;j<N+overlap;j++)
-               x[j] = in[C*(b*N+j)+c];
-            clt_mdct_forward(&mode->mdct, x, tmp, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM);
+            clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, tmp, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM);
             /* Interleaving the sub-frames */
             for (j=0;j<N;j++)
                out[(j*B+b)+c*N*B] = tmp[j];
          }
-      }
+      } while (++c<C);
       RESTORE_STACK;
    }
 }
@@ -292,8 +323,7 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
    const int C = CHANNELS(_C);
    const int N = mode->shortMdctSize<<LM;
    const int overlap = OVERLAP(mode);
-   for (c=0;c<C;c++)
-   {
+   c=0; do {
       int j;
          VARDECL(celt_word32, x);
          VARDECL(celt_word32, tmp);
@@ -328,15 +358,14 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
          for (j=0;j<overlap;j++)
             overlap_mem[c][j] = x[N+j];
          RESTORE_STACK;
-   }
+   } while (++c<C);
 }
 
 static void deemphasis(celt_sig *in[], celt_word16 *pcm, int N, int _C, const celt_word16 *coef, celt_sig *mem)
 {
    const int C = CHANNELS(_C);
    int c;
-   for (c=0;c<C;c++)
-   {
+   c=0; do {
       int j;
       celt_sig * restrict x;
       celt_word16  * restrict y;
@@ -354,14 +383,53 @@ static void deemphasis(celt_sig *in[], celt_word16 *pcm, int N, int _C, const ce
          y+=C;
       }
       mem[c] = m;
+   } while (++c<C);
+}
+
+#ifdef ENABLE_POSTFILTER
+static void comb_filter(celt_word32 *y, celt_word32 *x, int T0, int T1, int N,
+      int C, celt_word16 g0, celt_word16 g1, const celt_word16 *window, int overlap)
+{ /* y = x + 3-tap comb; lag/gain cross-fade from (T0,g0) to (T1,g1) over the first `overlap` samples. C is unused here. */
+   int i;
+   /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
+   celt_word16 g00, g01, g02, g10, g11, g12; /* gXk = gain gX scaled by tap tk */
+   celt_word16 t0, t1, t2;
+   /* zeros at theta = +/- 5*pi/6 */
+   t0 = QCONST16(.26795f, 15);
+   t1 = QCONST16(.46410f, 15);
+   t2 = QCONST16(.26795f, 15);
+   g00 = MULT16_16_Q15(g0, t0);
+   g01 = MULT16_16_Q15(g0, t1);
+   g02 = MULT16_16_Q15(g0, t2);
+   g10 = MULT16_16_Q15(g1, t0);
+   g11 = MULT16_16_Q15(g1, t1);
+   g12 = MULT16_16_Q15(g1, t2);
+   for (i=0;i<overlap;i++) /* transition region; reads x as far back as x[-T0-1] and x[-T1-1] */
+   {
+      celt_word16 f;
+      f = MULT16_16_Q15(window[i],window[i]); /* weight of the new (T1,g1) filter; the old one gets Q15ONE-f */
+      y[i] = x[i]
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0])
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0-1])
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0+1])
+               + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1])
+               + MULT16_32_Q15(MULT16_16_Q15(f,g10),x[i-T1-1])
+               + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1+1]);
+
+   }
+   for (i=overlap;i<N;i++) /* after the transition only the new lag T1 and gain g1 apply */
+      y[i] = x[i]
+               + MULT16_32_Q15(g11,x[i-T1])
+               + MULT16_32_Q15(g10,x[i-T1-1])
+               + MULT16_32_Q15(g12,x[i-T1+1]);
 }
+#endif /* ENABLE_POSTFILTER */
 
 static const signed char tf_select_table[4][8] = {
       {0, -1, 0, -1,    0,-1, 0,-1},
-      {0, -1, 0, -2,    1, 0, 1 -1},
-      {0, -2, 0, -3,    2, 0, 1 -1},
-      {0, -2, 0, -3,    2, 0, 1 -1},
+      {0, -1, 0, -2,    1, 0, 1,-1}, /* row = LM, column = 4*isTransient+2*tf_select+tf_res */
+      {0, -2, 0, -3,    2, 0, 1,-1}, /* the old "1 -1" (missing comma) evaluated to 0, leaving the 8th entry implicitly 0 */
+      {0, -2, 0, -3,    2, 0, 1,-1},
 };
 
 static celt_word32 l1_metric(const celt_norm *tmp, int N, int LM, int width)
@@ -404,9 +472,9 @@ static int tf_analysis(const CELTMode *m, celt_word16 *bandLogE, celt_word16 *ol
    int tf_select=0;
    SAVE_STACK;
 
-   /* FIXME: Should check number of bytes *left* */
    if (nbCompressedBytes<15*C)
    {
+      *tf_sum = 0;
       for (i=0;i<len;i++)
          tf_res[i] = isTransient;
       return 0;
@@ -434,10 +502,10 @@ static int tf_analysis(const CELTMode *m, celt_word16 *bandLogE, celt_word16 *ol
       N = (m->eBands[i+1]-m->eBands[i])<<LM;
       for (j=0;j<N;j++)
          tmp[j] = X[j+(m->eBands[i]<<LM)];
-      /* FIXME: Do something with the right channel */
-      /*if (C==2)
+      /* Just add the right channel if we're in stereo */
+      if (C==2)
          for (j=0;j<N;j++)
-            tmp[j] = ADD16(tmp[j],X[N0+j+(m->eBands[i]<<LM)]);*/
+            tmp[j] = ADD16(tmp[j],X[N0+j+(m->eBands[i]<<LM)]);
       L1 = l1_metric(tmp, N, isTransient ? LM : 0, N>>LM);
       best_L1 = L1;
       /*printf ("%f ", L1);*/
@@ -519,42 +587,46 @@ static int tf_analysis(const CELTMode *m, celt_word16 *bandLogE, celt_word16 *ol
    return tf_select;
 }
 
-static void tf_encode(int start, int end, int isTransient, int *tf_res, int nbCompressedBytes, int LM, int tf_select, ec_enc *enc)
+static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc)
 {
    int curr, i;
-   ec_enc_bit_prob(enc, tf_res[start], isTransient ? 16384 : 4096);
+   if (LM!=0) /* tf_select is now written before the tf_res bits, and not at all when LM==0 */
+      ec_enc_bit_logp(enc, tf_select, 1);
+   ec_enc_bit_logp(enc, tf_res[start], isTransient ? 2 : 4); /* first band is coded directly */
    curr = tf_res[start];
    for (i=start+1;i<end;i++)
    {
-      ec_enc_bit_prob(enc, tf_res[i] ^ curr, isTransient ? 4096 : 2048);
+      ec_enc_bit_logp(enc, tf_res[i] ^ curr, isTransient ? 4 : 5); /* later bands code only the change from the previous band */
       curr = tf_res[i];
    }
-   ec_enc_bits(enc, tf_select, 1);
    for (i=start;i<end;i++)
       tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
    /*printf("%d %d ", isTransient, tf_select); for(i=0;i<end;i++)printf("%d ", tf_res[i]);printf("\n");*/
 }
 
-static void tf_decode(int start, int end, int C, int isTransient, int *tf_res, int nbCompressedBytes, int LM, ec_dec *dec)
+static void tf_decode(int start, int end, int C, int isTransient, int *tf_res, int LM, ec_dec *dec)
 {
    int i, curr, tf_select;
-   tf_res[start] = ec_dec_bit_prob(dec, isTransient ? 16384 : 4096);
-   curr = tf_res[start];
+   if (LM!=0) /* same order as tf_encode: tf_select comes first */
+      tf_select = ec_dec_bit_logp(dec, 1);
+   else
+      tf_select = 0; /* tf_encode writes no tf_select bit when LM==0 */
+   curr = ec_dec_bit_logp(dec, isTransient ? 2 : 4);
+   tf_res[start] = tf_select_table[LM][4*isTransient+2*tf_select+curr]; /* tf_select is already known, so map through the table right away */
    for (i=start+1;i<end;i++)
    {
-      tf_res[i] = ec_dec_bit_prob(dec, isTransient ? 4096 : 2048) ^ curr;
-      curr = tf_res[i];
+      curr = ec_dec_bit_logp(dec, isTransient ? 4 : 5) ^ curr; /* decoded bit toggles the running raw value */
+      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+curr];
    }
-   tf_select = ec_dec_bits(dec, 1);
-   for (i=start;i<end;i++)
-      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
 }
 
 static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
       const celt_word16 *bandLogE, int nbEBands, int LM, int C, int N0)
 {
    int i;
-   int trim_index = 3;
+   celt_word32 diff=0;
+   int c;
+   int trim_index = 5;
    if (C==2)
    {
       celt_word16 sum = 0; /* Q10 */
@@ -569,49 +641,81 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
       }
       sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
       /*printf ("%f\n", sum);*/
-      trim_index++;
-      if (sum > QCONST16(.9,10))
+      if (sum > QCONST16(.995f,10))
+         trim_index-=4;
+      else if (sum > QCONST16(.92f,10))
          trim_index-=3;
-      else if (sum > QCONST16(.65,10))
+      else if (sum > QCONST16(.85f,10))
          trim_index-=2;
-      else if (sum > QCONST16(.45,10))
-         trim_index--;
+      else if (sum > QCONST16(.8f,10))
+         trim_index-=1;
    }
-#if 0
-   float diff=0;
-   for (c=0;c<C;c++)
-   {
+
+   /* Estimate spectral tilt */
+   c=0; do {
       for (i=0;i<nbEBands-1;i++)
       {
-         diff += bandLogE[i+c*nbEBands]*(i-.5*nbEBands);
+         diff += bandLogE[i+c*nbEBands]*(celt_int32)(2+2*i-nbEBands);
       }
-   }
+   } while (++c<0);
    diff /= C*(nbEBands-1);
    /*printf("%f\n", diff);*/
-   if (diff > 4)
+   if (diff > QCONST16(2.f, DB_SHIFT))
       trim_index--;
-   if (diff > 8)
+   if (diff > QCONST16(8.f, DB_SHIFT))
       trim_index--;
-   /*if (diff < -10)
-      trim_index++;*/
-#endif
+   if (diff < -QCONST16(4.f, DB_SHIFT))
+      trim_index++;
+   if (diff < -QCONST16(10.f, DB_SHIFT))
+      trim_index++;
+
    if (trim_index<0)
       trim_index = 0;
-   if (trim_index>5)
-      trim_index = 5;
+   if (trim_index>10)
+      trim_index = 10;
    return trim_index;
 }
 
+static int stereo_analysis(const CELTMode *m, const celt_norm *X,
+      int nbEBands, int LM, int C, int N0)
+{ /* Returns nonzero when the weighted M/S L1 sum exceeds the L/R one; nbEBands and C are unused here. */
+   int i;
+   int thetas;
+   celt_word32 sumLR = EPSILON, sumMS = EPSILON;
+
+   /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */
+   for (i=0;i<13;i++) /* only bands 0..12 are examined */
+   {
+      int j;
+      for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+      {
+         celt_word16 L, R, M, S;
+         L = X[j];
+         R = X[N0+j]; /* second channel is read N0 coefficients after the first */
+         M = L+R;
+         S = L-R;
+         sumLR += EXTEND32(ABS16(L)) + EXTEND32(ABS16(R));
+         sumMS += EXTEND32(ABS16(M)) + EXTEND32(ABS16(S));
+      }
+   }
+   sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS); /* scale by ~1/sqrt(2) since M and S are unnormalised sum/difference */
+   thetas = 13;
+   /* We don't need thetas for lower bands with LM<=1 */
+   if (LM<=1)
+      thetas -= 8;
+   return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS) /* M/S side carries `thetas` extra weight */
+         > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
+}
+
 #ifdef FIXED_POINT
-int celt_encode_with_ec(CELTEncoder * restrict st, const celt_int16 * pcm, celt_int16 * optional_resynthesis, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
+int celt_encode_with_ec(CELTEncoder * restrict st, const celt_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
 {
 #else
-int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, celt_sig * optional_resynthesis, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
+int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
 {
 #endif
    int i, c, N;
    int bits;
-   int has_fold=1;
    ec_byte_buffer buf;
    ec_enc         _enc;
    VARDECL(celt_sig, in);
@@ -626,6 +730,7 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
    VARDECL(int, fine_priority);
    VARDECL(int, tf_res);
    celt_sig *_overlap_mem;
+   celt_sig *prefilter_mem;
    celt_word16 *oldBandE;
    int shortBlocks=0;
    int isTransient=0;
@@ -638,6 +743,13 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
    int codedBands;
    int tf_sum;
    int alloc_trim;
+   int pitch_index=COMBFILTER_MINPERIOD;
+   celt_word16 gain1 = 0;
+   int intensity=0;
+   int dual_stereo=0;
+   int effectiveBytes;
+   celt_word16 pf_threshold;
+   int dynalloc_prob;
    SAVE_STACK;
 
    if (nbCompressedBytes<0 || pcm==NULL)
@@ -650,8 +762,10 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
       return CELT_BAD_ARG;
    M=1<<LM;
 
-   _overlap_mem = st->in_mem+C*(st->overlap);
-   oldBandE = (celt_word16*)(st->in_mem+2*C*(st->overlap));
+   prefilter_mem = st->in_mem+C*(st->overlap);
+   _overlap_mem = prefilter_mem+C*COMBFILTER_MAXPERIOD;
+   /*_overlap_mem = st->in_mem+C*(st->overlap);*/
+   oldBandE = (celt_word16*)(st->in_mem+C*(2*st->overlap+COMBFILTER_MAXPERIOD));
 
    if (enc==NULL)
    {
@@ -664,6 +778,11 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
    }
    nbAvailableBytes = nbCompressedBytes - nbFilledBytes;
 
+   if (st->vbr_rate_norm>0)
+      effectiveBytes = st->vbr_rate_norm>>BITRES<<LM>>3;
+   else
+      effectiveBytes = nbCompressedBytes;
+
    effEnd = st->end;
    if (effEnd > st->mode->effEBands)
       effEnd = st->mode->effEBands;
@@ -671,25 +790,134 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
    N = M*st->mode->shortMdctSize;
    ALLOC(in, C*(N+st->overlap), celt_sig);
 
-   CELT_COPY(in, st->in_mem, C*st->overlap);
-   for (c=0;c<C;c++)
+   /* Find pitch period and gain */
    {
-      const celt_word16 * restrict pcmp = pcm+c;
-      celt_sig * restrict inp = in+C*st->overlap+c;
-      for (i=0;i<N;i++)
+      VARDECL(celt_sig, _pre);
+      celt_sig *pre[2];
+      SAVE_STACK;
+      c = 0;
+      ALLOC(_pre, C*(N+COMBFILTER_MAXPERIOD), celt_sig);
+
+      pre[0] = _pre;
+      pre[1] = _pre + (N+COMBFILTER_MAXPERIOD);
+
+      c=0; do {
+         const celt_word16 * restrict pcmp = pcm+c;
+         celt_sig * restrict inp = in+c*(N+st->overlap)+st->overlap;
+
+         for (i=0;i<N;i++)
+         {
+            /* Apply pre-emphasis */
+            celt_sig tmp = MULT16_16(st->mode->preemph[2], SCALEIN(*pcmp));
+            *inp = tmp + st->preemph_memE[c];
+            st->preemph_memE[c] = MULT16_32_Q15(st->mode->preemph[1], *inp)
+                                   - MULT16_32_Q15(st->mode->preemph[0], tmp);
+            inp++;
+            pcmp+=C;
+         }
+         CELT_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
+         CELT_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
+      } while (++c<C);
+
+#ifdef ENABLE_POSTFILTER
+      if (nbAvailableBytes>12*C && st->start==0)
+      {
+         VARDECL(celt_word16, pitch_buf);
+         ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, celt_word16);
+         celt_word32 tmp=0;
+         celt_word32 mem0[2]={0,0};
+         celt_word16 mem1[2]={0,0};
+
+         pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD+N,
+                          C, mem0, mem1);
+         pitch_search(st->mode, pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
+               COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index, &tmp, 1<<LM);
+         pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
+
+         gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
+               N, &pitch_index, st->prefilter_period, st->prefilter_gain);
+         if (pitch_index > COMBFILTER_MAXPERIOD)
+            pitch_index = COMBFILTER_MAXPERIOD;
+         gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
+      } else {
+         gain1 = 0;
+      }
+
+      /* Gain threshold for enabling the prefilter/postfilter */
+      pf_threshold = QCONST16(.2f,15);
+
+      /* Adjusting the threshold based on rate and continuity */
+      if (abs(pitch_index-st->prefilter_period)*10>pitch_index)
+         pf_threshold += QCONST16(.2f,15);
+      if (nbAvailableBytes<25)
+         pf_threshold += QCONST16(.1f,15);
+      if (nbAvailableBytes<35)
+         pf_threshold += QCONST16(.1f,15);
+      if (st->prefilter_gain > QCONST16(.4f,15))
+         pf_threshold -= QCONST16(.1f,15);
+      if (st->prefilter_gain > QCONST16(.55f,15))
+         pf_threshold -= QCONST16(.1f,15);
+
+      /* Hard threshold at 0.2 */
+      pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15));
+      if (gain1<pf_threshold)
       {
-         /* Apply pre-emphasis */
-         celt_sig tmp = MULT16_16(st->mode->preemph[2], SCALEIN(*pcmp));
-         *inp = tmp + st->preemph_memE[c];
-         st->preemph_memE[c] = MULT16_32_Q15(st->mode->preemph[1], *inp)
-                             - MULT16_32_Q15(st->mode->preemph[0], tmp);
-         inp += C;
-         pcmp += C;
+         ec_enc_bit_logp(enc, 0, 1);
+         gain1 = 0;
+      } else {
+         int qg;
+         int octave;
+
+         if (gain1 > QCONST16(.6f,15))
+            gain1 = QCONST16(.6f,15);
+         if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1,15))
+            gain1=st->prefilter_gain;
+
+#ifdef FIXED_POINT
+         qg = ((gain1+2048)>>12)-2;
+#else
+         qg = floor(.5+gain1*8)-2;
+#endif
+         ec_enc_bit_logp(enc, 1, 1);
+         octave = EC_ILOG(pitch_index)-5;
+         ec_enc_uint(enc, octave, 6);
+         ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave);
+         ec_enc_bits(enc, qg, 2);
+         gain1 = QCONST16(.125f,15)*(qg+2);
       }
+      /*printf("%d %f\n", pitch_index, gain1);*/
+#else /* ENABLE_POSTFILTER */
+      ec_enc_bit_logp(enc, 0, 1);
+#endif /* ENABLE_POSTFILTER */
+
+      c=0; do {
+         st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
+         CELT_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap);
+#ifdef ENABLE_POSTFILTER
+         comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD,
+               st->prefilter_period, pitch_index, N, C, -st->prefilter_gain, -gain1, st->mode->window, st->mode->overlap);
+#endif /* ENABLE_POSTFILTER */
+         CELT_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap);
+
+#ifdef ENABLE_POSTFILTER
+         if (N>COMBFILTER_MAXPERIOD)
+         {
+            CELT_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
+         } else {
+            CELT_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
+            CELT_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
+         }
+#endif /* ENABLE_POSTFILTER */
+      } while (++c<C);
+
+      RESTORE_STACK;
    }
-   CELT_COPY(st->in_mem, in+C*N, C*st->overlap);
 
-   resynth = optional_resynthesis!=NULL;
+#ifdef RESYNTH
+   resynth = 1;
+#else
+   resynth = 0;
+#endif
 
    if (st->complexity > 1 && LM>0)
    {
@@ -721,36 +949,35 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
 
    ALLOC(tf_res, st->mode->nbEBands, int);
    /* Needs to be before coarse energy quantization because otherwise the energy gets modified */
-   tf_select = tf_analysis(st->mode, bandLogE, oldBandE, effEnd, C, isTransient, tf_res, nbAvailableBytes, X, N, LM, &tf_sum);
+   tf_select = tf_analysis(st->mode, bandLogE, oldBandE, effEnd, C, isTransient, tf_res, effectiveBytes, X, N, LM, &tf_sum);
    for (i=effEnd;i<st->end;i++)
       tf_res[i] = tf_res[effEnd-1];
 
    ALLOC(error, C*st->mode->nbEBands, celt_word16);
    quant_coarse_energy(st->mode, st->start, st->end, effEnd, bandLogE,
-         oldBandE, nbCompressedBytes*8, st->mode->prob,
-         error, enc, C, LM, nbAvailableBytes, st->force_intra,
+         oldBandE, nbCompressedBytes*8, error, enc,
+         C, LM, nbAvailableBytes, st->force_intra,
          &st->delayedIntra, st->complexity >= 4);
 
    if (LM > 0)
-      ec_enc_bit_prob(enc, shortBlocks!=0, 8192);
+      ec_enc_bit_logp(enc, shortBlocks!=0, 3);
 
-   tf_encode(st->start, st->end, isTransient, tf_res, nbAvailableBytes, LM, tf_select, enc);
+   tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
 
-   if (shortBlocks || st->complexity < 3)
+   if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C)
    {
       if (st->complexity == 0)
       {
-         has_fold = 0;
-         st->fold_decision = 3;
+         st->spread_decision = SPREAD_NONE;
       } else {
-         has_fold = 1;
-         st->fold_decision = 1;
+         st->spread_decision = SPREAD_NORMAL;
       }
    } else {
-      has_fold = folding_decision(st->mode, X, &st->tonal_average, &st->fold_decision, effEnd, C, M);
+      st->spread_decision = spreading_decision(st->mode, X, &st->tonal_average, st->spread_decision, effEnd, C, M);
    }
-   ec_enc_bit_prob(enc, has_fold>>1, 8192);
-   ec_enc_bit_prob(enc, has_fold&1, (has_fold>>1) ? 32768 : 49152);
+   /* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */
+   ec_encode_bin(enc, spread_cdf[st->spread_decision],
+         spread_cdf[st->spread_decision+1], 5);
 
    ALLOC(offsets, st->mode->nbEBands, int);
 
@@ -758,7 +985,7 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
       offsets[i] = 0;
    /* Dynamic allocation code */
    /* Make sure that dynamic allocation can't make us bust the budget */
-   if (nbCompressedBytes > 30)
+   if (effectiveBytes > 50 && LM>=1)
    {
       int t1, t2;
       if (LM <= 1)
@@ -771,79 +998,93 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
       }
       for (i=1;i<st->mode->nbEBands-1;i++)
       {
-         if (2*bandLogE[i]-bandLogE[i-1]-bandLogE[i+1] > SHL16(t1,DB_SHIFT))
+         celt_word32 d2;
+         d2 = 2*bandLogE[i]-bandLogE[i-1]-bandLogE[i+1];
+         if (C==2)
+            d2 = HALF32(d2 + 2*bandLogE[i+st->mode->nbEBands]-
+                  bandLogE[i-1+st->mode->nbEBands]-bandLogE[i+1+st->mode->nbEBands]);
+         if (d2 > SHL16(t1,DB_SHIFT))
             offsets[i] += 1;
-         if (2*bandLogE[i]-bandLogE[i-1]-bandLogE[i+1] > SHL16(t2,DB_SHIFT))
+         if (d2 > SHL16(t2,DB_SHIFT))
             offsets[i] += 1;
       }
    }
+   dynalloc_prob = 6;
    for (i=0;i<st->mode->nbEBands;i++)
    {
       int j;
-      ec_enc_bit_prob(enc, offsets[i]!=0, 1024);
+      ec_enc_bit_logp(enc, offsets[i]!=0, dynalloc_prob);
       if (offsets[i]!=0)
       {
+         int width, quanta;
+         width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
+         /* quanta is 6 bits, but no more than 1 bit/sample
+            and no less than 1/8 bit/sample */
+         quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
          for (j=0;j<offsets[i]-1;j++)
-            ec_enc_bit_prob(enc, 1, 32768);
-         ec_enc_bit_prob(enc, 0, 32768);
+            ec_enc_bit_logp(enc, 1, 1);
+         ec_enc_bit_logp(enc, 0, 1);
+         offsets[i] *= quanta;
+         /* Making dynalloc more likely */
+         dynalloc_prob = IMAX(2, dynalloc_prob-1);
       }
-      offsets[i] *= (6<<BITRES);
-   }
-   {
-      int trim_index = alloc_trim_analysis(st->mode, X, bandLogE, st->mode->nbEBands, LM, C, N);
-      alloc_trim = trim_coef[trim_index];
-      ec_encode_bin(enc, trim_cdf[trim_index], trim_cdf[trim_index+1], 7);
    }
+   alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE, st->mode->nbEBands, LM, C, N);
+   ec_encode_bin(enc, trim_cdf[alloc_trim], trim_cdf[alloc_trim+1], 7);
 
    /* Variable bitrate */
    if (st->vbr_rate_norm>0)
    {
      celt_word16 alpha;
-     celt_int32 delta;
-     /* The target rate in 16th bits per frame */
+     celt_int32 delta, tell;
+     /* The target rate in 8th bits per frame */
      celt_int32 vbr_rate;
      celt_int32 target;
-     celt_int32 vbr_bound, max_allowed;
+     celt_int32 vbr_bound, max_allowed, min_allowed;
 
-     vbr_rate = M*st->vbr_rate_norm;
+     target = vbr_rate = M*st->vbr_rate_norm;
 
-     /* Computes the max bit-rate allowed in VBR more to avoid busting the budget */
-     vbr_bound = vbr_rate;
-     max_allowed = (vbr_rate + vbr_bound - st->vbr_reservoir)>>(BITRES+3);
-     if (max_allowed < 4)
-        max_allowed = 4;
-     if (max_allowed < nbAvailableBytes)
-        nbAvailableBytes = max_allowed;
-     target=vbr_rate;
-
-     /* Shortblocks get a large boost in bitrate, but since they 
+     target = target + st->vbr_offset - ((40*C+20)<<BITRES);
+
+     /* Shortblocks get a large boost in bitrate, but since they
         are uncommon long blocks are not greatly affected */
      if (shortBlocks || tf_sum < -2*(st->end-st->start))
-        target*=2;
+        target = 7*target/4;
      else if (tf_sum < -(st->end-st->start))
         target = 3*target/2;
      else if (M > 1)
         target-=(target+14)/28;
 
-     /* The average energy is removed from the target and the actual 
-        energy added*/
-     target=target+st->vbr_offset-(50<<BITRES)+ec_enc_tell(enc, BITRES);
+     tell = ec_enc_tell(enc, BITRES);
+
+     /* The current offset is removed from the target and the space used
+        so far is added*/
+     target=target+tell;
+     /* By how much did we "miss" the target on that frame */
+     delta = target - vbr_rate;
 
-     /* In VBR mode the frame size must not be reduced so much that it would result in the coarse energy busting its budget */
-     target=IMIN(nbAvailableBytes<<(BITRES+3),target);
-     /* Make the adaptation coef (alpha) higher at the beginning */
-     if (st->vbr_count < 990)
+     /* Computes the max bit-rate allowed in VBR mode to avoid violating the target rate and buffering */
+     vbr_bound = vbr_rate;
+     if (st->constrained_vbr)
+        max_allowed = IMIN(vbr_rate+vbr_bound-st->vbr_reservoir>>(BITRES+3),nbAvailableBytes);
+     else
+        max_allowed = nbAvailableBytes;
+     min_allowed = (tell>>(BITRES+3)) + 2 - nbFilledBytes;
+
+     /* In VBR mode the frame size must not be reduced so much that it would result in the encoder running out of bits */
+     nbAvailableBytes = target+(1<<(BITRES+2))>>(BITRES+3);
+     nbAvailableBytes=IMAX(min_allowed,IMIN(max_allowed,nbAvailableBytes));
+     target=nbAvailableBytes<<(BITRES+3);
+
+     if (st->vbr_count < 970)
      {
         st->vbr_count++;
-        alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+10),16));
-        /*printf ("%d %d\n", st->vbr_count+10, alpha);*/
+        alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16));
      } else
         alpha = QCONST16(.001f,15);
-
-     /* By how much did we "miss" the target on that frame */
-     delta = (celt_int32)target - vbr_rate;
      /* How many bits have we used in excess of what we're allowed */
-     st->vbr_reservoir += delta;
+     if (st->constrained_vbr)
+        st->vbr_reservoir += target - vbr_rate;
      /*printf ("%d\n", st->vbr_reservoir);*/
 
      /* Compute the offset we need to apply in order to reach the target */
@@ -852,51 +1093,91 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
      /*printf ("%d\n", st->vbr_drift);*/
 
      /* We could use any multiple of vbr_rate as bound (depending on the delay) */
-     if (st->vbr_reservoir < 0)
+     if (st->constrained_vbr && st->vbr_reservoir < 0)
      {
         /* We're under the min value -- increase rate */
-        int adjust = 1-(st->vbr_reservoir-1)/(8<<BITRES);
-        st->vbr_reservoir += adjust*(8<<BITRES);
-        target += adjust;
+        int adjust = (-st->vbr_reservoir)/(8<<BITRES);
+        nbAvailableBytes += adjust;
+        st->vbr_reservoir = 0;
         /*printf ("+%d\n", adjust);*/
      }
-     if (nbAvailableBytes > target>>(BITRES+3))
-        nbAvailableBytes = target>>(BITRES+3);
-     nbCompressedBytes = nbAvailableBytes + nbFilledBytes;
+     nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
 
      /* This moves the raw bits to take into account the new compressed size */
      ec_byte_shrink(&buf, nbCompressedBytes);
    }
 
+   if (C==2)
+   {
+      /* Always use MS for 2.5 ms frames until we can do a better analysis */
+      if (LM==0)
+         dual_stereo = 0;
+      else
+         dual_stereo = stereo_analysis(st->mode, X, st->mode->nbEBands, LM, C, N);
+      ec_enc_bit_logp(enc, dual_stereo, 1);
+   }
+   if (C==2)
+   {
+      int effectiveRate;
+
+      /* Account for coarse energy */
+      effectiveRate = (8*effectiveBytes - 80)>>LM;
+
+      /* effectiveRate in kb/s */
+      effectiveRate = 2*effectiveRate/5;
+      if (effectiveRate<35)
+         intensity = 8;
+      else if (effectiveRate<50)
+         intensity = 12;
+      else if (effectiveRate<68)
+         intensity = 16;
+      else if (effectiveRate<84)
+         intensity = 18;
+      else if (effectiveRate<102)
+         intensity = 19;
+      else if (effectiveRate<130)
+         intensity = 20;
+      else
+         intensity = 100;
+      intensity = IMIN(st->end,IMAX(st->start, intensity));
+      ec_enc_uint(enc, intensity, 1+st->end-st->start);
+   }
+
    /* Bit allocation */
    ALLOC(fine_quant, st->mode->nbEBands, int);
    ALLOC(pulses, st->mode->nbEBands, int);
    ALLOC(fine_priority, st->mode->nbEBands, int);
 
-   bits = nbCompressedBytes*8 - ec_enc_tell(enc, 0) - 1;
-   codedBands = compute_allocation(st->mode, st->start, st->end, offsets, alloc_trim, bits, pulses, fine_quant, fine_priority, C, LM);
+   /* bits =   packet size        -       where we are           - safety */
+   bits = (nbCompressedBytes*8<<BITRES) - ec_enc_tell(enc, BITRES) - 1;
+   codedBands = compute_allocation(st->mode, st->start, st->end, offsets,
+         alloc_trim, bits, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands);
+   st->lastCodedBands = codedBands;
 
    quant_fine_energy(st->mode, st->start, st->end, bandE, oldBandE, error, fine_quant, enc, C);
 
 #ifdef MEASURE_NORM_MSE
    float X0[3000];
    float bandE0[60];
-   for (c=0;c<C;c++)
+   c=0; do 
       for (i=0;i<N;i++)
          X0[i+c*N] = X[i+c*N];
+   while (++c<C);
    for (i=0;i<C*st->mode->nbEBands;i++)
       bandE0[i] = bandE[i];
 #endif
 
    /* Residual quantisation */
-   quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, bandE, pulses, shortBlocks, has_fold, tf_res, resynth, nbCompressedBytes*8, enc, LM, codedBands);
+   quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL,
+         bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, intensity, tf_res, resynth,
+         nbCompressedBytes*8, enc, LM, codedBands);
 
    quant_energy_finalise(st->mode, st->start, st->end, bandE, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_enc_tell(enc, 0), enc, C);
 
+#ifdef RESYNTH
    /* Re-synthesis of the coded audio if required */
    if (resynth)
    {
-      VARDECL(celt_sig, _out_mem);
       celt_sig *out_mem[2];
       celt_sig *overlap_mem[2];
 
@@ -909,30 +1190,54 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
       /* Synthesis */
       denormalise_bands(st->mode, X, freq, bandE, effEnd, C, M);
 
-      for (c=0;c<C;c++)
+      CELT_MOVE(st->syn_mem[0], st->syn_mem[0]+N, MAX_PERIOD);
+      if (C==2)
+         CELT_MOVE(st->syn_mem[1], st->syn_mem[1]+N, MAX_PERIOD);
+
+      c=0; do
          for (i=0;i<M*st->mode->eBands[st->start];i++)
             freq[c*N+i] = 0;
-      for (c=0;c<C;c++)
+      while (++c<C);
+      c=0; do
          for (i=M*st->mode->eBands[st->end];i<N;i++)
             freq[c*N+i] = 0;
+      while (++c<C);
 
-      ALLOC(_out_mem, C*N, celt_sig);
+      out_mem[0] = st->syn_mem[0]+MAX_PERIOD;
+      if (C==2)
+         out_mem[1] = st->syn_mem[1]+MAX_PERIOD;
 
-      for (c=0;c<C;c++)
-      {
+      c=0; do
          overlap_mem[c] = _overlap_mem + c*st->overlap;
-         out_mem[c] = _out_mem+c*N;
-      }
+      while (++c<C);
 
       compute_inv_mdcts(st->mode, shortBlocks, freq, out_mem, overlap_mem, C, LM);
 
-      /* De-emphasis and put everything back at the right place 
-         in the synthesis history */
-      if (optional_resynthesis != NULL) {
-         deemphasis(out_mem, optional_resynthesis, N, C, st->mode->preemph, st->preemph_memD);
+#ifdef ENABLE_POSTFILTER
+      c=0; do {
+         st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
+         st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
+         if (LM!=0)
+         {
+            comb_filter(out_mem[c], out_mem[c], st->prefilter_period, st->prefilter_period, st->overlap, C,
+                  st->prefilter_gain, st->prefilter_gain, NULL, 0);
+            comb_filter(out_mem[c]+st->overlap, out_mem[c]+st->overlap, st->prefilter_period, pitch_index, N-st->overlap, C,
+                  st->prefilter_gain, gain1, st->mode->window, st->mode->overlap);
+         } else {
+            comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, N, C,
+                  st->prefilter_gain_old, st->prefilter_gain, st->mode->window, st->mode->overlap);
+         }
+      } while (++c<C);
+#endif /* ENABLE_POSTFILTER */
 
-      }
+      deemphasis(out_mem, (celt_word16*)pcm, N, C, st->mode->preemph, st->preemph_memD);
+      st->prefilter_period_old = st->prefilter_period;
+      st->prefilter_gain_old = st->prefilter_gain;
    }
+#endif
+
+   st->prefilter_period = pitch_index;
+   st->prefilter_gain = gain1;
 
    /* If there's any room left (can only happen for very high rates),
       fill it with zeros */
@@ -949,7 +1254,7 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
 
 #ifdef FIXED_POINT
 #ifndef DISABLE_FLOAT_API
-int celt_encode_with_ec_float(CELTEncoder * restrict st, const float * pcm, float * optional_resynthesis, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
+int celt_encode_with_ec_float(CELTEncoder * restrict st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
 {
    int j, ret, C, N, LM, M;
    VARDECL(celt_int16, in);
@@ -972,20 +1277,18 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const float * pcm, floa
    for (j=0;j<C*N;j++)
      in[j] = FLOAT2INT16(pcm[j]);
 
-   if (optional_resynthesis != NULL) {
-     ret=celt_encode_with_ec(st,in,in,frame_size,compressed,nbCompressedBytes, enc);
-      for (j=0;j<C*N;j++)
-         optional_resynthesis[j]=in[j]*(1.f/32768.f);
-   } else {
-     ret=celt_encode_with_ec(st,in,NULL,frame_size,compressed,nbCompressedBytes, enc);
-   }
+   ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, enc);
+#ifdef RESYNTH
+   for (j=0;j<C*N;j++)
+      ((float*)pcm)[j]=in[j]*(1.f/32768.f);
+#endif
    RESTORE_STACK;
    return ret;
 
 }
 #endif /*DISABLE_FLOAT_API*/
 #else
-int celt_encode_with_ec(CELTEncoder * restrict st, const celt_int16 * pcm, celt_int16 * optional_resynthesis, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
+int celt_encode_with_ec(CELTEncoder * restrict st, const celt_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
 {
    int j, ret, C, N, LM, M;
    VARDECL(celt_sig, in);
@@ -1008,13 +1311,11 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const celt_int16 * pcm, celt_
      in[j] = SCALEOUT(pcm[j]);
    }
 
-   if (optional_resynthesis != NULL) {
-      ret = celt_encode_with_ec_float(st,in,in,frame_size,compressed,nbCompressedBytes, enc);
-      for (j=0;j<C*N;j++)
-         optional_resynthesis[j] = FLOAT2INT16(in[j]);
-   } else {
-      ret = celt_encode_with_ec_float(st,in,NULL,frame_size,compressed,nbCompressedBytes, enc);
-   }
+   ret = celt_encode_with_ec_float(st,in,frame_size,compressed,nbCompressedBytes, enc);
+#ifdef RESYNTH
+   for (j=0;j<C*N;j++)
+      ((celt_int16*)pcm)[j] = FLOAT2INT16(in[j]);
+#endif
    RESTORE_STACK;
    return ret;
 }
@@ -1022,29 +1323,16 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const celt_int16 * pcm, celt_
 
 int celt_encode(CELTEncoder * restrict st, const celt_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
 {
-   return celt_encode_with_ec(st, pcm, NULL, frame_size, compressed, nbCompressedBytes, NULL);
+   return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
 }
 
 #ifndef DISABLE_FLOAT_API
 int celt_encode_float(CELTEncoder * restrict st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
 {
-   return celt_encode_with_ec_float(st, pcm, NULL, frame_size, compressed, nbCompressedBytes, NULL);
+   return celt_encode_with_ec_float(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
 }
 #endif /* DISABLE_FLOAT_API */
 
-int celt_encode_resynthesis(CELTEncoder * restrict st, const celt_int16 * pcm, celt_int16 * optional_resynthesis, int frame_size, unsigned char *compressed, int nbCompressedBytes)
-{
-   return celt_encode_with_ec(st, pcm, optional_resynthesis, frame_size, compressed, nbCompressedBytes, NULL);
-}
-
-#ifndef DISABLE_FLOAT_API
-int celt_encode_resynthesis_float(CELTEncoder * restrict st, const float * pcm, float * optional_resynthesis, int frame_size, unsigned char *compressed, int nbCompressedBytes)
-{
-   return celt_encode_with_ec_float(st, pcm, optional_resynthesis, frame_size, compressed, nbCompressedBytes, NULL);
-}
-#endif /* DISABLE_FLOAT_API */
-
-
 int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
 {
    va_list ap;
@@ -1099,6 +1387,12 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
          }   
       }
       break;
+      case CELT_SET_VBR_CONSTRAINT_REQUEST:
+      {
+         celt_int32 value = va_arg(ap, celt_int32);
+         st->constrained_vbr = value;
+      }
+      break;
       case CELT_SET_VBR_RATE_REQUEST:
       {
          celt_int32 value = va_arg(ap, celt_int32);
@@ -1117,8 +1411,9 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
          CELT_MEMSET((char*)&st->ENCODER_RESET_START, 0,
                celt_encoder_get_size(st->mode, st->channels)-
                ((char*)&st->ENCODER_RESET_START - (char*)st));
+         st->vbr_offset = 0;
          st->delayedIntra = 1;
-         st->fold_decision = 1;
+         st->spread_decision = SPREAD_NORMAL;
          st->tonal_average = QCONST16(1.f,8);
       }
       break;
@@ -1157,6 +1452,10 @@ struct CELTDecoder {
 
    int last_pitch_index;
    int loss_count;
+   int postfilter_period;
+   int postfilter_period_old;
+   celt_word16 postfilter_gain;
+   celt_word16 postfilter_gain_old;
 
    celt_sig preemph_memD[2];
    
@@ -1233,12 +1532,11 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p
    celt_word16 *lpc;
    SAVE_STACK;
    
-   for (c=0;c<C;c++)
-   {
+   c=0; do {
       decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+st->overlap);
       out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
       overlap_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE;
-   }
+   } while (++c<C);
    lpc = (celt_word16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*C);
 
    len = N+st->mode->overlap;
@@ -1267,8 +1565,7 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p
          fade = 0;
    }
 
-   for (c=0;c<C;c++)
-   {
+   c=0; do {
       /* FIXME: This is more memory than necessary */
       celt_word32 e[2*MAX_PERIOD];
       celt_word16 exc[2*MAX_PERIOD];
@@ -1305,6 +1602,8 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p
 
          _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
       }
+      for (i=0;i<LPC_ORDER;i++)
+         mem[i] = ROUND16(out_mem[c][MAX_PERIOD-1-i], SIG_SHIFT);
       fir(exc, lpc+c*LPC_ORDER, exc, MAX_PERIOD, LPC_ORDER, mem);
       /*for (i=0;i<MAX_PERIOD;i++)printf("%d ", exc[i]); printf("\n");*/
       /* Check if the waveform is decaying (and if so how fast) */
@@ -1328,21 +1627,29 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p
       /* Copy excitation, taking decay into account */
       for (i=0;i<len+st->mode->overlap;i++)
       {
+         celt_word16 tmp;
          if (offset+i >= MAX_PERIOD)
          {
             offset -= pitch_index;
             decay = MULT16_16_Q15(decay, decay);
          }
          e[i] = SHL32(EXTEND32(MULT16_16_Q15(decay, exc[offset+i])), SIG_SHIFT);
-         S1 += SHR32(MULT16_16(out_mem[c][offset+i],out_mem[c][offset+i]),8);
+         tmp = ROUND16(out_mem[c][offset+i],SIG_SHIFT);
+         S1 += SHR32(MULT16_16(tmp,tmp),8);
       }
-
+      for (i=0;i<LPC_ORDER;i++)
+         mem[i] = ROUND16(out_mem[c][MAX_PERIOD-1-i], SIG_SHIFT);
+      for (i=0;i<len+st->mode->overlap;i++)
+         e[i] = MULT16_32_Q15(fade, e[i]);
       iir(e, lpc+c*LPC_ORDER, e, len+st->mode->overlap, LPC_ORDER, mem);
 
       {
          celt_word32 S2=0;
          for (i=0;i<len+overlap;i++)
-            S2 += SHR32(MULT16_16(e[i],e[i]),8);
+         {
+            celt_word16 tmp = ROUND16(e[i],SIG_SHIFT);
+            S2 += SHR32(MULT16_16(tmp,tmp),8);
+         }
          /* This checks for an "explosion" in the synthesis */
 #ifdef FIXED_POINT
          if (!(S1 > SHR32(S2,2)))
@@ -1357,10 +1664,16 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p
          {
             celt_word16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1));
             for (i=0;i<len+overlap;i++)
-               e[i] = MULT16_16_Q15(ratio, e[i]);
+               e[i] = MULT16_32_Q15(ratio, e[i]);
          }
       }
 
+#ifdef ENABLE_POSTFILTER
+      /* Apply post-filter to the MDCT overlap of the previous frame */
+      comb_filter(out_mem[c]+MAX_PERIOD, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap, C,
+                  st->postfilter_gain, st->postfilter_gain, NULL, 0);
+#endif /* ENABLE_POSTFILTER */
+
       for (i=0;i<MAX_PERIOD+st->mode->overlap-N;i++)
          out_mem[c][i] = out_mem[c][N+i];
 
@@ -1368,23 +1681,31 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p
          previous and next frames */
       for (i=0;i<overlap/2;i++)
       {
-         celt_word32 tmp1, tmp2;
-         tmp1 = MULT16_32_Q15(st->mode->window[i          ], e[i          ]) -
-                MULT16_32_Q15(st->mode->window[overlap-i-1], e[overlap-i-1]);
-         tmp2 = MULT16_32_Q15(st->mode->window[i],           e[N+overlap-1-i]) +
-                MULT16_32_Q15(st->mode->window[overlap-i-1], e[N+i          ]);
-         tmp1 = MULT16_32_Q15(fade, tmp1);
-         tmp2 = MULT16_32_Q15(fade, tmp2);
-         out_mem[c][MAX_PERIOD+i] = MULT16_32_Q15(st->mode->window[overlap-i-1], tmp2);
-         out_mem[c][MAX_PERIOD+overlap-i-1] = MULT16_32_Q15(st->mode->window[i], tmp2);
-         out_mem[c][MAX_PERIOD-N+i] += MULT16_32_Q15(st->mode->window[i], tmp1);
-         out_mem[c][MAX_PERIOD-N+overlap-i-1] -= MULT16_32_Q15(st->mode->window[overlap-i-1], tmp1);
+         celt_word32 tmp;
+         tmp = MULT16_32_Q15(st->mode->window[i],           e[N+overlap-1-i]) +
+               MULT16_32_Q15(st->mode->window[overlap-i-1], e[N+i          ]);
+         out_mem[c][MAX_PERIOD+i] = MULT16_32_Q15(st->mode->window[overlap-i-1], tmp);
+         out_mem[c][MAX_PERIOD+overlap-i-1] = MULT16_32_Q15(st->mode->window[i], tmp);
       }
-      for (i=0;i<N-overlap;i++)
-         out_mem[c][MAX_PERIOD-N+overlap+i] = MULT16_32_Q15(fade, e[overlap+i]);
-   }
+      for (i=0;i<N;i++)
+         out_mem[c][MAX_PERIOD-N+i] = e[i];
+
+#ifdef ENABLE_POSTFILTER
+      /* Apply pre-filter to the MDCT overlap for the next frame (post-filter will be applied then) */
+      comb_filter(e, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap, C,
+                  -st->postfilter_gain, -st->postfilter_gain, NULL, 0);
+#endif /* ENABLE_POSTFILTER */
+      for (i=0;i<overlap;i++)
+         out_mem[c][MAX_PERIOD+i] = e[i];
+   } while (++c<C);
 
-   deemphasis(out_mem, pcm, N, C, st->mode->preemph, st->preemph_memD);
+   {
+      celt_word32 *out_syn[2];
+      out_syn[0] = out_mem[0]+MAX_PERIOD-N;
+      if (C==2)
+         out_syn[1] = out_mem[1]+MAX_PERIOD-N;
+      deemphasis(out_syn, pcm, N, C, st->mode->preemph, st->preemph_memD);
+   }
    
    st->loss_count++;
 
@@ -1399,7 +1720,7 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
 {
 #endif
    int c, i, N;
-   int has_fold;
+   int spread_decision;
    int bits;
    ec_dec _dec;
    ec_byte_buffer buf;
@@ -1427,6 +1748,11 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
    int effEnd;
    int codedBands;
    int alloc_trim;
+   int postfilter_pitch;
+   celt_word16 postfilter_gain;
+   int intensity=0;
+   int dual_stereo=0;
+   int dynalloc_prob;
    SAVE_STACK;
 
    if (pcm==NULL)
@@ -1439,12 +1765,11 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
       return CELT_BAD_ARG;
    M=1<<LM;
 
-   for (c=0;c<C;c++)
-   {
+   c=0; do {
       decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+st->overlap);
       out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
       overlap_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE;
-   }
+   } while (++c<C);
    lpc = (celt_word16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*C);
    oldBandE = lpc+C*LPC_ORDER;
 
@@ -1457,12 +1782,14 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
    ALLOC(freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
    ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
    ALLOC(bandE, st->mode->nbEBands*C, celt_ener);
-   for (c=0;c<C;c++)
+   c=0; do
       for (i=0;i<M*st->mode->eBands[st->start];i++)
          X[c*N+i] = 0;
-   for (c=0;c<C;c++)
+   while (++c<C);
+   c=0; do   
       for (i=M*st->mode->eBands[effEnd];i<N;i++)
          X[c*N+i] = 0;
+   while (++c<C);
 
    if (data == NULL)
    {
@@ -1486,14 +1813,32 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
    }
    nbAvailableBytes = len-nbFilledBytes;
 
+   if (ec_dec_bit_logp(dec, 1))
+   {
+#ifdef ENABLE_POSTFILTER
+      int qg, octave;
+      octave = ec_dec_uint(dec, 6);
+      postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave);
+      qg = ec_dec_bits(dec, 2);
+      postfilter_gain = QCONST16(.125f,15)*(qg+2);
+#else /* ENABLE_POSTFILTER */
+      RESTORE_STACK;
+      return CELT_CORRUPTED_DATA;
+#endif /* ENABLE_POSTFILTER */
+
+   } else {
+      postfilter_gain = 0;
+      postfilter_pitch = 0;
+   }
+
    /* Decode the global flags (first symbols in the stream) */
-   intra_ener = ec_dec_bit_prob(dec, 8192);
+   intra_ener = ec_dec_bit_logp(dec, 3);
    /* Get band energies */
    unquant_coarse_energy(st->mode, st->start, st->end, bandE, oldBandE,
-         intra_ener, st->mode->prob, dec, C, LM);
+         intra_ener, dec, C, LM);
 
    if (LM > 0)
-      isTransient = ec_dec_bit_prob(dec, 8192);
+      isTransient = ec_dec_bit_logp(dec, 3);
    else
       isTransient = 0;
 
@@ -1503,10 +1848,9 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
       shortBlocks = 0;
 
    ALLOC(tf_res, st->mode->nbEBands, int);
-   tf_decode(st->start, st->end, C, isTransient, tf_res, nbAvailableBytes, LM, dec);
+   tf_decode(st->start, st->end, C, isTransient, tf_res, LM, dec);
 
-   has_fold = ec_dec_bit_prob(dec, 8192)<<1;
-   has_fold |= ec_dec_bit_prob(dec, (has_fold>>1) ? 32768 : 49152);
+   spread_decision = ec_dec_cdf(dec, spread_cdf, 5);
 
    ALLOC(pulses, st->mode->nbEBands, int);
    ALLOC(offsets, st->mode->nbEBands, int);
@@ -1514,35 +1858,44 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
 
    for (i=0;i<st->mode->nbEBands;i++)
       offsets[i] = 0;
+   dynalloc_prob = 6;
    for (i=0;i<st->mode->nbEBands;i++)
    {
-      if (ec_dec_bit_prob(dec, 1024))
+      if (ec_dec_bit_logp(dec, dynalloc_prob))
       {
-         while (ec_dec_bit_prob(dec, 32768))
+         int width, quanta;
+         width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
+         /* quanta is 6 bits, but no more than 1 bit/sample
+            and no less than 1/8 bit/sample */
+         quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+         while (ec_dec_bit_logp(dec, 1))
             offsets[i]++;
          offsets[i]++;
-         offsets[i] *= (6<<BITRES);
+         offsets[i] *= quanta;
+         /* Making dynalloc more likely */
+         dynalloc_prob = IMAX(2, dynalloc_prob-1);
       }
    }
 
    ALLOC(fine_quant, st->mode->nbEBands, int);
+   alloc_trim = ec_dec_cdf(dec, trim_cdf, 7);
+
+   if (C==2)
    {
-      int fl;
-      int trim_index=0;
-      fl = ec_decode_bin(dec, 7);
-      while (trim_cdf[trim_index+1] <= fl)
-         trim_index++;
-      ec_dec_update(dec, trim_cdf[trim_index], trim_cdf[trim_index+1], 128);
-      alloc_trim = trim_coef[trim_index];
+      dual_stereo = ec_dec_bit_logp(dec, 1);
+      intensity = ec_dec_uint(dec, 1+st->end-st->start);
    }
 
-   bits = len*8 - ec_dec_tell(dec, 0) - 1;
-   codedBands = compute_allocation(st->mode, st->start, st->end, offsets, alloc_trim, bits, pulses, fine_quant, fine_priority, C, LM);
+   bits = (len*8<<BITRES) - ec_dec_tell(dec, BITRES) - 1;
+   codedBands = compute_allocation(st->mode, st->start, st->end, offsets,
+         alloc_trim, bits, pulses, fine_quant, fine_priority, C, LM, dec, 0, 0);
    
    unquant_fine_energy(st->mode, st->start, st->end, bandE, oldBandE, fine_quant, dec, C);
 
    /* Decode fixed codebook */
-   quant_all_bands(0, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, NULL, pulses, shortBlocks, has_fold, tf_res, 1, len*8, dec, LM, codedBands);
+   quant_all_bands(0, st->mode, st->start, st->end, X, C==2 ? X+N : NULL,
+         NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, 1,
+         len*8, dec, LM, codedBands);
 
    unquant_energy_finalise(st->mode, st->start, st->end, bandE, oldBandE,
          fine_quant, fine_priority, len*8-ec_dec_tell(dec, 0), dec, C);
@@ -1556,12 +1909,14 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
    if (C==2)
       CELT_MOVE(decode_mem[1], decode_mem[1]+N, DECODE_BUFFER_SIZE-N);
 
-   for (c=0;c<C;c++)
+   c=0; do
       for (i=0;i<M*st->mode->eBands[st->start];i++)
          freq[c*N+i] = 0;
-   for (c=0;c<C;c++)
+   while (++c<C);
+   c=0; do
       for (i=M*st->mode->eBands[effEnd];i<N;i++)
          freq[c*N+i] = 0;
+   while (++c<C);
 
    out_syn[0] = out_mem[0]+MAX_PERIOD-N;
    if (C==2)
@@ -1570,10 +1925,31 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
    /* Compute inverse MDCTs */
    compute_inv_mdcts(st->mode, shortBlocks, freq, out_syn, overlap_mem, C, LM);
 
+#ifdef ENABLE_POSTFILTER
+   c=0; do {
+      st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
+      st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
+      if (LM!=0)
+      {
+         comb_filter(out_syn[c], out_syn[c], st->postfilter_period, st->postfilter_period, st->overlap, C,
+               st->postfilter_gain, st->postfilter_gain, NULL, 0);
+         comb_filter(out_syn[c]+st->overlap, out_syn[c]+st->overlap, st->postfilter_period, postfilter_pitch, N-st->overlap, C,
+               st->postfilter_gain, postfilter_gain, st->mode->window, st->mode->overlap);
+      } else {
+         comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, N-st->overlap, C,
+               st->postfilter_gain_old, st->postfilter_gain, st->mode->window, st->mode->overlap);
+      }
+   } while (++c<C);
+   st->postfilter_period_old = st->postfilter_period;
+   st->postfilter_gain_old = st->postfilter_gain;
+   st->postfilter_period = postfilter_pitch;
+   st->postfilter_gain = postfilter_gain;
+#endif /* ENABLE_POSTFILTER */
+
    deemphasis(out_syn, pcm, N, C, st->mode->preemph, st->preemph_memD);
    st->loss_count = 0;
    RESTORE_STACK;
-   if (ec_dec_get_error(dec))
+   if (ec_dec_tell(dec,0) > 8*len || ec_dec_get_error(dec))
       return CELT_CORRUPTED_DATA;
    else
       return CELT_OK;