Propagate balance from compute_allocation() to quant_all_bands().
[opus.git] / libcelt / celt.c
index c05120b..c6c8124 100644 (file)
 #include "float_cast.h"
 #include <stdarg.h>
 #include "plc.h"
+#include "vq.h"
 
 static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0};
 /* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */
 static const unsigned char spread_icdf[4] = {25, 23, 2, 0};
 
+static const unsigned char tapset_icdf[3]={2,1,0};
+
 #define COMBFILTER_MAXPERIOD 1024
-#define COMBFILTER_MINPERIOD 16
+#define COMBFILTER_MINPERIOD 15
+
+static int resampling_factor(celt_int32 rate) /* Returns 48000/rate for the supported rates, 0 for any other rate */
+{
+   int ret;
+   switch (rate)
+   {
+   case 48000:
+      ret = 1;
+      break;
+   case 24000:
+      ret = 2;
+      break;
+   case 16000:
+      ret = 3;
+      break;
+   case 12000:
+      ret = 4;
+      break;
+   case 8000:
+      ret = 6;
+      break;
+   default:
+      ret = 0; /* unsupported rate: celt_encoder_init() reports this as CELT_BAD_ARG */
+   }
+   return ret;
+}
 
 /** Encoder state 
  @brief Encoder state
@@ -67,29 +96,36 @@ struct CELTEncoder {
    const CELTMode *mode;     /**< Mode used by the encoder */
    int overlap;
    int channels;
+   int stream_channels;
    
    int force_intra;
    int complexity;
+   int upsample;
    int start, end;
 
    celt_int32 vbr_rate_norm; /* Target number of 8th bits per frame */
    int constrained_vbr;      /* If zero, VBR can do whatever it likes with the rate */
 
    /* Everything beyond this point gets cleared on a reset */
-#define ENCODER_RESET_START frame_max
+#define ENCODER_RESET_START rng
 
-   celt_word32 frame_max;
+   ec_uint32 rng;
    int spread_decision;
    int delayedIntra;
    int tonal_average;
    int lastCodedBands;
+   int hf_average;
+   int tapset_decision;
 
    int prefilter_period;
    celt_word16 prefilter_gain;
+   int prefilter_tapset;
 #ifdef RESYNTH
    int prefilter_period_old;
    celt_word16 prefilter_gain_old;
+   int prefilter_tapset_old;
 #endif
+   int consec_transient;
 
    /* VBR-related parameters */
    celt_int32 vbr_reservoir;
@@ -107,26 +143,61 @@ struct CELTEncoder {
    celt_sig in_mem[1]; /* Size = channels*mode->overlap */
    /* celt_sig prefilter_mem[],  Size = channels*COMBFILTER_PERIOD */
    /* celt_sig overlap_mem[],  Size = channels*mode->overlap */
-   /* celt_word16 oldEBands[], Size = channels*mode->nbEBands */
+   /* celt_word16 oldEBands[], Size = 3*channels*mode->nbEBands (oldBandE, oldLogE, oldLogE2) */
 };
 
-int celt_encoder_get_size(const CELTMode *mode, int channels)
+int celt_encoder_get_size(int channels) /* Encoder state size in bytes for the built-in 48000/960 mode */
+{
+   CELTMode *mode = celt_mode_create(48000, 960, NULL); /* NOTE(review): passed on without a NULL check; get_size_custom() dereferences it */
+   return celt_encoder_get_size_custom(mode, channels);
+}
+
+int celt_encoder_get_size_custom(const CELTMode *mode, int channels) /* Bytes for struct CELTEncoder plus the buffers laid out after in_mem[] */
 {
    int size = sizeof(struct CELTEncoder)
          + (2*channels*mode->overlap-1)*sizeof(celt_sig)
          + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig)
-         + channels*mode->nbEBands*sizeof(celt_word16);
+         + 3*channels*mode->nbEBands*sizeof(celt_word16); /* room for oldBandE, oldLogE and oldLogE2 used by the encoder */
    return size;
 }
 
-CELTEncoder *celt_encoder_create(const CELTMode *mode, int channels, int *error)
+CELTEncoder *celt_encoder_create(int sampling_rate, int channels, int *error)
+{  /* Allocates and initialises an encoder for the built-in mode; returns NULL on failure. */
+   CELTEncoder *st = (CELTEncoder *)celt_alloc(celt_encoder_get_size(channels));
+   if (st==NULL && error!=NULL) *error = CELT_ALLOC_FAIL; /* init is skipped below, so report the failed allocation here */
+   if (st!=NULL && celt_encoder_init(st, sampling_rate, channels, error)==NULL)
+   {
+      celt_encoder_destroy(st); /* init rejected the arguments: hand the allocation back */
+      st = NULL;
+   }
+   return st;
+}
+
+CELTEncoder *celt_encoder_create_custom(const CELTMode *mode, int channels, int *error) /* Allocates and initialises an encoder for a caller-supplied mode; NULL on failure */
 {
-   return celt_encoder_init(
-         (CELTEncoder *)celt_alloc(celt_encoder_get_size(mode, channels)),
-         mode, channels, error);
+   CELTEncoder *st = (CELTEncoder *)celt_alloc(celt_encoder_get_size_custom(mode, channels));
+   if (celt_encoder_init_custom(st, mode, channels, error)==NULL && st!=NULL) /* init always runs so a NULL st is reported via *error */
+   {
+      celt_encoder_destroy(st); /* only a real allocation needs to be released */
+      st = NULL;
+   }
+   return st;
 }
 
-CELTEncoder *celt_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels, int *error)
+CELTEncoder *celt_encoder_init(CELTEncoder *st, int sampling_rate, int channels, int *error)
+{
+   /* Initialises st for the built-in 48000/960 mode; returns st on success, NULL on failure.
+      If init_custom() fails, st may be NULL or left uninitialised, so it must not be written. */
+   if (celt_encoder_init_custom(st, celt_mode_create(48000, 960, NULL), channels, error)==NULL)
+      return NULL;
+   st->upsample = resampling_factor(sampling_rate); /* 48000/sampling_rate, or 0 if unsupported */
+   if (st->upsample!=0)
+      return st;
+   if (error) *error = CELT_BAD_ARG; /* sampling_rate is not one resampling_factor() accepts */
+   return NULL;
+}
+
+CELTEncoder *celt_encoder_init_custom(CELTEncoder *st, const CELTMode *mode, int channels, int *error)
 {
    if (channels < 0 || channels > 2)
    {
@@ -135,19 +206,20 @@ CELTEncoder *celt_encoder_init(CELTEncoder *st, const CELTMode *mode, int channe
       return NULL;
    }
 
-   if (st==NULL)
+   if (st==NULL || mode==NULL)
    {
       if (error)
          *error = CELT_ALLOC_FAIL;
       return NULL;
    }
 
-   CELT_MEMSET((char*)st, 0, celt_encoder_get_size(mode, channels));
+   CELT_MEMSET((char*)st, 0, celt_encoder_get_size_custom(mode, channels));
    
    st->mode = mode;
    st->overlap = mode->overlap;
-   st->channels = channels;
+   st->stream_channels = st->channels = channels;
 
+   st->upsample = 1;
    st->start = 0;
    st->end = st->mode->effEBands;
    st->constrained_vbr = 1;
@@ -158,6 +230,8 @@ CELTEncoder *celt_encoder_init(CELTEncoder *st, const CELTMode *mode, int channe
    st->delayedIntra = 1;
    st->tonal_average = 256;
    st->spread_decision = SPREAD_NORMAL;
+   st->hf_average = 0;
+   st->tapset_decision = 0;
    st->complexity = 5;
 
    if (error)
@@ -191,7 +265,7 @@ static inline celt_word16 SIG2WORD16(celt_sig x)
 }
 
 static int transient_analysis(const celt_word32 * restrict in, int len, int C,
-                              celt_word32 *frame_max, int overlap)
+                              int overlap)
 {
    int i;
    VARDECL(celt_word16, tmp);
@@ -362,10 +436,11 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
    } while (++c<C);
 }
 
-static void deemphasis(celt_sig *in[], celt_word16 *pcm, int N, int _C, const celt_word16 *coef, celt_sig *mem)
+static void deemphasis(celt_sig *in[], celt_word16 *pcm, int N, int _C, int downsample, const celt_word16 *coef, celt_sig *mem)
 {
    const int C = CHANNELS(_C);
    int c;
+   int count=0;
    c=0; do {
       int j;
       celt_sig * restrict x;
@@ -379,9 +454,15 @@ static void deemphasis(celt_sig *in[], celt_word16 *pcm, int N, int _C, const ce
          m = MULT16_32_Q15(coef[0], tmp)
            - MULT16_32_Q15(coef[1], *x);
          tmp = SHL32(MULT16_32_Q15(coef[3], tmp), 2);
-         *y = SCALEOUT(SIG2WORD16(tmp));
          x++;
-         y+=C;
+         /* Technically the store could be moved outside of the if because
+            the stores we don't want will just be overwritten */
+         if (++count==downsample)
+         {
+            *y = SCALEOUT(SIG2WORD16(tmp));
+            y+=C;
+            count=0;
+         }
       }
       mem[c] = m;
    } while (++c<C);
@@ -389,40 +470,46 @@ static void deemphasis(celt_sig *in[], celt_word16 *pcm, int N, int _C, const ce
 
 #ifdef ENABLE_POSTFILTER
 static void comb_filter(celt_word32 *y, celt_word32 *x, int T0, int T1, int N,
-      int C, celt_word16 g0, celt_word16 g1, const celt_word16 *window, int overlap)
+      celt_word16 g0, celt_word16 g1, int tapset0, int tapset1, /* tapsetK picks the gains[] row used with period TK and gain gK */
+      const celt_word16 *window, int overlap)
 {
    int i;
    /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
    celt_word16 g00, g01, g02, g10, g11, g12;
-   celt_word16 t0, t1, t2;
-   /* zeros at theta = +/- 5*pi/6 */
-   t0 = QCONST16(.26795f, 15);
-   t1 = QCONST16(.46410f, 15);
-   t2 = QCONST16(.26795f, 15);
-   g00 = MULT16_16_Q15(g0, t0);
-   g01 = MULT16_16_Q15(g0, t1);
-   g02 = MULT16_16_Q15(g0, t2);
-   g10 = MULT16_16_Q15(g1, t0);
-   g11 = MULT16_16_Q15(g1, t1);
-   g12 = MULT16_16_Q15(g1, t2);
+   static const celt_word16 gains[3][3] = { /* row = tapset; columns = weight at lag T, at T+/-1, at T+/-2 */
+         {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
+         {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
+         {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
+   g00 = MULT16_16_Q15(g0, gains[tapset0][0]); /* g0x: old filter taps scaled by g0 */
+   g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
+   g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
+   g10 = MULT16_16_Q15(g1, gains[tapset1][0]); /* g1x: new filter taps scaled by g1 */
+   g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
+   g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
    for (i=0;i<overlap;i++)
    {
       celt_word16 f;
       f = MULT16_16_Q15(window[i],window[i]);
       y[i] = x[i]
-               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0])
-               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0-1])
-               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0+1])
-               + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1])
-               + MULT16_32_Q15(MULT16_16_Q15(f,g10),x[i-T1-1])
-               + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1+1]);
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0]) /* old filter (T0) weighted by 1-window[i]^2 */
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0-1])
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0+1])
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0-2])
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0+2])
+               + MULT16_32_Q15(MULT16_16_Q15(f,g10),x[i-T1]) /* new filter (T1) weighted by window[i]^2 */
+               + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1-1])
+               + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1+1])
+               + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1-2])
+               + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1+2]);
 
    }
    for (i=overlap;i<N;i++)
       y[i] = x[i]
-               + MULT16_32_Q15(g11,x[i-T1])
-               + MULT16_32_Q15(g10,x[i-T1-1])
-               + MULT16_32_Q15(g12,x[i-T1+1]);
+               + MULT16_32_Q15(g10,x[i-T1]) /* after the overlap only the new filter is applied; reads x[] back to i-T1-2 */
+               + MULT16_32_Q15(g11,x[i-T1-1])
+               + MULT16_32_Q15(g11,x[i-T1+1])
+               + MULT16_32_Q15(g12,x[i-T1-2])
+               + MULT16_32_Q15(g12,x[i-T1+2]);
 }
 #endif /* ENABLE_POSTFILTER */
 
@@ -430,7 +517,7 @@ static const signed char tf_select_table[4][8] = {
       {0, -1, 0, -1,    0,-1, 0,-1},
       {0, -1, 0, -2,    1, 0, 1,-1},
       {0, -2, 0, -3,    2, 0, 1,-1},
-      {0, -2, 0, -3,    2, 0, 1,-1},
+      {0, -2, 0, -3,    3, 0, 1,-1},
 };
 
 static celt_word32 l1_metric(const celt_norm *tmp, int N, int LM, int width)
@@ -591,33 +678,79 @@ static int tf_analysis(const CELTMode *m, celt_word16 *bandLogE, celt_word16 *ol
 static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc)
 {
    int curr, i;
-   if (LM!=0)
-      ec_enc_bit_logp(enc, tf_select, 1);
-   ec_enc_bit_logp(enc, tf_res[start], isTransient ? 2 : 4);
-   curr = tf_res[start];
-   for (i=start+1;i<end;i++)
+   int tf_select_rsv;
+   int tf_changed;
+   int logp;
+   ec_uint32 budget;
+   ec_uint32 tell;
+   budget = enc->buf->storage*8; /* presumably the output buffer size in bits -- TODO confirm storage is in bytes */
+   tell = ec_enc_tell(enc, 0);
+   logp = isTransient ? 2 : 4; /* logp for the first band's flag */
+   /* Reserve space to code the tf_select decision. */
+   tf_select_rsv = LM>0 && tell+logp+1 <= budget;
+   budget -= tf_select_rsv; /* take the reserved bit out of what the per-band flags may use */
+   curr = tf_changed = 0;
+   for (i=start;i<end;i++)
    {
-      ec_enc_bit_logp(enc, tf_res[i] ^ curr, isTransient ? 4 : 5);
-      curr = tf_res[i];
+      if (tell+logp<=budget) /* code the change flag only while it still fits */
+      {
+         ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp);
+         tell = ec_enc_tell(enc, 0);
+         curr = tf_res[i];
+         tf_changed |= curr;
+      }
+      else
+         tf_res[i] = curr; /* nothing coded: keep the previous value, as tf_decode() does */
+      logp = isTransient ? 4 : 5; /* logp for every band after the first */
    }
+   /* Only code tf_select if it would actually make a difference. */
+   if (tf_select_rsv &&
+         tf_select_table[LM][4*isTransient+0+tf_changed]!=
+         tf_select_table[LM][4*isTransient+2+tf_changed])
+      ec_enc_bit_logp(enc, tf_select, 1);
+   else
+      tf_select = 0; /* not coded; tf_decode() defaults to 0 in this case */
    for (i=start;i<end;i++)
       tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
    /*printf("%d %d ", isTransient, tf_select); for(i=0;i<end;i++)printf("%d ", tf_res[i]);printf("\n");*/
 }
 
-static void tf_decode(int start, int end, int C, int isTransient, int *tf_res, int LM, ec_dec *dec)
+static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec) /* Decoder counterpart of tf_encode(): fills tf_res[start..end-1] */
 {
    int i, curr, tf_select;
-   if (LM!=0)
+   int tf_select_rsv;
+   int tf_changed;
+   int logp;
+   ec_uint32 budget;
+   ec_uint32 tell;
+
+   budget = dec->buf->storage*8; /* presumably the input buffer size in bits -- TODO confirm storage is in bytes */
+   tell = ec_dec_tell(dec, 0);
+   logp = isTransient ? 2 : 4; /* logp for the first band's flag */
+   tf_select_rsv = LM>0 && tell+logp+1<=budget; /* same reservation test as tf_encode() */
+   budget -= tf_select_rsv;
+   tf_changed = curr = 0;
+   for (i=start;i<end;i++)
+   {
+      if (tell+logp<=budget) /* a change flag is read only while it fits in the budget */
+      {
+         curr ^= ec_dec_bit_logp(dec, logp);
+         tell = ec_dec_tell(dec, 0);
+         tf_changed |= curr;
+      }
+      tf_res[i] = curr; /* bands with no flag read repeat the previous value */
+      logp = isTransient ? 4 : 5; /* logp for every band after the first */
+   }
+   tf_select = 0; /* default when the tf_select bit is not present */
+   if (tf_select_rsv &&
+     tf_select_table[LM][4*isTransient+0+tf_changed] !=
+     tf_select_table[LM][4*isTransient+2+tf_changed])
+   {
       tf_select = ec_dec_bit_logp(dec, 1);
-   else
-      tf_select = 0;
-   curr = ec_dec_bit_logp(dec, isTransient ? 2 : 4);
-   tf_res[start] = tf_select_table[LM][4*isTransient+2*tf_select+curr];
-   for (i=start+1;i<end;i++)
+   }
+   for (i=start;i<end;i++)
    {
-      curr = ec_dec_bit_logp(dec, isTransient ? 4 : 5) ^ curr;
-      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+curr];
+      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]]; /* map each raw flag through tf_select_table */
    }
 }
 
@@ -678,7 +811,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
 }
 
 static int stereo_analysis(const CELTMode *m, const celt_norm *X,
-      int nbEBands, int LM, int C, int N0)
+      int LM, int N0)
 {
    int i;
    int thetas;
@@ -727,16 +860,19 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
    VARDECL(int, fine_quant);
    VARDECL(celt_word16, error);
    VARDECL(int, pulses);
+   VARDECL(int, cap);
    VARDECL(int, offsets);
    VARDECL(int, fine_priority);
    VARDECL(int, tf_res);
+   VARDECL(unsigned char, collapse_masks);
    celt_sig *_overlap_mem;
    celt_sig *prefilter_mem;
-   celt_word16 *oldBandE;
+   celt_word16 *oldBandE, *oldLogE, *oldLogE2;
    int shortBlocks=0;
    int isTransient=0;
    int resynth;
-   const int C = CHANNELS(st->channels);
+   const int CC = CHANNELS(st->channels);
+   const int C = CHANNELS(st->stream_channels);
    int LM, M;
    int tf_select;
    int nbFilledBytes, nbAvailableBytes;
@@ -750,12 +886,23 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
    int dual_stereo=0;
    int effectiveBytes;
    celt_word16 pf_threshold;
-   int dynalloc_prob;
+   int dynalloc_logp;
+   celt_int32 vbr_rate;
+   celt_int32 total_bits;
+   celt_int32 total_boost;
+   celt_int32 balance;
+   celt_int32 tell;
+   int prefilter_tapset=0;
+   int pf_on;
+   int anti_collapse_rsv;
+   int anti_collapse_on=0;
+   int silence=0;
    SAVE_STACK;
 
-   if (nbCompressedBytes<0 || pcm==NULL)
+   if (nbCompressedBytes<2 || pcm==NULL)
      return CELT_BAD_ARG;
 
+   frame_size *= st->upsample;
    for (LM=0;LM<4;LM++)
       if (st->mode->shortMdctSize<<LM==frame_size)
          break;
@@ -763,33 +910,63 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
       return CELT_BAD_ARG;
    M=1<<LM;
 
-   prefilter_mem = st->in_mem+C*(st->overlap);
-   _overlap_mem = prefilter_mem+C*COMBFILTER_MAXPERIOD;
+   prefilter_mem = st->in_mem+CC*(st->overlap);
+   _overlap_mem = prefilter_mem+CC*COMBFILTER_MAXPERIOD;
    /*_overlap_mem = st->in_mem+C*(st->overlap);*/
-   oldBandE = (celt_word16*)(st->in_mem+C*(2*st->overlap+COMBFILTER_MAXPERIOD));
+   oldBandE = (celt_word16*)(st->in_mem+CC*(2*st->overlap+COMBFILTER_MAXPERIOD));
+   oldLogE = oldBandE + CC*st->mode->nbEBands;
+   oldLogE2 = oldLogE + CC*st->mode->nbEBands;
 
    if (enc==NULL)
    {
       ec_byte_writeinit_buffer(&buf, compressed, nbCompressedBytes);
       ec_enc_init(&_enc,&buf);
       enc = &_enc;
+      tell=1;
       nbFilledBytes=0;
    } else {
-      nbFilledBytes=(ec_enc_tell(enc, 0)+4)>>3;
+      tell=ec_enc_tell(enc, 0);
+      nbFilledBytes=(tell+4)>>3;
    }
    nbAvailableBytes = nbCompressedBytes - nbFilledBytes;
 
-   if (st->vbr_rate_norm>0)
+   vbr_rate = st->vbr_rate_norm<<LM;
+   if (vbr_rate>0)
+   {
       effectiveBytes = st->vbr_rate_norm>>BITRES<<LM>>3;
-   else
+      /* Computes the max bit-rate allowed in VBR mode to avoid violating the
+          target rate and buffering.
+         We must do this up front so that bust-prevention logic triggers
+          correctly if we don't have enough bits. */
+      if (st->constrained_vbr)
+      {
+         celt_int32 vbr_bound;
+         celt_int32 max_allowed;
+         /* We could use any multiple of vbr_rate as bound (depending on the
+             delay).
+            This is clamped to ensure we use at least two bytes if the encoder
+             was entirely empty, but to allow 0 in hybrid mode. */
+         vbr_bound = vbr_rate;
+         max_allowed = IMIN(IMAX(tell==1?2:0,
+               vbr_rate+vbr_bound-st->vbr_reservoir>>(BITRES+3)),
+               nbAvailableBytes);
+         if(max_allowed < nbAvailableBytes)
+         {
+            nbCompressedBytes = nbFilledBytes+max_allowed;
+            nbAvailableBytes = max_allowed;
+            ec_byte_shrink(&buf, nbCompressedBytes);
+         }
+      }
+   } else
       effectiveBytes = nbCompressedBytes;
+   total_bits = nbCompressedBytes*8;
 
    effEnd = st->end;
    if (effEnd > st->mode->effEBands)
       effEnd = st->mode->effEBands;
 
    N = M*st->mode->shortMdctSize;
-   ALLOC(in, C*(N+st->overlap), celt_sig);
+   ALLOC(in, CC*(N+st->overlap), celt_sig);
 
    /* Find pitch period and gain */
    {
@@ -797,49 +974,77 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
       celt_sig *pre[2];
       SAVE_STACK;
       c = 0;
-      ALLOC(_pre, C*(N+COMBFILTER_MAXPERIOD), celt_sig);
+      ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);
 
       pre[0] = _pre;
       pre[1] = _pre + (N+COMBFILTER_MAXPERIOD);
 
+      silence = 1;
       c=0; do {
+         int count = 0;
          const celt_word16 * restrict pcmp = pcm+c;
          celt_sig * restrict inp = in+c*(N+st->overlap)+st->overlap;
 
          for (i=0;i<N;i++)
          {
+            celt_sig x, tmp;
+
+            x = SCALEIN(*pcmp);
+            if (++count==st->upsample)
+            {
+               count=0;
+               pcmp+=CC;
+            } else {
+               x = 0;
+            }
             /* Apply pre-emphasis */
-            celt_sig tmp = MULT16_16(st->mode->preemph[2], SCALEIN(*pcmp));
+            tmp = MULT16_16(st->mode->preemph[2], x);
             *inp = tmp + st->preemph_memE[c];
             st->preemph_memE[c] = MULT16_32_Q15(st->mode->preemph[1], *inp)
                                    - MULT16_32_Q15(st->mode->preemph[0], tmp);
+            silence = silence && *inp == 0;
             inp++;
-            pcmp+=C;
          }
          CELT_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
          CELT_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
-      } while (++c<C);
+      } while (++c<CC);
 
+      if (tell==1)
+         ec_enc_bit_logp(enc, silence, 15);
+      else
+         silence=0;
+      if (silence)
+      {
+         /*In VBR mode there is no need to send more than the minimum. */
+         if (vbr_rate>0)
+         {
+            effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2);
+            total_bits=nbCompressedBytes*8;
+            nbAvailableBytes=2;
+            ec_byte_shrink(&buf, nbCompressedBytes);
+         }
+         /* Pretend we've filled all the remaining bits with zeros
+            (that's what the initialiser did anyway) */
+         tell = nbCompressedBytes*8;
+         enc->nbits_total+=tell-ec_enc_tell(enc,0);
+      }
 #ifdef ENABLE_POSTFILTER
-      if (nbAvailableBytes>12*C && st->start==0)
+      if (nbAvailableBytes>12*C && st->start==0 && !silence)
       {
          VARDECL(celt_word16, pitch_buf);
          ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, celt_word16);
-         celt_word32 tmp=0;
-         celt_word32 mem0[2]={0,0};
-         celt_word16 mem1[2]={0,0};
-
-         pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD+N,
-                          C, mem0, mem1);
-         pitch_search(st->mode, pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
-               COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index, &tmp, 1<<LM);
+
+         pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
+         pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
+               COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index);
          pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
 
          gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
                N, &pitch_index, st->prefilter_period, st->prefilter_gain);
-         if (pitch_index > COMBFILTER_MAXPERIOD)
-            pitch_index = COMBFILTER_MAXPERIOD;
+         if (pitch_index > COMBFILTER_MAXPERIOD-2)
+            pitch_index = COMBFILTER_MAXPERIOD-2;
          gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
+         prefilter_tapset = st->tapset_decision;
       } else {
          gain1 = 0;
       }
@@ -863,15 +1068,17 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
       pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15));
       if (gain1<pf_threshold)
       {
-         ec_enc_bit_logp(enc, 0, 1);
+         if(st->start==0 && tell+17<=total_bits)
+            ec_enc_bit_logp(enc, 0, 1);
          gain1 = 0;
+         pf_on = 0;
       } else {
          int qg;
          int octave;
 
          if (gain1 > QCONST16(.6f,15))
             gain1 = QCONST16(.6f,15);
-         if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1,15))
+         if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15))
             gain1=st->prefilter_gain;
 
 #ifdef FIXED_POINT
@@ -880,15 +1087,21 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
          qg = floor(.5+gain1*8)-2;
 #endif
          ec_enc_bit_logp(enc, 1, 1);
+         pitch_index += 1;
          octave = EC_ILOG(pitch_index)-5;
          ec_enc_uint(enc, octave, 6);
          ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave);
+         pitch_index -= 1;
          ec_enc_bits(enc, qg, 2);
          gain1 = QCONST16(.125f,15)*(qg+2);
+         ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2);
+         pf_on = 1;
       }
       /*printf("%d %f\n", pitch_index, gain1);*/
 #else /* ENABLE_POSTFILTER */
-      ec_enc_bit_logp(enc, 0, 1);
+      if(st->start==0 && tell+17<=total_bits)
+         ec_enc_bit_logp(enc, 0, 1);
+      pf_on = 0;
 #endif /* ENABLE_POSTFILTER */
 
       c=0; do {
@@ -896,7 +1109,8 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
          CELT_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap);
 #ifdef ENABLE_POSTFILTER
          comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD,
-               st->prefilter_period, pitch_index, N, C, -st->prefilter_gain, -gain1, st->mode->window, st->mode->overlap);
+               st->prefilter_period, pitch_index, N, -st->prefilter_gain, -gain1,
+               st->prefilter_tapset, prefilter_tapset, st->mode->window, st->mode->overlap);
 #endif /* ENABLE_POSTFILTER */
          CELT_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap);
 
@@ -909,7 +1123,7 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
             CELT_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
          }
 #endif /* ENABLE_POSTFILTER */
-      } while (++c<C);
+      } while (++c<CC);
 
       RESTORE_STACK;
    }
@@ -920,25 +1134,42 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
    resynth = 0;
 #endif
 
-   if (st->complexity > 1 && LM>0)
+   isTransient = 0;
+   shortBlocks = 0;
+   if (LM>0 && ec_enc_tell(enc, 0)+3<=total_bits)
    {
-      isTransient = M > 1 &&
-         transient_analysis(in, N+st->overlap, C, &st->frame_max, st->overlap);
-   } else {
-      isTransient = 0;
+      if (st->complexity > 1)
+      {
+         isTransient = transient_analysis(in, N+st->overlap, CC,
+                  st->overlap);
+         if (isTransient)
+            shortBlocks = M;
+      }
+      ec_enc_bit_logp(enc, isTransient, 3);
    }
 
-   if (isTransient)
-      shortBlocks = M;
-   else
-      shortBlocks = 0;
-
-   ALLOC(freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
-   ALLOC(bandE,st->mode->nbEBands*C, celt_ener);
-   ALLOC(bandLogE,st->mode->nbEBands*C, celt_word16);
+   ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
+   ALLOC(bandE,st->mode->nbEBands*CC, celt_ener);
+   ALLOC(bandLogE,st->mode->nbEBands*CC, celt_word16);
    /* Compute MDCTs */
-   compute_mdcts(st->mode, shortBlocks, in, freq, C, LM);
+   compute_mdcts(st->mode, shortBlocks, in, freq, CC, LM);
 
+   if (CC==2&&C==1)
+   {
+      for (i=0;i<N;i++)
+         freq[i] = ADD32(HALF32(freq[i]), HALF32(freq[N+i]));
+   }
+   if (st->upsample != 1)
+   {
+      c=0; do
+      {
+         int bound = N/st->upsample;
+         for (i=0;i<bound;i++)
+            freq[c*N+i] *= st->upsample;
+         for (;i<N;i++)
+            freq[c*N+i] = 0;
+      } while (++c<C);
+   }
    ALLOC(X, C*N, celt_norm);         /**< Interleaved normalised MDCTs */
 
    compute_band_energies(st->mode, freq, bandE, effEnd, C, M);
@@ -956,31 +1187,34 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
 
    ALLOC(error, C*st->mode->nbEBands, celt_word16);
    quant_coarse_energy(st->mode, st->start, st->end, effEnd, bandLogE,
-         oldBandE, nbCompressedBytes*8, error, enc,
+         oldBandE, total_bits, error, enc,
          C, LM, nbAvailableBytes, st->force_intra,
          &st->delayedIntra, st->complexity >= 4);
 
-   if (LM > 0)
-      ec_enc_bit_logp(enc, shortBlocks!=0, 3);
-
    tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
 
-   if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C)
+   st->spread_decision = SPREAD_NORMAL;
+   if (ec_enc_tell(enc, 0)+4<=total_bits)
    {
-      if (st->complexity == 0)
+      if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C)
       {
-         st->spread_decision = SPREAD_NONE;
+         if (st->complexity == 0)
+            st->spread_decision = SPREAD_NONE;
       } else {
-         st->spread_decision = SPREAD_NORMAL;
+         st->spread_decision = spreading_decision(st->mode, X,
+               &st->tonal_average, st->spread_decision, &st->hf_average,
+               &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
       }
-   } else {
-      st->spread_decision = spreading_decision(st->mode, X, &st->tonal_average, st->spread_decision, effEnd, C, M);
+      ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
    }
-   ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
 
+   ALLOC(cap, st->mode->nbEBands, int);
    ALLOC(offsets, st->mode->nbEBands, int);
 
    for (i=0;i<st->mode->nbEBands;i++)
+      cap[i] = st->mode->cache.caps[st->mode->nbEBands*(2*LM+C-1)+i]
+            << C+LM+BITRES-2;
+   for (i=0;i<st->mode->nbEBands;i++)
       offsets[i] = 0;
    /* Dynamic allocation code */
    /* Make sure that dynamic allocation can't make us bust the budget */
@@ -1008,42 +1242,59 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
             offsets[i] += 1;
       }
    }
-   dynalloc_prob = 6;
+   dynalloc_logp = 6;
+   total_bits<<=BITRES;
+   total_boost = 0;
+   tell = ec_enc_tell(enc, BITRES);
    for (i=st->start;i<st->end;i++)
    {
+      int width, quanta;
+      int dynalloc_loop_logp;
+      int boost;
       int j;
-      ec_enc_bit_logp(enc, offsets[i]!=0, dynalloc_prob);
-      if (offsets[i]!=0)
+      width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
+      /* quanta is 6 bits, but no more than 1 bit/sample
+         and no less than 1/8 bit/sample */
+      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+      dynalloc_loop_logp = dynalloc_logp;
+      boost = 0;
+      for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost
+            && boost < cap[i]; j++)
       {
-         int width, quanta;
-         width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
-         /* quanta is 6 bits, but no more than 1 bit/sample
-            and no less than 1/8 bit/sample */
-         quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
-         for (j=0;j<offsets[i]-1;j++)
-            ec_enc_bit_logp(enc, 1, 1);
-         ec_enc_bit_logp(enc, 0, 1);
-         offsets[i] *= quanta;
-         /* Making dynalloc more likely */
-         dynalloc_prob = IMAX(2, dynalloc_prob-1);
+         int flag;
+         flag = j<offsets[i];
+         ec_enc_bit_logp(enc, flag, dynalloc_loop_logp);
+         tell = ec_enc_tell(enc, BITRES);
+         if (!flag)
+            break;
+         boost += quanta;
+         total_boost += quanta;
+         dynalloc_loop_logp = 1;
       }
+      /* Making dynalloc more likely */
+      if (j)
+         dynalloc_logp = IMAX(2, dynalloc_logp-1);
+      offsets[i] = boost;
+   }
+   alloc_trim = 5;
+   if (tell+(6<<BITRES) <= total_bits - total_boost)
+   {
+      alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE,
+            st->mode->nbEBands, LM, C, N);
+      ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
+      tell = ec_enc_tell(enc, BITRES);
    }
-   alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE, st->mode->nbEBands, LM, C, N);
-   ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
 
    /* Variable bitrate */
-   if (st->vbr_rate_norm>0)
+   if (vbr_rate>0)
    {
      celt_word16 alpha;
-     celt_int32 delta, tell;
+     celt_int32 delta;
      /* The target rate in 8th bits per frame */
-     celt_int32 vbr_rate;
      celt_int32 target;
-     celt_int32 vbr_bound, max_allowed, min_allowed;
+     celt_int32 min_allowed;
 
-     target = vbr_rate = M*st->vbr_rate_norm;
-
-     target = target + st->vbr_offset - ((40*C+20)<<BITRES);
+     target = vbr_rate + st->vbr_offset - ((40*C+20)<<BITRES);
 
      /* Shortblocks get a large boost in bitrate, but since they
         are uncommon long blocks are not greatly affected */
@@ -1054,25 +1305,29 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
      else if (M > 1)
         target-=(target+14)/28;
 
-     tell = ec_enc_tell(enc, BITRES);
-
      /* The current offset is removed from the target and the space used
         so far is added*/
      target=target+tell;
-     /* By how much did we "miss" the target on that frame */
-     delta = target - vbr_rate;
 
-     /* Computes the max bit-rate allowed in VBR more to avoid violating the target rate and buffering */
-     vbr_bound = vbr_rate;
-     if (st->constrained_vbr)
-        max_allowed = IMIN(vbr_rate+vbr_bound-st->vbr_reservoir>>(BITRES+3),nbAvailableBytes);
-     else
-        max_allowed = nbAvailableBytes;
-     min_allowed = (tell>>(BITRES+3)) + 2 - nbFilledBytes;
+     /* In VBR mode the frame size must not be reduced so much that it would
+         result in the encoder running out of bits.
+        The margin of 2 bytes ensures that none of the bust-prevention logic
+         in the decoder will have triggered so far. */
+     min_allowed = (tell+total_boost+(1<<BITRES+3)-1>>(BITRES+3)) + 2 - nbFilledBytes;
 
-     /* In VBR mode the frame size must not be reduced so much that it would result in the encoder running out of bits */
      nbAvailableBytes = target+(1<<(BITRES+2))>>(BITRES+3);
-     nbAvailableBytes=IMAX(min_allowed,IMIN(max_allowed,nbAvailableBytes));
+     nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes);
+     nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes;
+
+     if(silence)
+     {
+       nbAvailableBytes = 2;
+       target = 2*8<<BITRES;
+     }
+
+     /* By how much did we "miss" the target on that frame */
+     delta = target - vbr_rate;
+
      target=nbAvailableBytes<<(BITRES+3);
 
      if (st->vbr_count < 970)
@@ -1091,34 +1346,27 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
      st->vbr_offset = -st->vbr_drift;
      /*printf ("%d\n", st->vbr_drift);*/
 
-     /* We could use any multiple of vbr_rate as bound (depending on the delay) */
      if (st->constrained_vbr && st->vbr_reservoir < 0)
      {
         /* We're under the min value -- increase rate */
         int adjust = (-st->vbr_reservoir)/(8<<BITRES);
-        nbAvailableBytes += adjust;
+        /* Unless we're just coding silence */
+        nbAvailableBytes += silence?0:adjust;
         st->vbr_reservoir = 0;
         /*printf ("+%d\n", adjust);*/
      }
      nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
-
      /* This moves the raw bits to take into account the new compressed size */
      ec_byte_shrink(&buf, nbCompressedBytes);
    }
-
-   if (C==2)
-   {
-      /* Always use MS for 2.5 ms frames until we can do a better analysis */
-      if (LM==0)
-         dual_stereo = 0;
-      else
-         dual_stereo = stereo_analysis(st->mode, X, st->mode->nbEBands, LM, C, N);
-      ec_enc_bit_logp(enc, dual_stereo, 1);
-   }
    if (C==2)
    {
       int effectiveRate;
 
+      /* Always use MS for 2.5 ms frames until we can do a better analysis */
+      if (LM!=0)
+         dual_stereo = stereo_analysis(st->mode, X, LM, N);
+
       /* Account for coarse energy */
       effectiveRate = (8*effectiveBytes - 80)>>LM;
 
@@ -1139,7 +1387,6 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
       else
          intensity = 100;
       intensity = IMIN(st->end,IMAX(st->start, intensity));
-      ec_enc_uint(enc, intensity-st->start, 1+st->end-st->start);
    }
 
    /* Bit allocation */
@@ -1147,13 +1394,16 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
    ALLOC(pulses, st->mode->nbEBands, int);
    ALLOC(fine_priority, st->mode->nbEBands, int);
 
-   /* bits =   packet size        -       where we are           - safety */
+   /* bits =   packet size        -       where we are         - safety*/
    bits = (nbCompressedBytes*8<<BITRES) - ec_enc_tell(enc, BITRES) - 1;
-   codedBands = compute_allocation(st->mode, st->start, st->end, offsets,
-         alloc_trim, bits, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands);
+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=(LM+2<<BITRES) ? (1<<BITRES) : 0;
+   bits -= anti_collapse_rsv;
+   codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap,
+         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
+         fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands);
    st->lastCodedBands = codedBands;
 
-   quant_fine_energy(st->mode, st->start, st->end, bandE, oldBandE, error, fine_quant, enc, C);
+   quant_fine_energy(st->mode, st->start, st->end, oldBandE, error, fine_quant, enc, C);
 
 #ifdef MEASURE_NORM_MSE
    float X0[3000];
@@ -1167,11 +1417,23 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
 #endif
 
    /* Residual quantisation */
-   quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL,
+   ALLOC(collapse_masks, st->mode->nbEBands, unsigned char);
+   quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
          bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, intensity, tf_res, resynth,
-         nbCompressedBytes*8, enc, LM, codedBands);
+         nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng);
 
-   quant_energy_finalise(st->mode, st->start, st->end, bandE, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_enc_tell(enc, 0), enc, C);
+   if (anti_collapse_rsv > 0)
+   {
+      anti_collapse_on = st->consec_transient<2;
+      ec_enc_bits(enc, anti_collapse_on, 1);
+   }
+   quant_energy_finalise(st->mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_enc_tell(enc, 0), enc, C);
+
+   if (silence)
+   {
+      for (i=0;i<C*st->mode->nbEBands;i++)
+         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+   }
 
 #ifdef RESYNTH
    /* Re-synthesis of the coded audio if required */
@@ -1181,16 +1443,26 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
       celt_sig *overlap_mem[2];
 
       log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C);
+      if (silence)
+      {
+         for (i=0;i<C*st->mode->nbEBands;i++)
+            bandE[i] = 0;
+      }
 
 #ifdef MEASURE_NORM_MSE
       measure_norm_mse(st->mode, X, X0, bandE, bandE0, M, N, C);
 #endif
+      if (anti_collapse_on)
+      {
+         anti_collapse(st->mode, X, collapse_masks, LM, C, N,
+               st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+      }
 
       /* Synthesis */
       denormalise_bands(st->mode, X, freq, bandE, effEnd, C, M);
 
       CELT_MOVE(st->syn_mem[0], st->syn_mem[0]+N, MAX_PERIOD);
-      if (C==2)
+      if (CC==2)
          CELT_MOVE(st->syn_mem[1], st->syn_mem[1]+N, MAX_PERIOD);
 
       c=0; do
@@ -1202,15 +1474,21 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
             freq[c*N+i] = 0;
       while (++c<C);
 
+      if (CC==2&&C==1)
+      {
+         for (i=0;i<N;i++)
+            freq[N+i] = freq[i];
+      }
+
       out_mem[0] = st->syn_mem[0]+MAX_PERIOD;
-      if (C==2)
+      if (CC==2)
          out_mem[1] = st->syn_mem[1]+MAX_PERIOD;
 
       c=0; do
          overlap_mem[c] = _overlap_mem + c*st->overlap;
-      while (++c<C);
+      while (++c<CC);
 
-      compute_inv_mdcts(st->mode, shortBlocks, freq, out_mem, overlap_mem, C, LM);
+      compute_inv_mdcts(st->mode, shortBlocks, freq, out_mem, overlap_mem, CC, LM);
 
 #ifdef ENABLE_POSTFILTER
       c=0; do {
@@ -1218,30 +1496,62 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
          st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
          if (LM!=0)
          {
-            comb_filter(out_mem[c], out_mem[c], st->prefilter_period, st->prefilter_period, st->overlap, C,
-                  st->prefilter_gain, st->prefilter_gain, NULL, 0);
-            comb_filter(out_mem[c]+st->overlap, out_mem[c]+st->overlap, st->prefilter_period, pitch_index, N-st->overlap, C,
-                  st->prefilter_gain, gain1, st->mode->window, st->mode->overlap);
+            comb_filter(out_mem[c], out_mem[c], st->prefilter_period, st->prefilter_period, st->overlap,
+                  st->prefilter_gain, st->prefilter_gain, st->prefilter_tapset, st->prefilter_tapset,
+                  NULL, 0);
+            comb_filter(out_mem[c]+st->overlap, out_mem[c]+st->overlap, st->prefilter_period, pitch_index, N-st->overlap,
+                  st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
+                  st->mode->window, st->mode->overlap);
          } else {
-            comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, N, C,
-                  st->prefilter_gain_old, st->prefilter_gain, st->mode->window, st->mode->overlap);
+            comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, N,
+                  st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
+                  st->mode->window, st->mode->overlap);
          }
-      } while (++c<C);
+      } while (++c<CC);
 #endif /* ENABLE_POSTFILTER */
 
-      deemphasis(out_mem, (celt_word16*)pcm, N, C, st->mode->preemph, st->preemph_memD);
+      deemphasis(out_mem, (celt_word16*)pcm, N, CC, st->upsample, st->mode->preemph, st->preemph_memD);
       st->prefilter_period_old = st->prefilter_period;
       st->prefilter_gain_old = st->prefilter_gain;
+      st->prefilter_tapset_old = st->prefilter_tapset;
    }
 #endif
 
    st->prefilter_period = pitch_index;
    st->prefilter_gain = gain1;
+   st->prefilter_tapset = prefilter_tapset;
+
+   if (CC==2&&C==1) {
+      for (i=0;i<st->mode->nbEBands;i++)
+         oldBandE[st->mode->nbEBands+i]=oldBandE[i];
+   }
+
+   /* In case start or end were to change */
+   c=0; do
+   {
+      for (i=0;i<st->start;i++)
+         oldBandE[c*st->mode->nbEBands+i]=0;
+      for (i=st->end;i<st->mode->nbEBands;i++)
+         oldBandE[c*st->mode->nbEBands+i]=0;
+   } while (++c<CC);
+   if (!isTransient)
+   {
+      for (i=0;i<CC*st->mode->nbEBands;i++)
+         oldLogE2[i] = oldLogE[i];
+      for (i=0;i<CC*st->mode->nbEBands;i++)
+         oldLogE[i] = oldBandE[i];
+   } else {
+      for (i=0;i<CC*st->mode->nbEBands;i++)
+         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+   }
+   if (isTransient)
+      st->consec_transient++;
+   else
+      st->consec_transient=0;
+   st->rng = enc->rng;
 
    /* If there's any room left (can only happen for very high rates),
-      fill it with zeros */
-   while (ec_enc_tell(enc,0) + 8 <= nbCompressedBytes*8)
-      ec_enc_bits(enc, 0, 8);
+      it's already filled with zeros */
    ec_enc_done(enc);
    
    RESTORE_STACK;
@@ -1255,22 +1565,15 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i
 #ifndef DISABLE_FLOAT_API
 int celt_encode_with_ec_float(CELTEncoder * restrict st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
 {
-   int j, ret, C, N, LM, M;
+   int j, ret, C, N;
    VARDECL(celt_int16, in);
    SAVE_STACK;
 
    if (pcm==NULL)
       return CELT_BAD_ARG;
 
-   for (LM=0;LM<4;LM++)
-      if (st->mode->shortMdctSize<<LM==frame_size)
-         break;
-   if (LM>=MAX_CONFIG_SIZES)
-      return CELT_BAD_ARG;
-   M=1<<LM;
-
    C = CHANNELS(st->channels);
-   N = M*st->mode->shortMdctSize;
+   N = frame_size;
    ALLOC(in, C*N, celt_int16);
 
    for (j=0;j<C*N;j++)
@@ -1289,22 +1592,15 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const float * pcm, int
 #else
 int celt_encode_with_ec(CELTEncoder * restrict st, const celt_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
 {
-   int j, ret, C, N, LM, M;
+   int j, ret, C, N;
    VARDECL(celt_sig, in);
    SAVE_STACK;
 
    if (pcm==NULL)
       return CELT_BAD_ARG;
 
-   for (LM=0;LM<4;LM++)
-      if (st->mode->shortMdctSize<<LM==frame_size)
-         break;
-   if (LM>=MAX_CONFIG_SIZES)
-      return CELT_BAD_ARG;
-   M=1<<LM;
-
    C=CHANNELS(st->channels);
-   N=M*st->mode->shortMdctSize;
+   N=frame_size;
    ALLOC(in, C*N, celt_sig);
    for (j=0;j<C*N;j++) {
      in[j] = SCALEOUT(pcm[j]);
@@ -1402,13 +1698,21 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
          if (value>3072000)
             value = 3072000;
          frame_rate = ((st->mode->Fs<<3)+(N>>1))/N;
-         st->vbr_rate_norm = ((value<<(BITRES+3))+(frame_rate>>1))/frame_rate;
+         st->vbr_rate_norm = value>0?IMAX(1,((value<<(BITRES+3))+(frame_rate>>1))/frame_rate):0;
+      }
+      break;
+      case CELT_SET_CHANNELS_REQUEST:
+      {
+         celt_int32 value = va_arg(ap, celt_int32);
+         if (value<1 || value>2)
+            goto bad_arg;
+         st->stream_channels = value;
       }
       break;
       case CELT_RESET_STATE:
       {
          CELT_MEMSET((char*)&st->ENCODER_RESET_START, 0,
-               celt_encoder_get_size(st->mode, st->channels)-
+               celt_encoder_get_size_custom(st->mode, st->channels)-
                ((char*)&st->ENCODER_RESET_START - (char*)st));
          st->vbr_offset = 0;
          st->delayedIntra = 1;
@@ -1443,43 +1747,86 @@ struct CELTDecoder {
    const CELTMode *mode;
    int overlap;
    int channels;
+   int stream_channels;
 
+   int downsample;
    int start, end;
 
    /* Everything beyond this point gets cleared on a reset */
-#define DECODER_RESET_START last_pitch_index
+#define DECODER_RESET_START rng
 
+   ec_uint32 rng;
    int last_pitch_index;
    int loss_count;
    int postfilter_period;
    int postfilter_period_old;
    celt_word16 postfilter_gain;
    celt_word16 postfilter_gain_old;
+   int postfilter_tapset;
+   int postfilter_tapset_old;
 
    celt_sig preemph_memD[2];
    
    celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
    /* celt_word16 lpc[],  Size = channels*LPC_ORDER */
    /* celt_word16 oldEBands[], Size = channels*mode->nbEBands */
+   /* celt_word16 oldLogE[], Size = channels*mode->nbEBands */
+   /* celt_word16 oldLogE2[], Size = channels*mode->nbEBands */
+   /* celt_word16 backgroundLogE[], Size = channels*mode->nbEBands */
 };
 
-int celt_decoder_get_size(const CELTMode *mode, int channels)
+int celt_decoder_get_size(int channels)
+{
+   const CELTMode *mode = celt_mode_create(48000, 960, NULL);
+   return celt_decoder_get_size_custom(mode, channels);
+}
+
+int celt_decoder_get_size_custom(const CELTMode *mode, int channels)
 {
    int size = sizeof(struct CELTDecoder)
             + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig)
             + channels*LPC_ORDER*sizeof(celt_word16)
-            + channels*mode->nbEBands*sizeof(celt_word16);
+            + 4*channels*mode->nbEBands*sizeof(celt_word16);
    return size;
 }
 
-CELTDecoder *celt_decoder_create(const CELTMode *mode, int channels, int *error)
+CELTDecoder *celt_decoder_create(int sampling_rate, int channels, int *error)
 {
-   return celt_decoder_init(
-         (CELTDecoder *)celt_alloc(celt_decoder_get_size(mode, channels)),
-         mode, channels, error);
+   CELTDecoder *st;
+   st = (CELTDecoder *)celt_alloc(celt_decoder_get_size(channels));
+   if (st!=NULL && celt_decoder_init(st, sampling_rate, channels, error)==NULL)
+   {
+      celt_decoder_destroy(st);
+      st = NULL;
+   }
+   return st;
 }
 
-CELTDecoder *celt_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels, int *error)
+CELTDecoder *celt_decoder_create_custom(const CELTMode *mode, int channels, int *error)
+{
+   CELTDecoder *st = (CELTDecoder *)celt_alloc(celt_decoder_get_size_custom(mode, channels));
+   if (st!=NULL && celt_decoder_init_custom(st, mode, channels, error)==NULL)
+   {
+      celt_decoder_destroy(st);
+      st = NULL;
+   }
+   return st;
+}
+
+CELTDecoder *celt_decoder_init(CELTDecoder *st, int sampling_rate, int channels, int *error)
+{
+   /* Propagate a failed custom init instead of reporting success on a state that was never set up */
+   if (celt_decoder_init_custom(st, celt_mode_create(48000, 960, NULL), channels, error)==NULL)
+      return NULL;
+   st->downsample = resampling_factor(sampling_rate);
+   if (st->downsample!=0)
+      return st;
+   if (error) /* resampling_factor() returned 0: sampling_rate is not one it supports */
+      *error = CELT_BAD_ARG;
+   return NULL;
+}
+
+CELTDecoder *celt_decoder_init_custom(CELTDecoder *st, const CELTMode *mode, int channels, int *error)
 {
    if (channels < 0 || channels > 2)
    {
@@ -1495,12 +1842,13 @@ CELTDecoder *celt_decoder_init(CELTDecoder *st, const CELTMode *mode, int channe
       return NULL;
    }
 
-   CELT_MEMSET((char*)st, 0, celt_decoder_get_size(mode, channels));
+   CELT_MEMSET((char*)st, 0, celt_decoder_get_size_custom(mode, channels));
 
    st->mode = mode;
    st->overlap = mode->overlap;
-   st->channels = channels;
+   st->stream_channels = st->channels = channels;
 
+   st->downsample = 1;
    st->start = 0;
    st->end = st->mode->effEBands;
 
@@ -1529,6 +1877,8 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p
    celt_sig *decode_mem[2];
    celt_sig *overlap_mem[2];
    celt_word16 *lpc;
+   celt_word32 *out_syn[2];
+   celt_word16 *oldBandE, *oldLogE2, *backgroundLogE;
    SAVE_STACK;
    
    c=0; do {
@@ -1537,31 +1887,58 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p
       overlap_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE;
    } while (++c<C);
    lpc = (celt_word16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*C);
+   oldBandE = lpc+C*LPC_ORDER; /* state tail layout: oldEBands, oldLogE, oldLogE2, backgroundLogE */
+   oldLogE2 = oldBandE + 2*C*st->mode->nbEBands; /* 2x: step over oldLogE, which sits in between */
+   backgroundLogE = oldLogE2  + C*st->mode->nbEBands;
+
+   out_syn[0] = out_mem[0]+MAX_PERIOD-N;
+   if (C==2)
+      out_syn[1] = out_mem[1]+MAX_PERIOD-N;
 
    len = N+st->mode->overlap;
    
-   if (st->loss_count == 0)
+   if (st->loss_count >= 5)
+   {
+      VARDECL(celt_sig, freq);
+      VARDECL(celt_norm, X);
+      VARDECL(celt_ener, bandE);
+      celt_uint32 seed;
+
+      ALLOC(freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
+      ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+      ALLOC(bandE, st->mode->nbEBands*C, celt_ener);
+
+      log2Amp(st->mode, st->start, st->end, bandE, backgroundLogE, C);
+
+      seed = st->rng;
+      for (i=0;i<C*N;i++)
+      {
+            seed = lcg_rand(seed);
+            X[i] = (celt_int32)(seed)>>20;
+      }
+      st->rng = seed;
+      for (c=0;c<C;c++)
+         for (i=0;i<st->mode->nbEBands;i++)
+            renormalise_vector(X+N*c+(st->mode->eBands[i]<<LM), (st->mode->eBands[i+1]-st->mode->eBands[i])<<LM, Q15ONE);
+
+      denormalise_bands(st->mode, X, freq, bandE, st->mode->nbEBands, C, 1<<LM);
+
+      compute_inv_mdcts(st->mode, 0, freq, out_syn, overlap_mem, C, LM);
+   } else if (st->loss_count == 0)
    {
       celt_word16 pitch_buf[MAX_PERIOD>>1];
-      celt_word32 tmp=0;
-      celt_word32 mem0[2]={0,0};
-      celt_word16 mem1[2]={0,0};
       int len2 = len;
       /* FIXME: This is a kludge */
       if (len2>MAX_PERIOD>>1)
          len2 = MAX_PERIOD>>1;
-      pitch_downsample(out_mem, pitch_buf, MAX_PERIOD, MAX_PERIOD,
-                       C, mem0, mem1);
-      pitch_search(st->mode, pitch_buf+((MAX_PERIOD-len2)>>1), pitch_buf, len2,
-                   MAX_PERIOD-len2-100, &pitch_index, &tmp, 1<<LM);
+      pitch_downsample(out_mem, pitch_buf, MAX_PERIOD, C);
+      pitch_search(pitch_buf+((MAX_PERIOD-len2)>>1), pitch_buf, len2,
+                   MAX_PERIOD-len2-100, &pitch_index);
       pitch_index = MAX_PERIOD-len2-pitch_index;
       st->last_pitch_index = pitch_index;
    } else {
       pitch_index = st->last_pitch_index;
-      if (st->loss_count < 5)
-         fade = QCONST16(.8f,15);
-      else
-         fade = 0;
+      fade = QCONST16(.8f,15);
    }
 
    c=0; do {
@@ -1669,8 +2046,9 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p
 
 #ifdef ENABLE_POSTFILTER
       /* Apply post-filter to the MDCT overlap of the previous frame */
-      comb_filter(out_mem[c]+MAX_PERIOD, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap, C,
-                  st->postfilter_gain, st->postfilter_gain, NULL, 0);
+      comb_filter(out_mem[c]+MAX_PERIOD, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap,
+                  st->postfilter_gain, st->postfilter_gain, st->postfilter_tapset, st->postfilter_tapset,
+                  NULL, 0);
 #endif /* ENABLE_POSTFILTER */
 
       for (i=0;i<MAX_PERIOD+st->mode->overlap-N;i++)
@@ -1691,20 +2069,15 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict p
 
 #ifdef ENABLE_POSTFILTER
       /* Apply pre-filter to the MDCT overlap for the next frame (post-filter will be applied then) */
-      comb_filter(e, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap, C,
-                  -st->postfilter_gain, -st->postfilter_gain, NULL, 0);
+      comb_filter(e, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap,
+                  -st->postfilter_gain, -st->postfilter_gain, st->postfilter_tapset, st->postfilter_tapset,
+                  NULL, 0);
 #endif /* ENABLE_POSTFILTER */
       for (i=0;i<overlap;i++)
          out_mem[c][MAX_PERIOD+i] = e[i];
    } while (++c<C);
 
-   {
-      celt_word32 *out_syn[2];
-      out_syn[0] = out_mem[0]+MAX_PERIOD-N;
-      if (C==2)
-         out_syn[1] = out_mem[1]+MAX_PERIOD-N;
-      deemphasis(out_syn, pcm, N, C, st->mode->preemph, st->preemph_memD);
-   }
+   deemphasis(out_syn, pcm, N, C, st->downsample, st->mode->preemph, st->preemph_memD);
    
    st->loss_count++;
 
@@ -1728,22 +2101,23 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
    VARDECL(celt_ener, bandE);
    VARDECL(int, fine_quant);
    VARDECL(int, pulses);
+   VARDECL(int, cap);
    VARDECL(int, offsets);
    VARDECL(int, fine_priority);
    VARDECL(int, tf_res);
+   VARDECL(unsigned char, collapse_masks);
    celt_sig *out_mem[2];
    celt_sig *decode_mem[2];
    celt_sig *overlap_mem[2];
    celt_sig *out_syn[2];
    celt_word16 *lpc;
-   celt_word16 *oldBandE;
+   celt_word16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
 
    int shortBlocks;
    int isTransient;
    int intra_ener;
-   const int C = CHANNELS(st->channels);
+   const int CC = CHANNELS(st->channels);
    int LM, M;
-   int nbFilledBytes, nbAvailableBytes;
    int effEnd;
    int codedBands;
    int alloc_trim;
@@ -1751,12 +2125,22 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
    celt_word16 postfilter_gain;
    int intensity=0;
    int dual_stereo=0;
-   int dynalloc_prob;
+   celt_int32 total_bits;
+   celt_int32 balance;
+   celt_int32 tell;
+   int dynalloc_logp;
+   int postfilter_tapset;
+   int anti_collapse_rsv;
+   int anti_collapse_on=0;
+   int silence;
+   const int C = CHANNELS(st->stream_channels);
+
    SAVE_STACK;
 
    if (pcm==NULL)
       return CELT_BAD_ARG;
 
+   frame_size *= st->downsample;
    for (LM=0;LM<4;LM++)
       if (st->mode->shortMdctSize<<LM==frame_size)
          break;
@@ -1768,9 +2152,12 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
       decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+st->overlap);
       out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
       overlap_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE;
-   } while (++c<C);
-   lpc = (celt_word16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*C);
-   oldBandE = lpc+C*LPC_ORDER;
+   } while (++c<CC);
+   lpc = (celt_word16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*CC);
+   oldBandE = lpc+CC*LPC_ORDER;
+   oldLogE = oldBandE + CC*st->mode->nbEBands;
+   oldLogE2 = oldLogE + CC*st->mode->nbEBands;
+   backgroundLogE = oldLogE2  + CC*st->mode->nbEBands;
 
    N = M*st->mode->shortMdctSize;
 
@@ -1778,19 +2165,19 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
    if (effEnd > st->mode->effEBands)
       effEnd = st->mode->effEBands;
 
-   ALLOC(freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
-   ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
-   ALLOC(bandE, st->mode->nbEBands*C, celt_ener);
+   ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
+   ALLOC(X, CC*N, celt_norm);   /**< Interleaved normalised MDCTs */
+   ALLOC(bandE, st->mode->nbEBands*CC, celt_ener);
    c=0; do
       for (i=0;i<M*st->mode->eBands[st->start];i++)
          X[c*N+i] = 0;
-   while (++c<C);
+   while (++c<CC);
    c=0; do   
       for (i=M*st->mode->eBands[effEnd];i<N;i++)
          X[c*N+i] = 0;
-   while (++c<C);
+   while (++c<CC);
 
-   if (data == NULL)
+   if (data == NULL || len<=1)
    {
       celt_decode_lost(st, pcm, N, LM);
       RESTORE_STACK;
@@ -1806,38 +2193,64 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
       ec_byte_readinit(&buf,(unsigned char*)data,len);
       ec_dec_init(&_dec,&buf);
       dec = &_dec;
-      nbFilledBytes = 0;
-   } else {
-      nbFilledBytes = (ec_dec_tell(dec, 0)+4)>>3;
    }
-   nbAvailableBytes = len-nbFilledBytes;
 
-   if (ec_dec_bit_logp(dec, 1))
+   if (CC==1&&C==2)
    {
-#ifdef ENABLE_POSTFILTER
-      int qg, octave;
-      octave = ec_dec_uint(dec, 6);
-      postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave);
-      qg = ec_dec_bits(dec, 2);
-      postfilter_gain = QCONST16(.125f,15)*(qg+2);
-#else /* ENABLE_POSTFILTER */
       RESTORE_STACK;
       return CELT_CORRUPTED_DATA;
-#endif /* ENABLE_POSTFILTER */
+   } else if (CC==2&&C==1)
+   {
+      for (i=0;i<st->mode->nbEBands;i++)
+      {
+         oldBandE[i]=MAX16(oldBandE[i],oldBandE[st->mode->nbEBands+i]);
+         oldLogE[i]=MAX16(oldLogE[i],oldLogE[st->mode->nbEBands+i]);
+         oldLogE2[i]=MAX16(oldLogE2[i],oldLogE2[st->mode->nbEBands+i]);
+         backgroundLogE[i]=MAX16(backgroundLogE[i],backgroundLogE[st->mode->nbEBands+i]);
+      }
+   }
 
-   } else {
-      postfilter_gain = 0;
-      postfilter_pitch = 0;
+   total_bits = len*8;
+   tell = ec_dec_tell(dec, 0);
+
+   if (tell==1)
+      silence = ec_dec_bit_logp(dec, 15);
+   else
+      silence = 0;
+   if (silence)
+   {
+      /* Pretend we've read all the remaining bits */
+      tell = len*8;
+      dec->nbits_total+=tell-ec_dec_tell(dec,0);
    }
 
-   /* Decode the global flags (first symbols in the stream) */
-   intra_ener = ec_dec_bit_logp(dec, 3);
-   /* Get band energies */
-   unquant_coarse_energy(st->mode, st->start, st->end, bandE, oldBandE,
-         intra_ener, dec, C, LM);
+   postfilter_gain = 0;
+   postfilter_pitch = 0;
+   postfilter_tapset = 0;
+   if (st->start==0 && tell+17 <= total_bits)
+   {
+      if(ec_dec_bit_logp(dec, 1))
+      {
+#ifdef ENABLE_POSTFILTER
+         int qg, octave;
+         octave = ec_dec_uint(dec, 6);
+         postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1;
+         qg = ec_dec_bits(dec, 2);
+         postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2);
+         postfilter_gain = QCONST16(.125f,15)*(qg+2);
+#else /* ENABLE_POSTFILTER */
+         RESTORE_STACK;
+         return CELT_CORRUPTED_DATA;
+#endif /* ENABLE_POSTFILTER */
+      }
+      tell = ec_dec_tell(dec, 0);
+   }
 
-   if (LM > 0)
+   if (LM > 0 && tell+3 <= total_bits)
+   {
       isTransient = ec_dec_bit_logp(dec, 3);
+      tell = ec_dec_tell(dec, 0);
+   }
    else
       isTransient = 0;
 
@@ -1846,83 +2259,132 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
    else
       shortBlocks = 0;
 
+   /* Decode the global flags (first symbols in the stream) */
+   intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
+   /* Get band energies */
+   unquant_coarse_energy(st->mode, st->start, st->end, oldBandE,
+         intra_ener, dec, C, LM);
+
    ALLOC(tf_res, st->mode->nbEBands, int);
-   tf_decode(st->start, st->end, C, isTransient, tf_res, LM, dec);
+   tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
 
-   spread_decision = ec_dec_icdf(dec, spread_icdf, 5);
+   tell = ec_dec_tell(dec, 0);
+   spread_decision = SPREAD_NORMAL;
+   if (tell+4 <= total_bits)
+      spread_decision = ec_dec_icdf(dec, spread_icdf, 5);
 
    ALLOC(pulses, st->mode->nbEBands, int);
+   ALLOC(cap, st->mode->nbEBands, int);
    ALLOC(offsets, st->mode->nbEBands, int);
    ALLOC(fine_priority, st->mode->nbEBands, int);
 
    for (i=0;i<st->mode->nbEBands;i++)
-      offsets[i] = 0;
-   dynalloc_prob = 6;
+      cap[i] = st->mode->cache.caps[st->mode->nbEBands*(2*LM+C-1)+i]
+            << C+LM+BITRES-2;
+
+   dynalloc_logp = 6;
+   total_bits<<=BITRES;
+   tell = ec_dec_tell(dec, BITRES);
    for (i=st->start;i<st->end;i++)
    {
-      if (ec_dec_bit_logp(dec, dynalloc_prob))
+      int width, quanta;
+      int dynalloc_loop_logp;
+      int boost;
+      width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
+      /* quanta is 6 bits, but no more than 1 bit/sample
+         and no less than 1/8 bit/sample */
+      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+      dynalloc_loop_logp = dynalloc_logp;
+      boost = 0;
+      while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i])
       {
-         int width, quanta;
-         width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
-         /* quanta is 6 bits, but no more than 1 bit/sample
-            and no less than 1/8 bit/sample */
-         quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
-         while (ec_dec_bit_logp(dec, 1))
-            offsets[i]++;
-         offsets[i]++;
-         offsets[i] *= quanta;
-         /* Making dynalloc more likely */
-         dynalloc_prob = IMAX(2, dynalloc_prob-1);
+         int flag;
+         flag = ec_dec_bit_logp(dec, dynalloc_loop_logp);
+         tell = ec_dec_tell(dec, BITRES);
+         if (!flag)
+            break;
+         boost += quanta;
+         total_bits -= quanta;
+         dynalloc_loop_logp = 1;
       }
+      offsets[i] = boost;
+      /* Making dynalloc more likely */
+      if (boost>0)
+         dynalloc_logp = IMAX(2, dynalloc_logp-1);
    }
 
    ALLOC(fine_quant, st->mode->nbEBands, int);
-   alloc_trim = ec_dec_icdf(dec, trim_icdf, 7);
-
-   if (C==2)
-   {
-      dual_stereo = ec_dec_bit_logp(dec, 1);
-      intensity = st->start + ec_dec_uint(dec, 1+st->end-st->start);
-   }
+   alloc_trim = tell+(6<<BITRES) <= total_bits ?
+         ec_dec_icdf(dec, trim_icdf, 7) : 5;
 
    bits = (len*8<<BITRES) - ec_dec_tell(dec, BITRES) - 1;
-   codedBands = compute_allocation(st->mode, st->start, st->end, offsets,
-         alloc_trim, bits, pulses, fine_quant, fine_priority, C, LM, dec, 0, 0);
+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=(LM+2<<BITRES) ? (1<<BITRES) : 0;
+   bits -= anti_collapse_rsv;
+   codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap,
+         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
+         fine_quant, fine_priority, C, LM, dec, 0, 0);
    
-   unquant_fine_energy(st->mode, st->start, st->end, bandE, oldBandE, fine_quant, dec, C);
+   unquant_fine_energy(st->mode, st->start, st->end, oldBandE, fine_quant, dec, C);
 
    /* Decode fixed codebook */
-   quant_all_bands(0, st->mode, st->start, st->end, X, C==2 ? X+N : NULL,
+   ALLOC(collapse_masks, st->mode->nbEBands, unsigned char);
+   quant_all_bands(0, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
          NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, 1,
-         len*8, dec, LM, codedBands);
+         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
 
-   unquant_energy_finalise(st->mode, st->start, st->end, bandE, oldBandE,
+   if (anti_collapse_rsv > 0)
+   {
+      anti_collapse_on = ec_dec_bits(dec, 1);
+   }
+
+   unquant_energy_finalise(st->mode, st->start, st->end, oldBandE,
          fine_quant, fine_priority, len*8-ec_dec_tell(dec, 0), dec, C);
 
+   if (anti_collapse_on)
+      anti_collapse(st->mode, X, collapse_masks, LM, C, N,
+            st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+
    log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C);
 
+   if (silence)
+   {
+      for (i=0;i<C*st->mode->nbEBands;i++)
+      {
+         bandE[i] = 0;
+         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+      }
+   }
    /* Synthesis */
    denormalise_bands(st->mode, X, freq, bandE, effEnd, C, M);
 
    CELT_MOVE(decode_mem[0], decode_mem[0]+N, DECODE_BUFFER_SIZE-N);
-   if (C==2)
+   if (CC==2)
       CELT_MOVE(decode_mem[1], decode_mem[1]+N, DECODE_BUFFER_SIZE-N);
 
    c=0; do
       for (i=0;i<M*st->mode->eBands[st->start];i++)
          freq[c*N+i] = 0;
    while (++c<C);
-   c=0; do
+   c=0; do {
+      int bound = M*st->mode->eBands[effEnd];
+      if (st->downsample!=1)
+         bound = IMIN(bound, N/st->downsample);
-      for (i=M*st->mode->eBands[effEnd];i<N;i++)
+      for (i=bound;i<N;i++) /* zero from the (possibly lowered) bound up to N */
          freq[c*N+i] = 0;
-   while (++c<C);
+   } while (++c<C);
 
    out_syn[0] = out_mem[0]+MAX_PERIOD-N;
-   if (C==2)
+   if (CC==2)
       out_syn[1] = out_mem[1]+MAX_PERIOD-N;
 
+   if (CC==2&&C==1)
+   {
+      for (i=0;i<N;i++)
+         freq[N+i] = freq[i];
+   }
+
    /* Compute inverse MDCTs */
-   compute_inv_mdcts(st->mode, shortBlocks, freq, out_syn, overlap_mem, C, LM);
+   compute_inv_mdcts(st->mode, shortBlocks, freq, out_syn, overlap_mem, CC, LM);
 
 #ifdef ENABLE_POSTFILTER
    c=0; do {
@@ -1930,22 +2392,54 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
       st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
       if (LM!=0)
       {
-         comb_filter(out_syn[c], out_syn[c], st->postfilter_period, st->postfilter_period, st->overlap, C,
-               st->postfilter_gain, st->postfilter_gain, NULL, 0);
-         comb_filter(out_syn[c]+st->overlap, out_syn[c]+st->overlap, st->postfilter_period, postfilter_pitch, N-st->overlap, C,
-               st->postfilter_gain, postfilter_gain, st->mode->window, st->mode->overlap);
+         comb_filter(out_syn[c], out_syn[c], st->postfilter_period, st->postfilter_period, st->overlap,
+               st->postfilter_gain, st->postfilter_gain, st->postfilter_tapset, st->postfilter_tapset,
+               NULL, 0);
+         comb_filter(out_syn[c]+st->overlap, out_syn[c]+st->overlap, st->postfilter_period, postfilter_pitch, N-st->overlap,
+               st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
+               st->mode->window, st->mode->overlap);
       } else {
-         comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, N-st->overlap, C,
-               st->postfilter_gain_old, st->postfilter_gain, st->mode->window, st->mode->overlap);
+         comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, N-st->overlap,
+               st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
+               st->mode->window, st->mode->overlap);
       }
-   } while (++c<C);
+   } while (++c<CC);
    st->postfilter_period_old = st->postfilter_period;
    st->postfilter_gain_old = st->postfilter_gain;
+   st->postfilter_tapset_old = st->postfilter_tapset;
    st->postfilter_period = postfilter_pitch;
    st->postfilter_gain = postfilter_gain;
+   st->postfilter_tapset = postfilter_tapset;
 #endif /* ENABLE_POSTFILTER */
 
-   deemphasis(out_syn, pcm, N, C, st->mode->preemph, st->preemph_memD);
+   if (CC==2&&C==1) {
+      for (i=0;i<st->mode->nbEBands;i++)
+         oldBandE[st->mode->nbEBands+i]=oldBandE[i];
+   }
+
+   /* In case start or end were to change */
+   c=0; do
+   {
+      for (i=0;i<st->start;i++)
+         oldBandE[c*st->mode->nbEBands+i]=0;
+      for (i=st->end;i<st->mode->nbEBands;i++)
+         oldBandE[c*st->mode->nbEBands+i]=0;
+   } while (++c<CC);
+   if (!isTransient)
+   {
+      for (i=0;i<CC*st->mode->nbEBands;i++)
+         oldLogE2[i] = oldLogE[i];
+      for (i=0;i<CC*st->mode->nbEBands;i++)
+         oldLogE[i] = oldBandE[i];
+      for (i=0;i<CC*st->mode->nbEBands;i++)
+         backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
+   } else {
+      for (i=0;i<CC*st->mode->nbEBands;i++)
+         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+   }
+   st->rng = dec->rng;
+
+   deemphasis(out_syn, pcm, N, CC, st->downsample, st->mode->preemph, st->preemph_memD);
    st->loss_count = 0;
    RESTORE_STACK;
    if (ec_dec_tell(dec,0) > 8*len || ec_dec_get_error(dec))
@@ -1958,22 +2452,15 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
 #ifndef DISABLE_FLOAT_API
 int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *data, int len, float * restrict pcm, int frame_size, ec_dec *dec)
 {
-   int j, ret, C, N, LM, M;
+   int j, ret, C, N;
    VARDECL(celt_int16, out);
    SAVE_STACK;
 
    if (pcm==NULL)
       return CELT_BAD_ARG;
 
-   for (LM=0;LM<4;LM++)
-      if (st->mode->shortMdctSize<<LM==frame_size)
-         break;
-   if (LM>=MAX_CONFIG_SIZES)
-      return CELT_BAD_ARG;
-   M=1<<LM;
-
    C = CHANNELS(st->channels);
-   N = M*st->mode->shortMdctSize;
+   N = frame_size;
    
    ALLOC(out, C*N, celt_int16);
    ret=celt_decode_with_ec(st, data, len, out, frame_size, dec);
@@ -1988,22 +2475,15 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
 #else
 int celt_decode_with_ec(CELTDecoder * restrict st, const unsigned char *data, int len, celt_int16 * restrict pcm, int frame_size, ec_dec *dec)
 {
-   int j, ret, C, N, LM, M;
+   int j, ret, C, N;
    VARDECL(celt_sig, out);
    SAVE_STACK;
 
    if (pcm==NULL)
       return CELT_BAD_ARG;
 
-   for (LM=0;LM<4;LM++)
-      if (st->mode->shortMdctSize<<LM==frame_size)
-         break;
-   if (LM>=MAX_CONFIG_SIZES)
-      return CELT_BAD_ARG;
-   M=1<<LM;
-
    C = CHANNELS(st->channels);
-   N = M*st->mode->shortMdctSize;
+   N = frame_size;
    ALLOC(out, C*N, celt_sig);
 
    ret=celt_decode_with_ec_float(st, data, len, out, frame_size, dec);
@@ -2060,10 +2540,18 @@ int celt_decoder_ctl(CELTDecoder * restrict st, int request, ...)
          st->end = value;
       }
       break;
+      case CELT_SET_CHANNELS_REQUEST:
+      {
+         celt_int32 value = va_arg(ap, celt_int32);
+         if (value<1 || value>2)
+            goto bad_arg;
+         st->stream_channels = value;
+      }
+      break;
       case CELT_RESET_STATE:
       {
          CELT_MEMSET((char*)&st->DECODER_RESET_START, 0,
-               celt_decoder_get_size(st->mode, st->channels)-
+               celt_decoder_get_size_custom(st->mode, st->channels)-
                ((char*)&st->DECODER_RESET_START - (char*)st));
       }
       break;