Improved transient_analysis() by adding one frame of memory.
[opus.git] / libcelt / celt.c
index 98bf4bf..90c484d 100644 (file)
@@ -1,5 +1,7 @@
-/* (C) 2007-2008 Jean-Marc Valin, CSIRO
-   (C) 2008 Gregory Maxwell */
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell 
+   Written by Jean-Marc Valin and Gregory Maxwell */
 /*
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
 #include <math.h>
 #include "celt.h"
 #include "pitch.h"
-#include "kiss_fftr.h"
 #include "bands.h"
 #include "modes.h"
 #include "entcode.h"
 #include "quant_bands.h"
-#include "psy.h"
 #include "rate.h"
 #include "stack_alloc.h"
 #include "mathops.h"
 #include "float_cast.h"
 #include <stdarg.h>
 
-static const celt_word16_t preemph = QCONST16(0.8f,15);
+#define LPC_ORDER 24
+/* #define NEW_PLC */
+#if !defined(FIXED_POINT) || defined(NEW_PLC)
+#include "plc.c"
+#endif
+
+static const celt_word16 preemph = QCONST16(0.8f,15);
 
 #ifdef FIXED_POINT
-static const celt_word16_t transientWindow[16] = {
+static const celt_word16 transientWindow[16] = {
      279,  1106,  2454,  4276,  6510,  9081, 11900, 14872,
    17896, 20868, 23687, 26258, 28492, 30314, 31662, 32489};
 #else
@@ -74,7 +80,7 @@ static const float transientWindow[16] = {
  @brief Encoder state
  */
 struct CELTEncoder {
-   celt_uint32_t marker;
+   celt_uint32 marker;
    const CELTMode *mode;     /**< Mode used by the encoder */
    int frame_size;
    int block_size;
@@ -86,24 +92,30 @@ struct CELTEncoder {
    int pitch_available;     /*  Amount of pitch buffer available */
    int force_intra;
    int delayedIntra;
-   celt_word16_t tonal_average;
+   celt_word16 tonal_average;
    int fold_decision;
+   celt_word16 gain_prod;
+   celt_word32 frame_max;
 
-   int VBR_rate; /* Target number of 16th bits per frame */
-   celt_word16_t * restrict preemph_memE; 
-   celt_sig_t    * restrict preemph_memD;
+   /* VBR-related parameters */
+   celt_int32 vbr_reservoir;
+   celt_int32 vbr_drift;
+   celt_int32 vbr_offset;
+   celt_int32 vbr_count;
 
-   celt_sig_t *in_mem;
-   celt_sig_t *out_mem;
+   celt_int32 vbr_rate; /* Target number of 16th bits per frame */
+   celt_word16 * restrict preemph_memE; 
+   celt_sig    * restrict preemph_memD;
 
-   celt_word16_t *oldBandE;
-#ifdef EXP_PSY
-   celt_word16_t *psy_mem;
-   struct PsyDecay psy;
-#endif
+   celt_sig *in_mem;
+   celt_sig *out_mem;
+   celt_word16 *pitch_buf;
+   celt_sig xmem;
+
+   celt_word16 *oldBandE;
 };
 
-int check_encoder(const CELTEncoder *st) 
+static int check_encoder(const CELTEncoder *st) 
 {
    if (st==NULL)
    {
@@ -119,59 +131,82 @@ int check_encoder(const CELTEncoder *st)
    return CELT_INVALID_STATE;
 }
 
-CELTEncoder *celt_encoder_create(const CELTMode *mode)
+/** Allocates and initialises an encoder state.
+    @param mode     Mode to encode with; it is validated with check_mode()
+    @param channels Number of channels: 1 (mono) or 2 (stereo). Any value
+                    above 1 is rejected when DISABLE_STEREO is defined
+    @param error    Optional (may be NULL). When non-NULL it receives
+                    CELT_OK, CELT_INVALID_MODE, CELT_BAD_ARG or
+                    CELT_ALLOC_FAIL
+    @return The new encoder, or NULL on failure. When one of the internal
+            buffers cannot be allocated the partially built state is handed
+            to celt_encoder_destroy() before returning NULL */
+CELTEncoder *celt_encoder_create(const CELTMode *mode, int channels, int *error)
 {
    int N, C;
    CELTEncoder *st;
 
    if (check_mode(mode) != CELT_OK)
+   {
+      if (error)
+         *error = CELT_INVALID_MODE;
+      return NULL;
+   }
+#ifdef DISABLE_STEREO
+   if (channels > 1)
+   {
+      celt_warning("Stereo support was disable from this build");
+      if (error)
+         *error = CELT_BAD_ARG;
+      return NULL;
+   }
+#endif
+
+   /* Zero channels is rejected too: every per-channel buffer below would
+      otherwise be requested with a size of zero */
+   if (channels < 1 || channels > 2)
+   {
+      celt_warning("Only mono and stereo supported");
+      if (error)
+         *error = CELT_BAD_ARG;
       return NULL;
+   }
 
    N = mode->mdctSize;
-   C = mode->nbChannels;
+   C = channels;
    st = celt_alloc(sizeof(CELTEncoder));
    
-   if (st==NULL) 
-      return NULL;   
+   if (st==NULL)
+   {
+      if (error)
+         *error = CELT_ALLOC_FAIL;
+      return NULL;
+   }
    st->marker = ENCODERPARTIAL;
    st->mode = mode;
    st->frame_size = N;
    st->block_size = N;
    st->overlap = mode->overlap;
+   st->channels = channels;
 
-   st->VBR_rate = 0;
+   st->vbr_rate = 0;
    st->pitch_enabled = 1;
    st->pitch_permitted = 1;
    st->pitch_available = 1;
    st->force_intra  = 0;
    st->delayedIntra = 1;
-   st->tonal_average = QCONST16(1.,8);
+   st->tonal_average = QCONST16(1.f,8);
    st->fold_decision = 1;
 
-   st->in_mem = celt_alloc(st->overlap*C*sizeof(celt_sig_t));
-   st->out_mem = celt_alloc((MAX_PERIOD+st->overlap)*C*sizeof(celt_sig_t));
-
-   st->oldBandE = (celt_word16_t*)celt_alloc(C*mode->nbEBands*sizeof(celt_word16_t));
+   st->in_mem = celt_alloc(st->overlap*C*sizeof(celt_sig));
+   st->out_mem = celt_alloc((MAX_PERIOD+st->overlap)*C*sizeof(celt_sig));
+   st->pitch_buf = celt_alloc(((MAX_PERIOD>>1)+2)*sizeof(celt_word16));
 
-   st->preemph_memE = (celt_word16_t*)celt_alloc(C*sizeof(celt_word16_t));
-   st->preemph_memD = (celt_sig_t*)celt_alloc(C*sizeof(celt_sig_t));
+   st->oldBandE = (celt_word16*)celt_alloc(C*mode->nbEBands*sizeof(celt_word16));
 
-#ifdef EXP_PSY
-   st->psy_mem = celt_alloc(MAX_PERIOD*sizeof(celt_word16_t));
-   psydecay_init(&st->psy, MAX_PERIOD/2, st->mode->Fs);
-#endif
+   st->preemph_memE = (celt_word16*)celt_alloc(C*sizeof(celt_word16));
+   st->preemph_memD = (celt_sig*)celt_alloc(C*sizeof(celt_sig));
 
    if ((st->in_mem!=NULL) && (st->out_mem!=NULL) && (st->oldBandE!=NULL) 
-#ifdef EXP_PSY
-       && (st->psy_mem!=NULL) 
-#endif   
+       /* pitch_buf is indexed by the pitch analysis in the encode call */
+       && (st->pitch_buf!=NULL)
        && (st->preemph_memE!=NULL) && (st->preemph_memD!=NULL))
    {
+      if (error)
+         *error = CELT_OK;
       st->marker   = ENCODERVALID;
       return st;
    }
    /* If the setup fails for some reason deallocate it. */
    celt_encoder_destroy(st);  
+   if (error)
+      *error = CELT_ALLOC_FAIL;
    return NULL;
 }
 
@@ -201,30 +236,26 @@ void celt_encoder_destroy(CELTEncoder *st)
    
    celt_free(st->in_mem);
    celt_free(st->out_mem);
-   
+   celt_free(st->pitch_buf);
    celt_free(st->oldBandE);
    
    celt_free(st->preemph_memE);
    celt_free(st->preemph_memD);
-   
-#ifdef EXP_PSY
-   celt_free (st->psy_mem);
-   psydecay_clear(&st->psy);
-#endif
+
    st->marker = ENCODERFREED;
    
    celt_free(st);
 }
 
-static inline celt_int16_t FLOAT2INT16(float x)
+/** Converts a float sample to a 16-bit integer: the value is multiplied by
+    CELT_SIG_SCALE, clamped to [-32768, 32767] and then converted with
+    float2int() */
+static inline celt_int16 FLOAT2INT16(float x)
 {
    x = x*CELT_SIG_SCALE;
    x = MAX32(x, -32768);
    x = MIN32(x, 32767);
-   return (celt_int16_t)float2int(x);
+   return (celt_int16)float2int(x);
 }
 
-static inline celt_word16_t SIG2WORD16(celt_sig_t x)
+static inline celt_word16 SIG2WORD16(celt_sig x)
 {
 #ifdef FIXED_POINT
    x = PSHR32(x, SIG_SHIFT);
@@ -232,108 +263,104 @@ static inline celt_word16_t SIG2WORD16(celt_sig_t x)
    x = MIN32(x, 32767);
    return EXTRACT16(x);
 #else
-   return (celt_word16_t)x;
+   return (celt_word16)x;
 #endif
 }
 
-static int transient_analysis(celt_word32_t *in, int len, int C, int *transient_time, int *transient_shift)
+/** Decides whether the input buffer contains a transient.
+    begin[i+1] is filled with the running maximum of |in| over samples 0..i
+    (when C!=1 only the first two interleaved channels are examined), so
+    begin[len] is the peak of the whole buffer. The search for a transient
+    position only runs when the peak stored by the previous call
+    (*frame_max) is below 0.2 times the current peak.
+    @param in              Input samples, interleaved with a stride of C
+    @param len             Number of samples per channel to scan
+    @param C               Number of interleaved channels
+    @param transient_time  Receives the last index in [8, len-8) at which
+                           the running maximum is still below the
+                           threshold; -1 when that index is below 32 or no
+                           search was done
+    @param transient_shift Receives 3 when the squared peak ratio exceeds
+                           2048, 0 otherwise
+    @param frame_max       In: peak stored by the previous call. Out: peak
+                           of this buffer (begin[len])
+    @return Non-zero when the squared peak ratio is greater than 20 */
+static int transient_analysis(const celt_word32 * restrict in, int len, int C,
+                              int *transient_time, int *transient_shift,
+                              celt_word32 *frame_max)
 {
-   int c, i, n;
-   celt_word32_t ratio;
-   /* FIXME: Remove the floats here */
-   VARDECL(celt_word32_t, begin);
+   int i, n;
+   celt_word32 ratio;
+   celt_word32 threshold;
+   VARDECL(celt_word32, begin);
    SAVE_STACK;
-   ALLOC(begin, len, celt_word32_t);
-   for (i=0;i<len;i++)
-      begin[i] = ABS32(SHR32(in[C*i],SIG_SHIFT));
-   for (c=1;c<C;c++)
+   /* One extra entry: begin[0] is the empty-prefix maximum (0) */
+   ALLOC(begin, len+1, celt_word32);
+   begin[0] = 0;
+   if (C==1)
    {
       for (i=0;i<len;i++)
-         begin[i] = MAX32(begin[i], ABS32(SHR32(in[C*i+c],SIG_SHIFT)));
+         begin[i+1] = MAX32(begin[i], ABS32(in[i]));
+   } else {
+      for (i=0;i<len;i++)
+         begin[i+1] = MAX32(begin[i], MAX32(ABS32(in[C*i]),
+                                            ABS32(in[C*i+1])));
    }
-   for (i=1;i<len;i++)
-      begin[i] = MAX32(begin[i-1],begin[i]);
    n = -1;
-   for (i=8;i<len-8;i++)
+
+   /* 0.2 (in Q15) times the peak of the whole buffer */
+   threshold = MULT16_32_Q15(QCONST16(.2f,15),begin[len]);
+   /* If the following condition isn't met, there's just no way
+      we'll have a transient*/
+   if (*frame_max < threshold)
    {
-      if (begin[i] < MULT16_32_Q15(QCONST16(.2f,15),begin[len-1]))
-         n=i;
+      /* It's likely we have a transient, now find it */
+      for (i=8;i<len-8;i++)
+      {
+         if (begin[i+1] < threshold)
+            n=i;
+      }
    }
    if (n<32)
    {
       n = -1;
       ratio = 0;
    } else {
-      ratio = DIV32(begin[len-1],1+begin[n-16]);
+      /* Peak of the buffer relative to the larger of the previous call's
+         peak and the running maximum 16 entries before n */
+      ratio = DIV32(begin[len],1+MAX32(*frame_max, begin[n-16]));
    }
-   /*printf ("%d %f\n", n, ratio*ratio);*/
    if (ratio < 0)
       ratio = 0;
    if (ratio > 1000)
       ratio = 1000;
    ratio *= ratio;
-   
+
    if (ratio > 2048)
       *transient_shift = 3;
    else
       *transient_shift = 0;
    
    *transient_time = n;
-   
+   /* Remember this buffer's peak for the next call */
+   *frame_max = begin[len];
+
    RESTORE_STACK;
    return ratio > 20;
 }
 
 /** Apply window and compute the MDCT for all sub-frames and 
     all channels in a frame */
-static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig_t * restrict in, celt_sig_t * restrict out)
+static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * restrict in, celt_sig * restrict out, int _C)
 {
-   const int C = CHANNELS(mode);
+   const int C = CHANNELS(_C);
    if (C==1 && !shortBlocks)
    {
       const mdct_lookup *lookup = MDCT(mode);
       const int overlap = OVERLAP(mode);
-      mdct_forward(lookup, in, out, mode->window, overlap);
-   } else if (!shortBlocks) {
+      clt_mdct_forward(lookup, in, out, mode->window, overlap);
+   } else {
       const mdct_lookup *lookup = MDCT(mode);
       const int overlap = OVERLAP(mode);
-      const int N = FRAMESIZE(mode);
-      int c;
-      VARDECL(celt_word32_t, x);
-      VARDECL(celt_word32_t, tmp);
+      int N = FRAMESIZE(mode);
+      int B = 1;
+      int b, c;
+      VARDECL(celt_word32, x);
+      VARDECL(celt_word32, tmp);
       SAVE_STACK;
-      ALLOC(x, N+overlap, celt_word32_t);
-      ALLOC(tmp, N, celt_word32_t);
-      for (c=0;c<C;c++)
+      if (shortBlocks)
       {
-         int j;
-         for (j=0;j<N+overlap;j++)
-            x[j] = in[C*j+c];
-         mdct_forward(lookup, x, tmp, mode->window, overlap);
-         /* Interleaving the sub-frames */
-         for (j=0;j<N;j++)
-            out[j+c*N] = tmp[j];
+         lookup = &mode->shortMdct;
+         N = mode->shortMdctSize;
+         B = mode->nbShortMdcts;
       }
-      RESTORE_STACK;
-   } else {
-      const mdct_lookup *lookup = &mode->shortMdct;
-      const int overlap = mode->overlap;
-      const int N = mode->shortMdctSize;
-      int b, c;
-      VARDECL(celt_word32_t, x);
-      VARDECL(celt_word32_t, tmp);
-      SAVE_STACK;
-      ALLOC(x, N+overlap, celt_word32_t);
-      ALLOC(tmp, N, celt_word32_t);
+      ALLOC(x, N+overlap, celt_word32);
+      ALLOC(tmp, N, celt_word32);
       for (c=0;c<C;c++)
       {
-         int B = mode->nbShortMdcts;
          for (b=0;b<B;b++)
          {
             int j;
             for (j=0;j<N+overlap;j++)
                x[j] = in[C*(b*N+j)+c];
-            mdct_forward(lookup, x, tmp, mode->window, overlap);
+            clt_mdct_forward(lookup, x, tmp, mode->window, overlap);
             /* Interleaving the sub-frames */
             for (j=0;j<N;j++)
                out[(j*B+b)+c*N*B] = tmp[j];
@@ -345,10 +372,10 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig_t * re
 
 /** Compute the IMDCT and apply window for all sub-frames and 
     all channels in a frame */
-static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig_t *X, int transient_time, int transient_shift, celt_sig_t * restrict out_mem)
+static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, int transient_time, int transient_shift, celt_sig * restrict out_mem, int _C)
 {
    int c, N4;
-   const int C = CHANNELS(mode);
+   const int C = CHANNELS(_C);
    const int N = FRAMESIZE(mode);
    const int overlap = OVERLAP(mode);
    N4 = (N-overlap)>>1;
@@ -357,49 +384,38 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig_t
       int j;
       if (transient_shift==0 && C==1 && !shortBlocks) {
          const mdct_lookup *lookup = MDCT(mode);
-         mdct_backward(lookup, X, out_mem+C*(MAX_PERIOD-N-N4), mode->window, overlap);
-      } else if (!shortBlocks) {
-         const mdct_lookup *lookup = MDCT(mode);
-         VARDECL(celt_word32_t, x);
-         VARDECL(celt_word32_t, tmp);
-         SAVE_STACK;
-         ALLOC(x, 2*N, celt_word32_t);
-         ALLOC(tmp, N, celt_word32_t);
-         /* De-interleaving the sub-frames */
-         for (j=0;j<N;j++)
-            tmp[j] = X[j+c*N];
-         /* Prevents problems from the imdct doing the overlap-add */
-         CELT_MEMSET(x+N4, 0, N);
-         mdct_backward(lookup, tmp, x, mode->window, overlap);
-         celt_assert(transient_shift == 0);
-         /* The first and last part would need to be set to zero if we actually
-            wanted to use them. */
-         for (j=0;j<overlap;j++)
-            out_mem[C*(MAX_PERIOD-N)+C*j+c] += x[j+N4];
-         for (j=0;j<overlap;j++)
-            out_mem[C*(MAX_PERIOD)+C*(overlap-j-1)+c] = x[2*N-j-N4-1];
-         for (j=0;j<2*N4;j++)
-            out_mem[C*(MAX_PERIOD-N)+C*(j+overlap)+c] = x[j+N4+overlap];
-         RESTORE_STACK;
+         clt_mdct_backward(lookup, X, out_mem+C*(MAX_PERIOD-N-N4), mode->window, overlap);
       } else {
+         VARDECL(celt_word32, x);
+         VARDECL(celt_word32, tmp);
          int b;
-         const int N2 = mode->shortMdctSize;
-         const int B = mode->nbShortMdcts;
-         const mdct_lookup *lookup = &mode->shortMdct;
-         VARDECL(celt_word32_t, x);
-         VARDECL(celt_word32_t, tmp);
+         int N2 = N;
+         int B = 1;
+         int n4offset=0;
+         const mdct_lookup *lookup = MDCT(mode);
          SAVE_STACK;
-         ALLOC(x, 2*N, celt_word32_t);
-         ALLOC(tmp, N, celt_word32_t);
+         
+         ALLOC(x, 2*N, celt_word32);
+         ALLOC(tmp, N, celt_word32);
+
+         if (shortBlocks)
+         {
+            lookup = &mode->shortMdct;
+            N2 = mode->shortMdctSize;
+            B = mode->nbShortMdcts;
+            n4offset = N4;
+         }
          /* Prevents problems from the imdct doing the overlap-add */
          CELT_MEMSET(x+N4, 0, N2);
+
          for (b=0;b<B;b++)
          {
             /* De-interleaving the sub-frames */
             for (j=0;j<N2;j++)
                tmp[j] = X[(j*B+b)+c*N2*B];
-            mdct_backward(lookup, tmp, x+N4+N2*b, mode->window, overlap);
+            clt_mdct_backward(lookup, tmp, x+n4offset+N2*b, mode->window, overlap);
          }
+
          if (transient_shift > 0)
          {
 #ifdef FIXED_POINT
@@ -428,13 +444,13 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig_t
 }
 
 #define FLAG_NONE        0
-#define FLAG_INTRA       1U<<16
-#define FLAG_PITCH       1U<<15
-#define FLAG_SHORT       1U<<14
-#define FLAG_FOLD        1U<<13
+#define FLAG_INTRA       (1U<<13)
+#define FLAG_PITCH       (1U<<12)
+#define FLAG_SHORT       (1U<<11)
+#define FLAG_FOLD        (1U<<10)
 #define FLAG_MASK        (FLAG_INTRA|FLAG_PITCH|FLAG_SHORT|FLAG_FOLD)
 
-celt_int32_t flaglist[8] = {
+static const int flaglist[8] = {
       0 /*00  */ | FLAG_FOLD,
       1 /*01  */ | FLAG_PITCH|FLAG_FOLD,
       8 /*1000*/ | FLAG_NONE,
@@ -445,7 +461,7 @@ celt_int32_t flaglist[8] = {
       7 /*111 */ | FLAG_INTRA|FLAG_SHORT|FLAG_FOLD
 };
 
-void encode_flags(ec_enc *enc, int intra_ener, int has_pitch, int shortBlocks, int has_fold)
+static void encode_flags(ec_enc *enc, int intra_ener, int has_pitch, int shortBlocks, int has_fold)
 {
    int i;
    int flags=FLAG_NONE;
@@ -461,23 +477,23 @@ void encode_flags(ec_enc *enc, int intra_ener, int has_pitch, int shortBlocks, i
    flag_bits = flaglist[i]&0xf;
    /*printf ("enc %d: %d %d %d %d\n", flag_bits, intra_ener, has_pitch, shortBlocks, has_fold);*/
    if (i<2)
-      ec_enc_bits(enc, flag_bits, 2);
+      ec_enc_uint(enc, flag_bits, 4);
    else if (i<6)
-      ec_enc_bits(enc, flag_bits, 4);
+      ec_enc_uint(enc, flag_bits, 16);
    else
-      ec_enc_bits(enc, flag_bits, 3);
+      ec_enc_uint(enc, flag_bits, 8);
 }
 
-void decode_flags(ec_dec *dec, int *intra_ener, int *has_pitch, int *shortBlocks, int *has_fold)
+static void decode_flags(ec_dec *dec, int *intra_ener, int *has_pitch, int *shortBlocks, int *has_fold)
 {
    int i;
    int flag_bits;
-   flag_bits = ec_dec_bits(dec, 2);
+   flag_bits = ec_dec_uint(dec, 4);
    /*printf ("(%d) ", flag_bits);*/
    if (flag_bits==2)
-      flag_bits = (flag_bits<<2) | ec_dec_bits(dec, 2);
+      flag_bits = (flag_bits<<2) | ec_dec_uint(dec, 4);
    else if (flag_bits==3)
-      flag_bits = (flag_bits<<1) | ec_dec_bits(dec, 1);
+      flag_bits = (flag_bits<<1) | ec_dec_uint(dec, 2);
    for (i=0;i<8;i++)
       if (flag_bits == (flaglist[i]&0xf))
          break;
@@ -489,46 +505,83 @@ void decode_flags(ec_dec *dec, int *intra_ener, int *has_pitch, int *shortBlocks
    /*printf ("dec %d: %d %d %d %d\n", flag_bits, *intra_ener, *has_pitch, *shortBlocks, *has_fold);*/
 }
 
+/** Runs the one-tap recursive filter out = MAC16_32_Q15(x, coef, prev_out)
+    over the last N samples (starting MAX_PERIOD-N samples in) of every
+    channel of in, and writes the result to pcm as SCALEOUT(SIG2WORD16(out)).
+    Both in and pcm are interleaved with a stride of C.
+    @param in   Interleaved input signal
+    @param pcm  Interleaved output, N samples per channel
+    @param N    Number of samples to produce per channel
+    @param _C   Number of channels (passed through CHANNELS())
+    @param coef Filter coefficient applied to the previous output
+    @param mem  Per-channel filter memory: read on entry, updated on exit */
+void deemphasis(celt_sig *in, celt_word16 *pcm, int N, int _C, celt_word16 coef, celt_sig *mem)
+{
+   const int C = CHANNELS(_C);
+   /* Per-channel sample index at which the N samples to output begin */
+   const int first = MAX_PERIOD-N;
+   int ch;
+   for (ch=0;ch<C;ch++)
+   {
+      celt_sig state = mem[ch];
+      int k;
+      for (k=0;k<N;k++)
+      {
+         /* The filter output doubles as the memory for the next sample */
+         state = MAC16_32_Q15(in[C*(first+k)+ch], coef, state);
+         pcm[C*k+ch] = SCALEOUT(SIG2WORD16(state));
+      }
+      mem[ch] = state;
+   }
+}
+
+/** Scales down by 2^mdct_weight_shift the coefficients of X located at
+    positions m, m+nbShortMdcts, m+2*nbShortMdcts, ... within each channel,
+    for every m in [start, end), then calls renormalise_bands() on X.
+    @param mode              Mode handed to renormalise_bands()
+    @param X                 Coefficients, N per channel, one channel after
+                             the other
+    @param start             First offset to scale
+    @param end               One past the last offset to scale
+    @param N                 Number of coefficients per channel
+    @param nbShortMdcts      Step between two scaled coefficients
+    @param mdct_weight_shift Shift (power of two) to divide by
+    @param _C                Number of channels (passed through CHANNELS()) */
+static void mdct_shape(const CELTMode *mode, celt_norm *X, int start, int end, int N, int nbShortMdcts, int mdct_weight_shift, int _C)
+{
+   const int C = CHANNELS(_C);
+   int ch;
+   for (ch=0;ch<C;ch++)
+   {
+      /* Channel ch occupies X[ch*N] .. X[(ch+1)*N-1] */
+      const int chan_begin = ch*N;
+      const int chan_end = chan_begin+N;
+      int blk;
+      for (blk=start;blk<end;blk++)
+      {
+         int k;
+         for (k=chan_begin+blk;k<chan_end;k+=nbShortMdcts)
+         {
+#ifdef FIXED_POINT
+            X[k] = SHR16(X[k], mdct_weight_shift);
+#else
+            X[k] = (1.f/(1<<mdct_weight_shift))*X[k];
+#endif
+         }
+      }
+   }
+   renormalise_bands(mode, X, C);
+}
+
+
 #ifdef FIXED_POINT
-int celt_encode(CELTEncoder * restrict st, const celt_int16_t * pcm, celt_int16_t * optional_synthesis, unsigned char *compressed, int nbCompressedBytes)
+int celt_encode(CELTEncoder * restrict st, const celt_int16 * pcm, celt_int16 * optional_synthesis, unsigned char *compressed, int nbCompressedBytes)
 {
 #else
-int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_sig_t * optional_synthesis, unsigned char *compressed, int nbCompressedBytes)
+int celt_encode_float(CELTEncoder * restrict st, const celt_sig * pcm, celt_sig * optional_synthesis, unsigned char *compressed, int nbCompressedBytes)
 {
 #endif
-   int i, c, N, N4;
+   int i, c, N, NN, N4;
    int has_pitch;
    int pitch_index;
    int bits;
    int has_fold=1;
-   unsigned coarse_needed;
+   int coarse_needed;
    ec_byte_buffer buf;
    ec_enc         enc;
-   VARDECL(celt_sig_t, in);
-   VARDECL(celt_sig_t, freq);
-   VARDECL(celt_norm_t, X);
-   VARDECL(celt_norm_t, P);
-   VARDECL(celt_ener_t, bandE);
-   VARDECL(celt_word16_t, bandLogE);
-   VARDECL(celt_pgain_t, gains);
+   VARDECL(celt_sig, in);
+   VARDECL(celt_sig, freq);
+   VARDECL(celt_sig, pitch_freq);
+   VARDECL(celt_norm, X);
+   VARDECL(celt_ener, bandE);
+   VARDECL(celt_word16, bandLogE);
    VARDECL(int, fine_quant);
-   VARDECL(celt_word16_t, error);
+   VARDECL(celt_word16, error);
    VARDECL(int, pulses);
    VARDECL(int, offsets);
    VARDECL(int, fine_priority);
-#ifdef EXP_PSY
-   VARDECL(celt_word32_t, mask);
-   VARDECL(celt_word32_t, tonality);
-   VARDECL(celt_word32_t, bandM);
-   VARDECL(celt_ener_t, bandN);
-#endif
    int intra_ener = 0;
    int shortBlocks=0;
    int transient_time;
    int transient_shift;
-   const int C = CHANNELS(st->mode);
+   int resynth;
+   const int C = CHANNELS(st->channels);
    int mdct_weight_shift = 0;
    int mdct_weight_pos=0;
+   int gain_id=0;
+   int norm_rate;
+   int start=0;
    SAVE_STACK;
 
    if (check_encoder(st) != CELT_OK)
@@ -537,7 +590,7 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
    if (check_mode(st->mode) != CELT_OK)
       return CELT_INVALID_MODE;
 
-   if (nbCompressedBytes<0)
+   if (nbCompressedBytes<0 || pcm==NULL)
      return CELT_BAD_ARG; 
 
    /* The memset is important for now in case the encoder doesn't 
@@ -548,17 +601,17 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
 
    N = st->block_size;
    N4 = (N-st->overlap)>>1;
-   ALLOC(in, 2*C*N-2*C*N4, celt_sig_t);
+   ALLOC(in, 2*C*N-2*C*N4, celt_sig);
 
    CELT_COPY(in, st->in_mem, C*st->overlap);
    for (c=0;c<C;c++)
    {
-      const celt_word16_t * restrict pcmp = pcm+c;
-      celt_sig_t * restrict inp = in+C*st->overlap+c;
+      const celt_word16 * restrict pcmp = pcm+c;
+      celt_sig * restrict inp = in+C*st->overlap+c;
       for (i=0;i<N;i++)
       {
          /* Apply pre-emphasis */
-         celt_sig_t tmp = SCALEIN(SHL32(EXTEND32(*pcmp), SIG_SHIFT));
+         celt_sig tmp = SCALEIN(SHL32(EXTEND32(*pcmp), SIG_SHIFT));
          *inp = SUB32(tmp, SHR32(MULT16_16(preemph,st->preemph_memE[c]),3));
          st->preemph_memE[c] = SCALEIN(*pcmp);
          inp += C;
@@ -572,7 +625,9 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
    transient_shift = 0;
    shortBlocks = 0;
 
-   if (st->mode->nbShortMdcts > 1 && transient_analysis(in, N+st->overlap, C, &transient_time, &transient_shift))
+   resynth = st->pitch_available>0 || optional_synthesis!=NULL;
+
+   if (st->mode->nbShortMdcts > 1 && transient_analysis(in, N+st->overlap, C, &transient_time, &transient_shift, &st->frame_max))
    {
 #ifndef FIXED_POINT
       float gain_1;
@@ -601,23 +656,72 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
       has_fold = 1;
    }
 
-   ALLOC(freq, C*N, celt_sig_t); /**< Interleaved signal MDCTs */
-   ALLOC(bandE,st->mode->nbEBands*C, celt_ener_t);
-   ALLOC(bandLogE,st->mode->nbEBands*C, celt_word16_t);
+   ALLOC(freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
+   ALLOC(bandE,st->mode->nbEBands*C, celt_ener);
+   ALLOC(bandLogE,st->mode->nbEBands*C, celt_word16);
    /* Compute MDCTs */
-   compute_mdcts(st->mode, shortBlocks, in, freq);
+   compute_mdcts(st->mode, shortBlocks, in, freq, C);
 
+
+   norm_rate = (nbCompressedBytes-5)*8*(celt_uint32)st->mode->Fs/(C*N)>>10;
+   /* Pitch analysis: we do it early to save on the peak stack space */
+   /* Don't use pitch if there isn't enough data available yet, 
+      or if we're using shortBlocks */
+   has_pitch = st->pitch_enabled && st->pitch_permitted && (N <= 512) 
+            && (st->pitch_available >= MAX_PERIOD) && (!shortBlocks)
+            && norm_rate < 50;
+   if (has_pitch)
+   {
+      VARDECL(celt_word16, x_lp);
+      SAVE_STACK;
+      ALLOC(x_lp, (2*N-2*N4)>>1, celt_word16);
+      pitch_downsample(in, x_lp, 2*N-2*N4, N, C, &st->xmem, &st->pitch_buf[MAX_PERIOD>>1]);
+      pitch_search(st->mode, x_lp, st->pitch_buf, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index, &st->xmem);
+      RESTORE_STACK;
+   }
+
+   /* Deferred allocation after find_spectral_pitch() to reduce 
+      the peak memory usage */
+   ALLOC(X, C*N, celt_norm);         /**< Interleaved normalised MDCTs */
+
+   ALLOC(pitch_freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
+   if (has_pitch)
+   {
+      compute_mdcts(st->mode, 0, st->out_mem+pitch_index*C, pitch_freq, C);
+      has_pitch = compute_pitch_gain(st->mode, freq, pitch_freq, norm_rate, &gain_id, C, &st->gain_prod);
+   }
+   
+   if (has_pitch)
+      apply_pitch(st->mode, freq, pitch_freq, gain_id, 1, C);
+
+   compute_band_energies(st->mode, freq, bandE, C);
+   for (i=0;i<st->mode->nbEBands*C;i++)
+      bandLogE[i] = amp2Log(bandE[i]);
+
+   /* Band normalisation */
+   normalise_bands(st->mode, freq, X, bandE, C);
+   if (!shortBlocks && !folding_decision(st->mode, X, &st->tonal_average, &st->fold_decision, C))
+      has_fold = 0;
+
+   /* Don't use intra energy when we're operating at low bit-rate */
+   intra_ener = st->force_intra || (!has_pitch && st->delayedIntra && nbCompressedBytes > st->mode->nbEBands);
+   if (shortBlocks || intra_decision(bandLogE, st->oldBandE, st->mode->nbEBands))
+      st->delayedIntra = 1;
+   else
+      st->delayedIntra = 0;
+
+   NN = st->mode->eBands[st->mode->nbEBands];
    if (shortBlocks && !transient_shift) 
    {
-      celt_word32_t sum[4]={1,1,1,1};
+      celt_word32 sum[8]={1,1,1,1,1,1,1,1};
       int m;
       for (c=0;c<C;c++)
       {
          m=0;
          do {
-            celt_word32_t tmp=0;
-            for (i=m+c*N;i<(c+1)*N;i+=st->mode->nbShortMdcts)
-               tmp += ABS32(freq[i]);
+            celt_word32 tmp=0;
+            for (i=m+c*N;i<c*N+NN;i+=st->mode->nbShortMdcts)
+               tmp += ABS32(X[i]);
             sum[m++] += tmp;
          } while (m<st->mode->nbShortMdcts);
       }
@@ -635,13 +739,6 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
          }
          m++;
       } while (m<st->mode->nbShortMdcts-1);
-      if (mdct_weight_shift)
-      {
-         for (c=0;c<C;c++)
-            for (m=mdct_weight_pos+1;m<st->mode->nbShortMdcts;m++)
-               for (i=m+c*N;i<(c+1)*N;i+=st->mode->nbShortMdcts)
-                  freq[i] = SHR32(freq[i],mdct_weight_shift);
-      }
 #else
       do {
          if (sum[m+1] > 8*sum[m])
@@ -655,184 +752,111 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
          }
          m++;
       } while (m<st->mode->nbShortMdcts-1);
+#endif
       if (mdct_weight_shift)
       {
-         for (c=0;c<C;c++)
-            for (m=mdct_weight_pos+1;m<st->mode->nbShortMdcts;m++)
-               for (i=m+c*N;i<(c+1)*N;i+=st->mode->nbShortMdcts)
-                  freq[i] = (1./(1<<mdct_weight_shift))*freq[i];
+         mdct_shape(st->mode, X, mdct_weight_pos+1, st->mode->nbShortMdcts, N, st->mode->nbShortMdcts, mdct_weight_shift, C);
+         renormalise_bands(st->mode, X, C);
       }
-#endif
    }
 
-   compute_band_energies(st->mode, freq, bandE);
-   for (i=0;i<st->mode->nbEBands*C;i++)
-      bandLogE[i] = amp2Log(bandE[i]);
 
-   /* Don't use intra energy when we're operating at low bit-rate */
-   intra_ener = st->force_intra || (st->delayedIntra && nbCompressedBytes > st->mode->nbEBands);
-   if (shortBlocks || intra_decision(bandLogE, st->oldBandE, st->mode->nbEBands))
-      st->delayedIntra = 1;
-   else
-      st->delayedIntra = 0;
-
-   /* Pitch analysis: we do it early to save on the peak stack space */
-   /* Don't use pitch if there isn't enough data available yet, 
-      or if we're using shortBlocks */
-   has_pitch = st->pitch_enabled && st->pitch_permitted && (st->pitch_available >= MAX_PERIOD) && (!shortBlocks) && !intra_ener;
-#ifdef EXP_PSY
-   ALLOC(tonality, MAX_PERIOD/4, celt_word16_t);
-   {
-      VARDECL(celt_word16_t, X);
-      ALLOC(X, MAX_PERIOD/2, celt_word16_t);
-      find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, X, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);
-      compute_tonality(st->mode, X, st->psy_mem, MAX_PERIOD, tonality, MAX_PERIOD/4);
-   }
-#else
-   if (has_pitch)
-   {
-      find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, NULL, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);
-   }
-#endif
-
-#ifdef EXP_PSY
-   ALLOC(mask, N, celt_sig_t);
-   compute_mdct_masking(&st->psy, freq, tonality, st->psy_mem, mask, C*N);
-   /*for (i=0;i<256;i++)
-      printf ("%f %f %f ", freq[i], tonality[i], mask[i]);
-   printf ("\n");*/
-#endif
-
-   /* Deferred allocation after find_spectral_pitch() to reduce 
-      the peak memory usage */
-   ALLOC(X, C*N, celt_norm_t);         /**< Interleaved normalised MDCTs */
-   ALLOC(P, C*N, celt_norm_t);         /**< Interleaved normalised pitch MDCTs*/
-   ALLOC(gains,st->mode->nbPBands, celt_pgain_t);
-
-
-   /* Band normalisation */
-   normalise_bands(st->mode, freq, X, bandE);
-   if (!shortBlocks && !folding_decision(st->mode, X, &st->tonal_average, &st->fold_decision))
-      has_fold = 0;
-#ifdef EXP_PSY
-   ALLOC(bandN,C*st->mode->nbEBands, celt_ener_t);
-   ALLOC(bandM,st->mode->nbEBands, celt_ener_t);
-   compute_noise_energies(st->mode, freq, tonality, bandN);
-
-   /*for (i=0;i<st->mode->nbEBands;i++)
-      printf ("%f ", (.1+bandN[i])/(.1+bandE[i]));
-   printf ("\n");*/
-   has_fold = 0;
-   for (i=st->mode->nbPBands;i<st->mode->nbEBands;i++)
-      if (bandN[i] < .4*bandE[i])
-         has_fold++;
-   /*printf ("%d\n", has_fold);*/
-   if (has_fold>=2)
-      has_fold = 0;
-   else
-      has_fold = 1;
-   for (i=0;i<N;i++)
-      mask[i] = sqrt(mask[i]);
-   compute_band_energies(st->mode, mask, bandM);
-   /*for (i=0;i<st->mode->nbEBands;i++)
-      printf ("%f %f ", bandE[i], bandM[i]);
-   printf ("\n");*/
-#endif
-
-   /* Compute MDCTs of the pitch part */
-   if (has_pitch)
-   {
-      celt_word32_t curr_power, pitch_power=0;
-      /* Normalise the pitch vector as well (discard the energies) */
-      VARDECL(celt_ener_t, bandEp);
-      
-      compute_mdcts(st->mode, 0, st->out_mem+pitch_index*C, freq);
-      ALLOC(bandEp, st->mode->nbEBands*st->mode->nbChannels, celt_ener_t);
-      compute_band_energies(st->mode, freq, bandEp);
-      normalise_bands(st->mode, freq, P, bandEp);
-      pitch_power = bandEp[0]+bandEp[1]+bandEp[2];
-      curr_power = bandE[0]+bandE[1]+bandE[2];
-      if (C>1)
-      {
-         pitch_power += bandEp[0+st->mode->nbEBands]+bandEp[1+st->mode->nbEBands]+bandEp[2+st->mode->nbEBands];
-         curr_power += bandE[0+st->mode->nbEBands]+bandE[1+st->mode->nbEBands]+bandE[2+st->mode->nbEBands];
-      }
-      /* Check if we can safely use the pitch (i.e. effective gain 
-      isn't too high) */
-      if ((MULT16_32_Q15(QCONST16(.1f, 15),curr_power) + QCONST32(10.f,ENER_SHIFT) < pitch_power))
-      {
-         /* Pitch prediction */
-         has_pitch = compute_pitch_gain(st->mode, X, P, gains);
-      } else {
-         has_pitch = 0;
-      }
-   }
-   
    encode_flags(&enc, intra_ener, has_pitch, shortBlocks, has_fold);
    if (has_pitch)
    {
       ec_enc_uint(&enc, pitch_index, MAX_PERIOD-(2*N-2*N4));
-   } else {
-      for (i=0;i<st->mode->nbPBands;i++)
-         gains[i] = 0;
-      for (i=0;i<C*N;i++)
-         P[i] = 0;
+      ec_enc_uint(&enc, gain_id, 16);
    }
    if (shortBlocks)
    {
       if (transient_shift)
       {
-         ec_enc_bits(&enc, transient_shift, 2);
+         ec_enc_uint(&enc, transient_shift, 4);
          ec_enc_uint(&enc, transient_time, N+st->overlap);
       } else {
-         ec_enc_bits(&enc, mdct_weight_shift, 2);
+         ec_enc_uint(&enc, mdct_weight_shift, 4);
          if (mdct_weight_shift && st->mode->nbShortMdcts!=2)
             ec_enc_uint(&enc, mdct_weight_pos, st->mode->nbShortMdcts-1);
       }
    }
 
-#ifdef STDIN_TUNING2
-   static int fine_quant[30];
-   static int pulses[30];
-   static int init=0;
-   if (!init)
-   {
-      for (i=0;i<st->mode->nbEBands;i++)
-         scanf("%d ", &fine_quant[i]);
-      for (i=0;i<st->mode->nbEBands;i++)
-         scanf("%d ", &pulses[i]);
-      init = 1;
-   }
-#else
    ALLOC(fine_quant, st->mode->nbEBands, int);
    ALLOC(pulses, st->mode->nbEBands, int);
-#endif
+
+   /* Computes the max bit-rate allowed in VBR mode to avoid busting the budget */
+   if (st->vbr_rate>0)
+   {
+      celt_int32 vbr_bound, max_allowed;
+
+      vbr_bound = st->vbr_rate;
+      max_allowed = (st->vbr_rate + vbr_bound - st->vbr_reservoir)>>(BITRES+3);
+      if (max_allowed < 4)
+         max_allowed = 4;
+      if (max_allowed < nbCompressedBytes)
+         nbCompressedBytes = max_allowed;
+   }
 
    /* Bit allocation */
-   ALLOC(error, C*st->mode->nbEBands, celt_word16_t);
-   coarse_needed = quant_coarse_energy(st->mode, bandLogE, st->oldBandE, nbCompressedBytes*8/3, intra_ener, st->mode->prob, error, &enc);
+   ALLOC(error, C*st->mode->nbEBands, celt_word16);
+   coarse_needed = quant_coarse_energy(st->mode, start, bandLogE, st->oldBandE, nbCompressedBytes*4-8, intra_ener, st->mode->prob, error, &enc, C);
    coarse_needed = ((coarse_needed*3-1)>>3)+1;
-
+   if (coarse_needed > nbCompressedBytes)
+      coarse_needed = nbCompressedBytes;
    /* Variable bitrate */
-   if (st->VBR_rate>0)
+   if (st->vbr_rate>0)
    {
+     celt_word16 alpha;
+     celt_int32 delta;
      /* The target rate in 16th bits per frame */
-     int target=st->VBR_rate;
+     celt_int32 target=st->vbr_rate;
    
      /* Shortblocks get a large boost in bitrate, but since they 
         are uncommon long blocks are not greatly effected */
      if (shortBlocks)
        target*=2;
      else if (st->mode->nbShortMdcts > 1)
-       target-=(target+14)/28;     
+       target-=(target+14)/28;
 
      /* The average energy is removed from the target and the actual 
         energy added*/
-     target=target-588+ec_enc_tell(&enc, 4);
+     target=target+st->vbr_offset-588+ec_enc_tell(&enc, BITRES);
 
      /* In VBR mode the frame size must not be reduced so much that it would result in the coarse energy busting its budget */
      target=IMAX(coarse_needed,(target+64)/128);
-     nbCompressedBytes=IMIN(nbCompressedBytes,target);
+     target=IMIN(nbCompressedBytes,target);
+     /* Make the adaptation coef (alpha) higher at the beginning */
+     if (st->vbr_count < 990)
+     {
+        st->vbr_count++;
+        alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+10),16));
+        /*printf ("%d %d\n", st->vbr_count+10, alpha);*/
+     } else
+        alpha = QCONST16(.001f,15);
+
+     /* By how much did we "miss" the target on that frame */
+     delta = (8<<BITRES)*(celt_int32)target - st->vbr_rate;
+     /* How many bits have we used in excess of what we're allowed */
+     st->vbr_reservoir += delta;
+     /*printf ("%d\n", st->vbr_reservoir);*/
+
+     /* Compute the offset we need to apply in order to reach the target */
+     st->vbr_drift += MULT16_32_Q15(alpha,delta-st->vbr_offset-st->vbr_drift);
+     st->vbr_offset = -st->vbr_drift;
+     /*printf ("%d\n", st->vbr_drift);*/
+
+     /* We could use any multiple of vbr_rate as bound (depending on the delay) */
+     if (st->vbr_reservoir < 0)
+     {
+        /* We're under the min value -- increase rate */
+        int adjust = 1-(st->vbr_reservoir-1)/(8<<BITRES);
+        st->vbr_reservoir += adjust*(8<<BITRES);
+        target += adjust;
+        /*printf ("+%d\n", adjust);*/
+     }
+     if (target < nbCompressedBytes)
+        nbCompressedBytes = target;
+     /* This moves the raw bits to take into account the new compressed size */
+     ec_byte_shrink(&buf, nbCompressedBytes);
    }
 
    ALLOC(offsets, st->mode->nbEBands, int);
@@ -841,63 +865,46 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
    for (i=0;i<st->mode->nbEBands;i++)
       offsets[i] = 0;
    bits = nbCompressedBytes*8 - ec_enc_tell(&enc, 0) - 1;
-   if (has_pitch)
-      bits -= st->mode->nbPBands;
-#ifndef STDIN_TUNING
-   compute_allocation(st->mode, offsets, bits, pulses, fine_quant, fine_priority);
-#endif
+   compute_allocation(st->mode, start, offsets, bits, pulses, fine_quant, fine_priority, C);
 
-   quant_fine_energy(st->mode, bandE, st->oldBandE, error, fine_quant, &enc);
+   quant_fine_energy(st->mode, start, bandE, st->oldBandE, error, fine_quant, &enc, C);
 
    /* Residual quantisation */
    if (C==1)
-      quant_bands(st->mode, X, P, NULL, has_pitch, gains, bandE, pulses, shortBlocks, has_fold, nbCompressedBytes*8, &enc);
+      quant_bands(st->mode, start, X, bandE, pulses, shortBlocks, has_fold, resynth, nbCompressedBytes*8, 1, &enc);
 #ifndef DISABLE_STEREO
    else
-      quant_bands_stereo(st->mode, X, P, NULL, has_pitch, gains, bandE, pulses, shortBlocks, has_fold, nbCompressedBytes*8, &enc);
+      quant_bands_stereo(st->mode, start, X, bandE, pulses, shortBlocks, has_fold, resynth, nbCompressedBytes*8, &enc);
 #endif
 
-   quant_energy_finalise(st->mode, bandE, st->oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_enc_tell(&enc, 0), &enc);
+   quant_energy_finalise(st->mode, start, bandE, st->oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_enc_tell(&enc, 0), &enc, C);
 
    /* Re-synthesis of the coded audio if required */
-   if (st->pitch_available>0 || optional_synthesis!=NULL)
+   if (resynth)
    {
       if (st->pitch_available>0 && st->pitch_available<MAX_PERIOD)
         st->pitch_available+=st->frame_size;
 
-      /* Synthesis */
-      denormalise_bands(st->mode, X, freq, bandE);
-      
-      
-      CELT_MOVE(st->out_mem, st->out_mem+C*N, C*(MAX_PERIOD+st->overlap-N));
-      
       if (mdct_weight_shift)
       {
-         int m;
-         for (c=0;c<C;c++)
-            for (m=mdct_weight_pos+1;m<st->mode->nbShortMdcts;m++)
-               for (i=m+c*N;i<(c+1)*N;i+=st->mode->nbShortMdcts)
-#ifdef FIXED_POINT
-                  freq[i] = SHL32(freq[i], mdct_weight_shift);
-#else
-                  freq[i] = (1<<mdct_weight_shift)*freq[i];
-#endif
+         mdct_shape(st->mode, X, 0, mdct_weight_pos+1, N, st->mode->nbShortMdcts, mdct_weight_shift, C);
       }
-      compute_inv_mdcts(st->mode, shortBlocks, freq, transient_time, transient_shift, st->out_mem);
+
+      /* Synthesis */
+      denormalise_bands(st->mode, X, freq, bandE, C);
+
+      CELT_MOVE(st->out_mem, st->out_mem+C*N, C*(MAX_PERIOD+st->overlap-N));
+
+      if (has_pitch)
+         apply_pitch(st->mode, freq, pitch_freq, gain_id, 0, C);
+      
+      compute_inv_mdcts(st->mode, shortBlocks, freq, transient_time, transient_shift, st->out_mem, C);
+
       /* De-emphasis and put everything back at the right place 
          in the synthesis history */
       if (optional_synthesis != NULL) {
-         for (c=0;c<C;c++)
-         {
-            int j;
-            for (j=0;j<N;j++)
-            {
-               celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
-                                   preemph,st->preemph_memD[c]);
-               st->preemph_memD[c] = tmp;
-               optional_synthesis[C*j+c] = SCALEOUT(SIG2WORD16(tmp));
-            }
-         }
+         deemphasis(st->out_mem, optional_synthesis, N, C, preemph, st->preemph_memD);
+
       }
    }
 
@@ -912,7 +919,8 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
 int celt_encode_float(CELTEncoder * restrict st, const float * pcm, float * optional_synthesis, unsigned char *compressed, int nbCompressedBytes)
 {
    int j, ret, C, N;
-   VARDECL(celt_int16_t, in);
+   VARDECL(celt_int16, in);
+   SAVE_STACK;
 
    if (check_encoder(st) != CELT_OK)
       return CELT_INVALID_STATE;
@@ -920,10 +928,12 @@ int celt_encode_float(CELTEncoder * restrict st, const float * pcm, float * opti
    if (check_mode(st->mode) != CELT_OK)
       return CELT_INVALID_MODE;
 
-   SAVE_STACK;
-   C = CHANNELS(st->mode);
+   if (pcm==NULL)
+      return CELT_BAD_ARG;
+
+   C = CHANNELS(st->channels);
    N = st->block_size;
-   ALLOC(in, C*N, celt_int16_t);
+   ALLOC(in, C*N, celt_int16);
 
    for (j=0;j<C*N;j++)
      in[j] = FLOAT2INT16(pcm[j]);
@@ -941,10 +951,11 @@ int celt_encode_float(CELTEncoder * restrict st, const float * pcm, float * opti
 }
 #endif /*DISABLE_FLOAT_API*/
 #else
-int celt_encode(CELTEncoder * restrict st, const celt_int16_t * pcm, celt_int16_t * optional_synthesis, unsigned char *compressed, int nbCompressedBytes)
+int celt_encode(CELTEncoder * restrict st, const celt_int16 * pcm, celt_int16 * optional_synthesis, unsigned char *compressed, int nbCompressedBytes)
 {
    int j, ret, C, N;
-   VARDECL(celt_sig_t, in);
+   VARDECL(celt_sig, in);
+   SAVE_STACK;
 
    if (check_encoder(st) != CELT_OK)
       return CELT_INVALID_STATE;
@@ -952,10 +963,12 @@ int celt_encode(CELTEncoder * restrict st, const celt_int16_t * pcm, celt_int16_
    if (check_mode(st->mode) != CELT_OK)
       return CELT_INVALID_MODE;
 
-   SAVE_STACK;
-   C=CHANNELS(st->mode);
+   if (pcm==NULL)
+      return CELT_BAD_ARG;
+
+   C=CHANNELS(st->channels);
    N=st->block_size;
-   ALLOC(in, C*N, celt_sig_t);
+   ALLOC(in, C*N, celt_sig);
    for (j=0;j<C*N;j++) {
      in[j] = SCALEOUT(pcm[j]);
    }
@@ -994,7 +1007,7 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
       break;
       case CELT_SET_COMPLEXITY_REQUEST:
       {
-         int value = va_arg(ap, celt_int32_t);
+         int value = va_arg(ap, celt_int32);
          if (value<0 || value>10)
             goto bad_arg;
          if (value<=2) {
@@ -1009,14 +1022,14 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
       break;
       case CELT_SET_PREDICTION_REQUEST:
       {
-         int value = va_arg(ap, celt_int32_t);
+         int value = va_arg(ap, celt_int32);
          if (value<0 || value>2)
             goto bad_arg;
          if (value==0)
          {
             st->force_intra   = 1;
             st->pitch_permitted = 0;
-         } else if (value=1) {
+         } else if (value==1) {
             st->force_intra   = 0;
             st->pitch_permitted = 0;
          } else {
@@ -1027,19 +1040,19 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
       break;
       case CELT_SET_VBR_RATE_REQUEST:
       {
-         int value = va_arg(ap, celt_int32_t);
+         celt_int32 value = va_arg(ap, celt_int32);
          if (value<0)
             goto bad_arg;
          if (value>3072000)
             value = 3072000;
-         st->VBR_rate = ((st->mode->Fs<<3)+(st->block_size>>1))/st->block_size;
-         st->VBR_rate = ((value<<7)+(st->VBR_rate>>1))/st->VBR_rate;
+         st->vbr_rate = ((st->mode->Fs<<3)+(st->block_size>>1))/st->block_size;
+         st->vbr_rate = ((value<<7)+(st->vbr_rate>>1))/st->vbr_rate;
       }
       break;
       case CELT_RESET_STATE:
       {
          const CELTMode *mode = st->mode;
-         int C = mode->nbChannels;
+         int C = st->channels;
 
          if (st->pitch_available > 0) st->pitch_available = 1;
 
@@ -1051,6 +1064,17 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
          CELT_MEMSET(st->preemph_memE, 0, C);
          CELT_MEMSET(st->preemph_memD, 0, C);
          st->delayedIntra = 1;
+
+         st->fold_decision = 1;
+         st->tonal_average = QCONST16(1.f,8);
+         st->gain_prod = 0;
+         st->vbr_reservoir = 0;
+         st->vbr_drift = 0;
+         st->vbr_offset = 0;
+         st->vbr_count = 0;
+         st->xmem = 0;
+         st->frame_max = 0;
+         CELT_MEMSET(st->pitch_buf, 0, (MAX_PERIOD>>1)+2);
       }
       break;
       default:
@@ -1088,23 +1112,29 @@ bad_request:
  @brief Decoder state
  */
 struct CELTDecoder {
-   celt_uint32_t marker;
+   celt_uint32 marker; /* lifecycle tag: DECODERPARTIAL, DECODERVALID or DECODERFREED */
    const CELTMode *mode;
    int frame_size;
    int block_size;
    int overlap;
+   int channels; /* channel count passed to celt_decoder_create() */
 
    ec_byte_buffer buf;
    ec_enc         enc;
 
-   celt_sig_t * restrict preemph_memD;
+   celt_sig * restrict preemph_memD; /* C entries, handed to deemphasis() as filter memory */
 
-   celt_sig_t *out_mem;
-   celt_sig_t *decode_mem;
+   celt_sig *out_mem; /* alias into decode_mem (decode_mem+DECODE_BUFFER_SIZE-MAX_PERIOD); not separately allocated */
+   celt_sig *decode_mem; /* (DECODE_BUFFER_SIZE+overlap)*C samples */
 
-   celt_word16_t *oldBandE;
+   celt_word16 *oldBandE; /* C*nbEBands entries */
    
+#ifdef NEW_PLC
+   celt_word16 *lpc; /* C*LPC_ORDER entries, LPC_ORDER per channel */
+#endif
+
    int last_pitch_index;
+   int loss_count; /* bumped by celt_decode_lost(), cleared after a normally decoded frame */
 };
 
 int check_decoder(const CELTDecoder *st) 
@@ -1123,44 +1153,81 @@ int check_decoder(const CELTDecoder *st)
    return CELT_INVALID_STATE;
 }
 
-CELTDecoder *celt_decoder_create(const CELTMode *mode)
+CELTDecoder *celt_decoder_create(const CELTMode *mode, int channels, int *error) /* error may be NULL; otherwise it receives a CELT_* status */
 {
    int N, C;
    CELTDecoder *st;
 
    if (check_mode(mode) != CELT_OK)
+   {
+      if (error)
+         *error = CELT_INVALID_MODE;
+      return NULL;
+   }
+#ifdef DISABLE_STEREO
+   if (channels > 1)
+   {
+      celt_warning("Stereo support was disable from this build");
+      if (error)
+         *error = CELT_BAD_ARG;
+      return NULL;
+   }
+#endif
+
+   if (channels < 1 || channels > 2) /* reject 0 as well: it is neither mono nor stereo */
+   {
+      celt_warning("Only mono and stereo supported");
+      if (error)
+         *error = CELT_BAD_ARG;
       return NULL;
+   }
 
    N = mode->mdctSize;
-   C = CHANNELS(mode);
+   C = CHANNELS(channels);
    st = celt_alloc(sizeof(CELTDecoder));
 
    if (st==NULL)
+   {
+      if (error)
+         *error = CELT_ALLOC_FAIL;
       return NULL;
-   
+   }
+
    st->marker = DECODERPARTIAL;
    st->mode = mode;
    st->frame_size = N;
    st->block_size = N;
    st->overlap = mode->overlap;
+   st->channels = channels; /* kept in the state; later calls read st->channels instead of the mode */
 
-   st->decode_mem = celt_alloc((DECODE_BUFFER_SIZE+st->overlap)*C*sizeof(celt_sig_t));
+   st->decode_mem = celt_alloc((DECODE_BUFFER_SIZE+st->overlap)*C*sizeof(celt_sig));
    st->out_mem = st->decode_mem+DECODE_BUFFER_SIZE-MAX_PERIOD;
    
-   st->oldBandE = (celt_word16_t*)celt_alloc(C*mode->nbEBands*sizeof(celt_word16_t));
+   st->oldBandE = (celt_word16*)celt_alloc(C*mode->nbEBands*sizeof(celt_word16));
    
-   st->preemph_memD = (celt_sig_t*)celt_alloc(C*sizeof(celt_sig_t));
+   st->preemph_memD = (celt_sig*)celt_alloc(C*sizeof(celt_sig));
+
+#ifdef NEW_PLC
+   st->lpc = (celt_word16*)celt_alloc(C*LPC_ORDER*sizeof(celt_word16));
+#endif
 
-   st->last_pitch_index = 0;
+   st->loss_count = 0; /* last_pitch_index is only read once loss_count is non-zero */
 
    if ((st->decode_mem!=NULL) && (st->out_mem!=NULL) && (st->oldBandE!=NULL) &&
+#ifdef NEW_PLC
+         (st->lpc!=NULL) &&
+#endif
        (st->preemph_memD!=NULL))
    {
+      if (error)
+         *error = CELT_OK;
       st->marker = DECODERVALID;
       return st;
    }
    /* If the setup fails for some reason deallocate it. */
    celt_decoder_destroy(st);
+   if (error)
+      *error = CELT_ALLOC_FAIL;
    return NULL;
 }
 
@@ -1192,87 +1259,192 @@ void celt_decoder_destroy(CELTDecoder *st)
    celt_free(st->decode_mem);
    celt_free(st->oldBandE);
    celt_free(st->preemph_memD);
+
+#ifdef NEW_PLC
+   celt_free(st->lpc);
+#endif
    
    st->marker = DECODERFREED;
    
    celt_free(st);
 }
 
-/** Handles lost packets by just copying past data with the same
-    offset as the last
-    pitch period */
-#ifdef NEW_PLC
-#include "plc.c"
-#else
-static void celt_decode_lost(CELTDecoder * restrict st, celt_word16_t * restrict pcm)
+static void celt_decode_lost(CELTDecoder * restrict st, celt_word16 * restrict pcm) /* Conceals one lost frame and writes it to pcm */
 {
    int c, N;
    int pitch_index;
+   int overlap = st->mode->overlap;
+   celt_word16 fade = Q15ONE;
    int i, len;
-   VARDECL(celt_sig_t, freq);
-   const int C = CHANNELS(st->mode);
+   VARDECL(celt_sig, freq);
+   const int C = CHANNELS(st->channels);
    int offset;
    SAVE_STACK;
    N = st->block_size;
-   ALLOC(freq,C*N, celt_sig_t); /**< Interleaved signal MDCTs */
    
    len = N+st->mode->overlap;
-#if 0
-   pitch_index = st->last_pitch_index;
    
-   /* Use the pitch MDCT as the "guessed" signal */
-   compute_mdcts(st->mode, st->mode->window, st->out_mem+pitch_index*C, freq);
+   if (st->loss_count == 0) /* first lost frame: search the history for a pitch lag */
+   {
+      celt_word16 pitch_buf[MAX_PERIOD>>1];
+      celt_word32 tmp=0;
+      celt_word32 mem0[2]={0,0};
+      celt_word16 mem1[2]={0,0};
+      pitch_downsample(st->out_mem, pitch_buf, MAX_PERIOD, MAX_PERIOD,
+                       C, mem0, mem1);
+      pitch_search(st->mode, pitch_buf+((MAX_PERIOD-len)>>1), pitch_buf, len,
+                   MAX_PERIOD-len-100, &pitch_index, &tmp);
+      pitch_index = MAX_PERIOD-len-pitch_index;
+      st->last_pitch_index = pitch_index;
+   } else {
+      pitch_index = st->last_pitch_index; /* consecutive losses reuse the stored lag */
+      if (st->loss_count < 5)
+         fade = QCONST16(.8f,15);
+      else
+         fade = 0; /* from the sixth consecutive loss on, output is fully attenuated */
+   }
 
-#else
-   find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, st->out_mem+MAX_PERIOD-len, st->out_mem, st->mode->window, NULL, len, MAX_PERIOD-len-100, &pitch_index);
-   pitch_index = MAX_PERIOD-len-pitch_index;
+#ifndef NEW_PLC
    offset = MAX_PERIOD-pitch_index;
+   ALLOC(freq,C*N, celt_sig); /**< Interleaved signal MDCTs */
    while (offset+len >= MAX_PERIOD)
       offset -= pitch_index;
-   compute_mdcts(st->mode, 0, st->out_mem+offset*C, freq);
+   compute_mdcts(st->mode, 0, st->out_mem+offset*C, freq, C);
    for (i=0;i<C*N;i++)
-      freq[i] = ADD32(EPSILON, MULT16_32_Q15(QCONST16(.9f,15),freq[i]));
-#endif
-   
-   
-   
+      freq[i] = ADD32(VERY_SMALL, MULT16_32_Q15(fade,freq[i]));
+
    CELT_MOVE(st->out_mem, st->out_mem+C*N, C*(MAX_PERIOD+st->mode->overlap-N));
    /* Compute inverse MDCTs */
-   compute_inv_mdcts(st->mode, 0, freq, -1, 0, st->out_mem);
-
+   compute_inv_mdcts(st->mode, 0, freq, -1, 0, st->out_mem, C);
+#else
    for (c=0;c<C;c++)
    {
-      int j;
-      for (j=0;j<N;j++)
+      celt_word32 e[MAX_PERIOD];
+      celt_word16 exc[MAX_PERIOD];
+      float ac[LPC_ORDER+1];
+      float decay = 1;
+      float S1=0;
+      celt_word16 mem[LPC_ORDER]={0};
+
+      offset = MAX_PERIOD-pitch_index;
+      for (i=0;i<MAX_PERIOD;i++)
+         exc[i] = ROUND16(st->out_mem[i*C+c], SIG_SHIFT);
+
+      if (st->loss_count == 0)
+      {
+         _celt_autocorr(exc, ac, st->mode->window, st->mode->overlap,
+                        LPC_ORDER, MAX_PERIOD);
+
+         /* Noise floor -50 dB */
+         ac[0] *= 1.00001;
+         /* Lag windowing */
+         for (i=1;i<=LPC_ORDER;i++)
+         {
+            /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+            ac[i] -= ac[i]*(.008*i)*(.008*i);
+         }
+
+         _celt_lpc(st->lpc+c*LPC_ORDER, ac, LPC_ORDER);
+      }
+      fir(exc, st->lpc+c*LPC_ORDER, exc, MAX_PERIOD, LPC_ORDER, mem);
+      /*for (i=0;i<MAX_PERIOD;i++)printf("%d ", exc[i]); printf("\n");*/
+      /* Check if the waveform is decaying (and if so how fast) */
+      {
+         float E1=0, E2=0;
+         int period;
+         if (pitch_index <= MAX_PERIOD/2)
+            period = pitch_index;
+         else
+            period = MAX_PERIOD/2;
+         for (i=0;i<period;i++)
+         {
+            E1 += exc[MAX_PERIOD-period+i]*exc[MAX_PERIOD-period+i];
+            E2 += exc[MAX_PERIOD-2*period+i]*exc[MAX_PERIOD-2*period+i];
+         }
+         decay = sqrt((E1+1)/(E2+1));
+         if (decay > 1)
+            decay = 1;
+      }
+
+      /* Copy excitation, taking decay into account */
+      for (i=0;i<len+st->mode->overlap;i++)
+      {
+         if (offset+i >= MAX_PERIOD)
+         {
+            offset -= pitch_index;
+            decay *= decay;
+         }
+         e[i] = decay*SHL32(EXTEND32(exc[offset+i]), SIG_SHIFT);
+         S1 += st->out_mem[(offset+i)*C+c]*1.*st->out_mem[(offset+i)*C+c]; /* out_mem is interleaved: read channel c only */
+      }
+
+      iir(e, st->lpc+c*LPC_ORDER, e, len+st->mode->overlap, LPC_ORDER, mem);
+
+      {
+         float S2=0;
+         for (i=0;i<len+overlap;i++)
+            S2 += e[i]*1.*e[i];
+         /* This checks for an "explosion" in the synthesis (including NaNs) */
+         if (!(S1 > 0.2f*S2))
+         {
+            for (i=0;i<len+overlap;i++)
+               e[i] = 0;
+         } else if (S1 < S2)
+         {
+            float ratio = sqrt((S1+1)/(S2+1));
+            for (i=0;i<len+overlap;i++)
+               e[i] *= ratio;
+         }
+      }
+
+      for (i=0;i<MAX_PERIOD+st->mode->overlap-N;i++)
+         st->out_mem[C*i+c] = st->out_mem[C*(N+i)+c];
+
+      /* Apply TDAC to the concealed audio so that it blends with the
+         previous and next frames */
+      for (i=0;i<overlap/2;i++)
       {
-         celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
-                                preemph,st->preemph_memD[c]);
-         st->preemph_memD[c] = tmp;
-         pcm[C*j+c] = SCALEOUT(SIG2WORD16(tmp));
+         celt_word32 tmp1, tmp2;
+         tmp1 = MULT16_32_Q15(st->mode->window[i          ], e[i          ]) -
+                MULT16_32_Q15(st->mode->window[overlap-i-1], e[overlap-i-1]);
+         tmp2 = MULT16_32_Q15(st->mode->window[i],           e[N+overlap-1-i]) +
+                MULT16_32_Q15(st->mode->window[overlap-i-1], e[N+i          ]);
+         tmp1 = MULT16_32_Q15(fade, tmp1);
+         tmp2 = MULT16_32_Q15(fade, tmp2);
+         st->out_mem[C*(MAX_PERIOD+i)+c] = MULT16_32_Q15(st->mode->window[overlap-i-1], tmp2);
+         st->out_mem[C*(MAX_PERIOD+overlap-i-1)+c] = MULT16_32_Q15(st->mode->window[i], tmp2);
+         st->out_mem[C*(MAX_PERIOD-N+i)+c] += MULT16_32_Q15(st->mode->window[i], tmp1);
+         st->out_mem[C*(MAX_PERIOD-N+overlap-i-1)+c] -= MULT16_32_Q15(st->mode->window[overlap-i-1], tmp1);
       }
+      for (i=0;i<N-overlap;i++)
+         st->out_mem[C*(MAX_PERIOD-N+overlap+i)+c] = MULT16_32_Q15(fade, e[overlap+i]);
    }
+#endif
+
+   deemphasis(st->out_mem, pcm, N, C, preemph, st->preemph_memD);
+   
+   st->loss_count++;
+
    RESTORE_STACK;
 }
-#endif
 
 #ifdef FIXED_POINT
-int celt_decode(CELTDecoder * restrict st, const unsigned char *data, int len, celt_int16_t * restrict pcm)
+int celt_decode(CELTDecoder * restrict st, const unsigned char *data, int len, celt_int16 * restrict pcm)
 {
 #else
-int celt_decode_float(CELTDecoder * restrict st, const unsigned char *data, int len, celt_sig_t * restrict pcm)
+int celt_decode_float(CELTDecoder * restrict st, const unsigned char *data, int len, celt_sig * restrict pcm)
 {
 #endif
-   int i, c, N, N4;
+   int i, N, N4;
    int has_pitch, has_fold;
    int pitch_index;
    int bits;
    ec_dec dec;
    ec_byte_buffer buf;
-   VARDECL(celt_sig_t, freq);
-   VARDECL(celt_norm_t, X);
-   VARDECL(celt_norm_t, P);
-   VARDECL(celt_ener_t, bandE);
-   VARDECL(celt_pgain_t, gains);
+   VARDECL(celt_sig, freq);
+   VARDECL(celt_sig, pitch_freq);
+   VARDECL(celt_norm, X);
+   VARDECL(celt_ener, bandE);
    VARDECL(int, fine_quant);
    VARDECL(int, pulses);
    VARDECL(int, offsets);
@@ -1283,8 +1455,10 @@ int celt_decode_float(CELTDecoder * restrict st, const unsigned char *data, int
    int transient_time;
    int transient_shift;
    int mdct_weight_shift=0;
-   const int C = CHANNELS(st->mode);
+   const int C = CHANNELS(st->channels);
    int mdct_weight_pos=0;
+   int gain_id=0;
+   int start=0;
    SAVE_STACK;
 
    if (check_decoder(st) != CELT_OK)
@@ -1293,14 +1467,15 @@ int celt_decode_float(CELTDecoder * restrict st, const unsigned char *data, int
    if (check_mode(st->mode) != CELT_OK)
       return CELT_INVALID_MODE;
 
+   if (pcm==NULL)
+      return CELT_BAD_ARG;
+
    N = st->block_size;
    N4 = (N-st->overlap)>>1;
 
-   ALLOC(freq, C*N, celt_sig_t); /**< Interleaved signal MDCTs */
-   ALLOC(X, C*N, celt_norm_t);   /**< Interleaved normalised MDCTs */
-   ALLOC(P, C*N, celt_norm_t);   /**< Interleaved normalised pitch MDCTs*/
-   ALLOC(bandE, st->mode->nbEBands*C, celt_ener_t);
-   ALLOC(gains, st->mode->nbPBands, celt_pgain_t);
+   ALLOC(freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
+   ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+   ALLOC(bandE, st->mode->nbEBands*C, celt_ener);
    
    if (data == NULL)
    {
@@ -1319,7 +1494,7 @@ int celt_decode_float(CELTDecoder * restrict st, const unsigned char *data, int
    decode_flags(&dec, &intra_ener, &has_pitch, &shortBlocks, &has_fold);
    if (shortBlocks)
    {
-      transient_shift = ec_dec_bits(&dec, 2);
+      transient_shift = ec_dec_uint(&dec, 4);
       if (transient_shift == 3)
       {
          transient_time = ec_dec_uint(&dec, N+st->mode->overlap);
@@ -1338,16 +1513,14 @@ int celt_decode_float(CELTDecoder * restrict st, const unsigned char *data, int
    if (has_pitch)
    {
       pitch_index = ec_dec_uint(&dec, MAX_PERIOD-(2*N-2*N4));
-      st->last_pitch_index = pitch_index;
+      gain_id = ec_dec_uint(&dec, 16);
    } else {
       pitch_index = 0;
-      for (i=0;i<st->mode->nbPBands;i++)
-         gains[i] = 0;
    }
 
    ALLOC(fine_quant, st->mode->nbEBands, int);
    /* Get band energies */
-   unquant_coarse_energy(st->mode, bandE, st->oldBandE, len*8/3, intra_ener, st->mode->prob, &dec);
+   unquant_coarse_energy(st->mode, start, bandE, st->oldBandE, len*4-8, intra_ener, st->mode->prob, &dec, C);
    
    ALLOC(pulses, st->mode->nbEBands, int);
    ALLOC(offsets, st->mode->nbEBands, int);
@@ -1357,74 +1530,52 @@ int celt_decode_float(CELTDecoder * restrict st, const unsigned char *data, int
       offsets[i] = 0;
 
    bits = len*8 - ec_dec_tell(&dec, 0) - 1;
-   if (has_pitch)
-      bits -= st->mode->nbPBands;
-   compute_allocation(st->mode, offsets, bits, pulses, fine_quant, fine_priority);
+   compute_allocation(st->mode, start, offsets, bits, pulses, fine_quant, fine_priority, C);
    /*bits = ec_dec_tell(&dec, 0);
    compute_fine_allocation(st->mode, fine_quant, (20*C+len*8/5-(ec_dec_tell(&dec, 0)-bits))/C);*/
    
-   unquant_fine_energy(st->mode, bandE, st->oldBandE, fine_quant, &dec);
-
+   unquant_fine_energy(st->mode, start, bandE, st->oldBandE, fine_quant, &dec, C);
 
+   ALLOC(pitch_freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
    if (has_pitch) 
    {
-      VARDECL(celt_ener_t, bandEp);
-      
       /* Pitch MDCT */
-      compute_mdcts(st->mode, 0, st->out_mem+pitch_index*C, freq);
-      ALLOC(bandEp, st->mode->nbEBands*C, celt_ener_t);
-      compute_band_energies(st->mode, freq, bandEp);
-      normalise_bands(st->mode, freq, P, bandEp);
-      /* Apply pitch gains */
-   } else {
-      for (i=0;i<C*N;i++)
-         P[i] = 0;
+      compute_mdcts(st->mode, 0, st->out_mem+pitch_index*C, pitch_freq, C);
    }
 
    /* Decode fixed codebook and merge with pitch */
    if (C==1)
-      unquant_bands(st->mode, X, P, has_pitch, gains, bandE, pulses, shortBlocks, has_fold, len*8, &dec);
+      quant_bands(st->mode, start, X, bandE, pulses, shortBlocks, has_fold, 1, len*8, 0, &dec);
 #ifndef DISABLE_STEREO
    else
-      unquant_bands_stereo(st->mode, X, P, has_pitch, gains, bandE, pulses, shortBlocks, has_fold, len*8, &dec);
+      unquant_bands_stereo(st->mode, start, X, bandE, pulses, shortBlocks, has_fold, len*8, &dec);
 #endif
-   unquant_energy_finalise(st->mode, bandE, st->oldBandE, fine_quant, fine_priority, len*8-ec_dec_tell(&dec, 0), &dec);
+   unquant_energy_finalise(st->mode, start, bandE, st->oldBandE, fine_quant, fine_priority, len*8-ec_dec_tell(&dec, 0), &dec, C);
    
+   if (mdct_weight_shift)
+   {
+      mdct_shape(st->mode, X, 0, mdct_weight_pos+1, N, st->mode->nbShortMdcts, mdct_weight_shift, C);
+   }
+
    /* Synthesis */
-   denormalise_bands(st->mode, X, freq, bandE);
+   denormalise_bands(st->mode, X, freq, bandE, C);
 
 
    CELT_MOVE(st->decode_mem, st->decode_mem+C*N, C*(DECODE_BUFFER_SIZE+st->overlap-N));
-   if (mdct_weight_shift)
-   {
-      int m;
-      for (c=0;c<C;c++)
-         for (m=mdct_weight_pos+1;m<st->mode->nbShortMdcts;m++)
-            for (i=m+c*N;i<(c+1)*N;i+=st->mode->nbShortMdcts)
-#ifdef FIXED_POINT
-               freq[i] = SHL32(freq[i], mdct_weight_shift);
-#else
-               freq[i] = (1<<mdct_weight_shift)*freq[i];
-#endif
-   }
-   /* Compute inverse MDCTs */
-   compute_inv_mdcts(st->mode, shortBlocks, freq, transient_time, transient_shift, st->out_mem);
 
-   for (c=0;c<C;c++)
-   {
-      int j;
-      for (j=0;j<N;j++)
-      {
-         celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
-                                preemph,st->preemph_memD[c]);
-         st->preemph_memD[c] = tmp;
-         pcm[C*j+c] = SCALEOUT(SIG2WORD16(tmp));
-      }
-   }
+   if (has_pitch)
+      apply_pitch(st->mode, freq, pitch_freq, gain_id, 0, C);
 
+   for (i=0;i<st->mode->eBands[start];i++)
+      freq[i] = 0;
+
+   /* Compute inverse MDCTs */
+   compute_inv_mdcts(st->mode, shortBlocks, freq, transient_time, transient_shift, st->out_mem, C);
+
+   deemphasis(st->out_mem, pcm, N, C, preemph, st->preemph_memD);
+   st->loss_count = 0;
    RESTORE_STACK;
    return 0;
-   /*printf ("\n");*/
 }
 
 #ifdef FIXED_POINT
@@ -1432,7 +1583,8 @@ int celt_decode_float(CELTDecoder * restrict st, const unsigned char *data, int
 int celt_decode_float(CELTDecoder * restrict st, const unsigned char *data, int len, float * restrict pcm)
 {
    int j, ret, C, N;
-   VARDECL(celt_int16_t, out);
+   VARDECL(celt_int16, out);
+   SAVE_STACK;
 
    if (check_decoder(st) != CELT_OK)
       return CELT_INVALID_STATE;
@@ -1440,24 +1592,27 @@ int celt_decode_float(CELTDecoder * restrict st, const unsigned char *data, int
    if (check_mode(st->mode) != CELT_OK)
       return CELT_INVALID_MODE;
 
-   SAVE_STACK;
-   C = CHANNELS(st->mode);
-   N = st->block_size;
-   ALLOC(out, C*N, celt_int16_t);
+   if (pcm==NULL)
+      return CELT_BAD_ARG;
 
+   C = CHANNELS(st->channels);
+   N = st->block_size;
+   
+   ALLOC(out, C*N, celt_int16);
    ret=celt_decode(st, data, len, out);
-
    for (j=0;j<C*N;j++)
-     pcm[j]=out[j]*(1/32768.);
+      pcm[j]=out[j]*(1/32768.);
+     
    RESTORE_STACK;
    return ret;
 }
 #endif /*DISABLE_FLOAT_API*/
 #else
-int celt_decode(CELTDecoder * restrict st, const unsigned char *data, int len, celt_int16_t * restrict pcm)
+int celt_decode(CELTDecoder * restrict st, const unsigned char *data, int len, celt_int16 * restrict pcm)
 {
    int j, ret, C, N;
-   VARDECL(celt_sig_t, out);
+   VARDECL(celt_sig, out);
+   SAVE_STACK;
 
    if (check_decoder(st) != CELT_OK)
       return CELT_INVALID_STATE;
@@ -1465,16 +1620,18 @@ int celt_decode(CELTDecoder * restrict st, const unsigned char *data, int len, c
    if (check_mode(st->mode) != CELT_OK)
       return CELT_INVALID_MODE;
 
-   SAVE_STACK;
-   C = CHANNELS(st->mode);
+   if (pcm==NULL)
+      return CELT_BAD_ARG;
+
+   C = CHANNELS(st->channels);
    N = st->block_size;
-   ALLOC(out, C*N, celt_sig_t);
+   ALLOC(out, C*N, celt_sig);
 
    ret=celt_decode_float(st, data, len, out);
 
    for (j=0;j<C*N;j++)
-     pcm[j] = FLOAT2INT16 (out[j]);
-
+      pcm[j] = FLOAT2INT16 (out[j]);
+   
    RESTORE_STACK;
    return ret;
 }
@@ -1503,14 +1660,18 @@ int celt_decoder_ctl(CELTDecoder * restrict st, int request, ...)
       case CELT_RESET_STATE:
       {
          const CELTMode *mode = st->mode;
-         int C = mode->nbChannels;
+         int C = st->channels;
 
          CELT_MEMSET(st->decode_mem, 0, (DECODE_BUFFER_SIZE+st->overlap)*C);
          CELT_MEMSET(st->oldBandE, 0, C*mode->nbEBands);
 
          CELT_MEMSET(st->preemph_memD, 0, C);
 
-         st->last_pitch_index = 0;
+         st->loss_count = 0;
+
+#ifdef NEW_PLC
+         CELT_MEMSET(st->lpc, 0, C*LPC_ORDER);
+#endif
       }
       break;
       default:
@@ -1528,3 +1689,22 @@ bad_request:
       va_end(ap);
   return CELT_UNIMPLEMENTED;
 }
+
+const char *celt_strerror(int error) /* Returns a static description string for an error code */
+{
+   static const char *error_strings[8] = { /* indexed by -error, so entry k describes code -k */
+      "success",
+      "invalid argument",
+      "invalid mode",
+      "internal error",
+      "corrupted stream",
+      "request not implemented",
+      "invalid state",
+      "memory allocation failed"
+   };
+   if (error > 0 || error < -7) /* range check runs before the negation below, so the index is always 0..7 */
+      return "unknown error";
+   else 
+      return error_strings[-error];
+}
+