Defining DISABLE_STEREO now optimises for the mono case
[opus.git] / libcelt / bands.c
index f52a7aa..3096743 100644 (file)
@@ -38,6 +38,7 @@
 #include "modes.h"
 #include "vq.h"
 #include "cwrs.h"
+#include "stack_alloc.h"
 #include "os_support.h"
 #include "mathops.h"
 
@@ -78,10 +79,9 @@ const celt_word16_t sqrtC_1[2] = {QCONST16(1.f, 14), QCONST16(1.414214f, 14)};
 /* Compute the amplitude (sqrt energy) in each of the bands */
 void compute_band_energies(const CELTMode *m, const celt_sig_t *X, celt_ener_t *bank)
 {
-   int i, c, B, C;
+   int i, c;
    const celt_int16_t *eBands = m->eBands;
-   B = m->nbMdctBlocks;
-   C = m->nbChannels;
+   const int C = CHANNELS(m);
    for (c=0;c<C;c++)
    {
       for (i=0;i<m->nbEBands;i++)
@@ -89,18 +89,18 @@ void compute_band_energies(const CELTMode *m, const celt_sig_t *X, celt_ener_t *
          int j;
          celt_word32_t maxval=0;
          celt_word32_t sum = 0;
-         for (j=B*eBands[i];j<B*eBands[i+1];j++)
+         for (j=eBands[i];j<eBands[i+1];j++)
             maxval = MAX32(maxval, ABS32(X[j*C+c]));
          if (maxval > 0)
          {
             int shift = celt_ilog2(maxval)-10;
-            for (j=B*eBands[i];j<B*eBands[i+1];j++)
-               sum += VSHR32(X[j*C+c],shift)*VSHR32(X[j*C+c],shift);
+            for (j=eBands[i];j<eBands[i+1];j++)
+               sum += MULT16_16(EXTRACT16(VSHR32(X[j*C+c],shift)),EXTRACT16(VSHR32(X[j*C+c],shift)));
             /* We're adding one here to make damn sure we never end up with a pitch vector that's
                larger than unity norm */
-            bank[i*C+c] = 1+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
+            bank[i*C+c] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
          } else {
-            bank[i*C+c] = 0;
+            bank[i*C+c] = EPSILON;
          }
          /*printf ("%f ", bank[i*C+c]);*/
       }
@@ -109,12 +109,11 @@ void compute_band_energies(const CELTMode *m, const celt_sig_t *X, celt_ener_t *
 }
 
 /* Normalise each band such that the energy is one. */
-void normalise_bands(const CELTMode *m, const celt_sig_t *freq, celt_norm_t *X, const celt_ener_t *bank)
+void normalise_bands(const CELTMode *m, const celt_sig_t * restrict freq, celt_norm_t * restrict X, const celt_ener_t *bank)
 {
-   int i, c, B, C;
+   int i, c;
    const celt_int16_t *eBands = m->eBands;
-   B = m->nbMdctBlocks;
-   C = m->nbChannels;
+   const int C = CHANNELS(m);
    for (c=0;c<C;c++)
    {
       for (i=0;i<m->nbEBands;i++)
@@ -122,49 +121,47 @@ void normalise_bands(const CELTMode *m, const celt_sig_t *freq, celt_norm_t *X,
          celt_word16_t g;
          int j,shift;
          celt_word16_t E;
-         shift = celt_ilog2(bank[i*C+c])-13;
+         shift = celt_zlog2(bank[i*C+c])-13;
          E = VSHR32(bank[i*C+c], shift);
-         if (E>0)
-            g = DIV32_16(SHL32(Q15ONE,13),MULT16_16_Q14(E,sqrtC_1[C-1]));
-         else
-            g = 0;
-         for (j=B*eBands[i];j<B*eBands[i+1];j++)
+         g = EXTRACT16(celt_rcp(SHR32(MULT16_16(E,sqrtC_1[C-1]),11)));
+         for (j=eBands[i];j<eBands[i+1];j++)
             X[j*C+c] = MULT16_16_Q14(VSHR32(freq[j*C+c],shift),g);
       }
    }
-   for (i=B*C*eBands[m->nbEBands];i<B*C*eBands[m->nbEBands+1];i++)
+   for (i=C*eBands[m->nbEBands];i<C*eBands[m->nbEBands+1];i++)
       X[i] = 0;
 }
 
 /* Re-normalise X in place: the band energies of X are recomputed with
    compute_band_energies() and X is then normalised by them with
    normalise_bands().  This is the copy that precedes the FIXED_POINT "#else";
    with this change it is only compiled when DISABLE_STEREO is not defined.
      m : mode; supplies nbEBands, nbChannels and the eBands[] boundaries
      X : normalised coefficients, overwritten with the re-normalised values */
-void renormalise_bands(const CELTMode *m, celt_norm_t *X)
+#ifndef DISABLE_STEREO
+void renormalise_bands(const CELTMode *m, celt_norm_t * restrict X)
 {
    int i;
    VARDECL(celt_ener_t, tmpE);
    VARDECL(celt_sig_t, freq);
    SAVE_STACK;
    /* One energy slot per (band, channel) pair */
    ALLOC(tmpE, m->nbEBands*m->nbChannels, celt_ener_t);
-   ALLOC(freq, m->nbMdctBlocks*m->nbChannels*m->eBands[m->nbEBands+1], celt_sig_t);
-   for (i=0;i<m->nbMdctBlocks*m->nbChannels*m->eBands[m->nbEBands+1];i++)
+   ALLOC(freq, m->nbChannels*m->eBands[m->nbEBands+1], celt_sig_t);
+   for (i=0;i<m->nbChannels*m->eBands[m->nbEBands+1];i++)
       /* Build a temporary celt_sig_t copy of X: EXTEND32, then shift left by 10 */
       freq[i] = SHL32(EXTEND32(X[i]), 10);
    /* Measure the per-band amplitude of the copy, then write the normalised result back into X */
    compute_band_energies(m, freq, tmpE);
    normalise_bands(m, freq, X, tmpE);
    RESTORE_STACK;
 }
-#else
+#endif /* DISABLE_STEREO */
+#else /* FIXED_POINT */
 /* Compute the amplitude (sqrt energy) in each of the bands */
 void compute_band_energies(const CELTMode *m, const celt_sig_t *X, celt_ener_t *bank)
 {
-   int i, c, B, C;
+   int i, c;
    const celt_int16_t *eBands = m->eBands;
-   B = m->nbMdctBlocks;
-   C = m->nbChannels;
+   const int C = CHANNELS(m);
    for (c=0;c<C;c++)
    {
       for (i=0;i<m->nbEBands;i++)
       {
          int j;
          celt_word32_t sum = 1e-10;
-         for (j=B*eBands[i];j<B*eBands[i+1];j++)
+         for (j=eBands[i];j<eBands[i+1];j++)
             sum += X[j*C+c]*X[j*C+c];
          bank[i*C+c] = sqrt(sum);
          /*printf ("%f ", bank[i*C+c]);*/
@@ -174,27 +171,27 @@ void compute_band_energies(const CELTMode *m, const celt_sig_t *X, celt_ener_t *
 }
 
 /* Normalise each band such that the energy is one.
    This is the copy under the FIXED_POINT "#else" branch; it uses 1.f and
    sqrt() directly.
      m    : mode; supplies the channel count, nbEBands and the eBands[] boundaries
      freq : input coefficients, channel-interleaved (element j of channel c is freq[j*C+c])
      X    : output, same layout as freq
      bank : per-band amplitude, indexed bank[i*C+c] for band i, channel c
    NOTE(review): this change marks freq and X as restrict, yet the
    renormalise_bands() copy in the same branch calls normalise_bands(m, X, X, tmpE).
    Confirm that passing the same buffer for both is acceptable. */
-void normalise_bands(const CELTMode *m, const celt_sig_t *freq, celt_norm_t *X, const celt_ener_t *bank)
+void normalise_bands(const CELTMode *m, const celt_sig_t * restrict freq, celt_norm_t * restrict X, const celt_ener_t *bank)
 {
-   int i, c, B, C;
+   int i, c;
    const celt_int16_t *eBands = m->eBands;
-   B = m->nbMdctBlocks;
-   C = m->nbChannels;
+   const int C = CHANNELS(m);
    for (c=0;c<C;c++)
    {
       for (i=0;i<m->nbEBands;i++)
       {
          int j;
          /* Gain is the reciprocal of bank*sqrt(C); the 1e-10 term keeps the
             division finite when the band amplitude is zero */
          celt_word16_t g = 1.f/(1e-10+bank[i*C+c]*sqrt(C));
-         for (j=B*eBands[i];j<B*eBands[i+1];j++)
+         for (j=eBands[i];j<eBands[i+1];j++)
             X[j*C+c] = freq[j*C+c]*g;
       }
    }
    /* Clear the coefficients between eBands[nbEBands] and eBands[nbEBands+1] (all channels) */
-   for (i=B*C*eBands[m->nbEBands];i<B*C*eBands[m->nbEBands+1];i++)
+   for (i=C*eBands[m->nbEBands];i<C*eBands[m->nbEBands+1];i++)
       X[i] = 0;
 }
 
-void renormalise_bands(const CELTMode *m, celt_norm_t *X)
+#ifndef DISABLE_STEREO
+void renormalise_bands(const CELTMode *m, celt_norm_t * restrict X)
 {
    VARDECL(celt_ener_t, tmpE);
    SAVE_STACK;
@@ -203,15 +200,15 @@ void renormalise_bands(const CELTMode *m, celt_norm_t *X)
    normalise_bands(m, X, X, tmpE);
    RESTORE_STACK;
 }
-#endif
+#endif /* DISABLE_STEREO */
+#endif /* FIXED_POINT */
 
 /* De-normalise the energy to produce the synthesis from the unit-energy bands */
-void denormalise_bands(const CELTMode *m, const celt_norm_t *X, celt_sig_t *freq, const celt_ener_t *bank)
+void denormalise_bands(const CELTMode *m, const celt_norm_t * restrict X, celt_sig_t * restrict freq, const celt_ener_t *bank)
 {
-   int i, c, B, C;
+   int i, c;
    const celt_int16_t *eBands = m->eBands;
-   B = m->nbMdctBlocks;
-   C = m->nbChannels;
+   const int C = CHANNELS(m);
    if (C>2)
       celt_fatal("denormalise_bands() not implemented for >2 channels");
    for (c=0;c<C;c++)
@@ -220,11 +217,11 @@ void denormalise_bands(const CELTMode *m, const celt_norm_t *X, celt_sig_t *freq
       {
          int j;
          celt_word32_t g = MULT16_32_Q14(sqrtC_1[C-1],bank[i*C+c]);
-         for (j=B*eBands[i];j<B*eBands[i+1];j++)
+         for (j=eBands[i];j<eBands[i+1];j++)
             freq[j*C+c] = MULT16_32_Q14(X[j*C+c], g);
       }
    }
-   for (i=B*C*eBands[m->nbEBands];i<B*C*eBands[m->nbEBands+1];i++)
+   for (i=C*eBands[m->nbEBands];i<C*eBands[m->nbEBands+1];i++)
       freq[i] = 0;
 }
 
@@ -232,16 +229,16 @@ void denormalise_bands(const CELTMode *m, const celt_norm_t *X, celt_sig_t *freq
 /* Compute the best gain for each "pitch band" */
 void compute_pitch_gain(const CELTMode *m, const celt_norm_t *X, const celt_norm_t *P, celt_pgain_t *gains)
 {
-   int i, B;
+   int i;
    const celt_int16_t *pBands = m->pBands;
-   B = m->nbMdctBlocks*m->nbChannels;
-   
+   const int C = CHANNELS(m);
+
    for (i=0;i<m->nbPBands;i++)
    {
       celt_word32_t Sxy=0, Sxx=0;
       int j;
       /* We know we're not going to overflow because Sxx can't be more than 1 (Q28) */
-      for (j=B*pBands[i];j<B*pBands[i+1];j++)
+      for (j=C*pBands[i];j<C*pBands[i+1];j++)
       {
          Sxy = MAC16_16(Sxy, X[j], P[j]);
          Sxx = MAC16_16(Sxx, X[j], X[j]);
@@ -256,7 +253,7 @@ void compute_pitch_gain(const CELTMode *m, const celt_norm_t *X, const celt_norm
          residual doesn't quantise well */
       Sxy = MULT16_32_Q15(QCONST16(.9f, 15), Sxy);
       /* gain = Sxy/Sxx */
-      gains[i] = DIV32_16(Sxy,ADD32(SHR32(Sxx, PGAIN_SHIFT),EPSILON));
+      gains[i] = EXTRACT16(celt_div(Sxy,ADD32(SHR32(Sxx, PGAIN_SHIFT),EPSILON)));
       /*printf ("%f ", 1-sqrt(1-gain*gain));*/
    }
    /*if(rand()%10==0)
@@ -268,39 +265,39 @@ void compute_pitch_gain(const CELTMode *m, const celt_norm_t *X, const celt_norm
 }
 
 /* Apply the (quantised) gain to each "pitch band".
      m     : mode; supplies nbPBands, the pBands[] boundaries and the channel count
      P     : vector of celt_norm_t values, scaled in place
      gains : one gain per pitch band (gains[i] is used for band i)
    Every P[j] with C*pBands[i] <= j < C*pBands[i+1] is replaced by
    MULT16_16_Q15(gains[i], P[j]); the entries from C*pBands[nbPBands] up to
    C*pBands[nbPBands+1] are then set to zero. */
-void pitch_quant_bands(const CELTMode *m, celt_norm_t *P, const celt_pgain_t *gains)
+void pitch_quant_bands(const CELTMode *m, celt_norm_t * restrict P, const celt_pgain_t * restrict gains)
 {
-   int i, B;
+   int i;
    const celt_int16_t *pBands = m->pBands;
-   B = m->nbMdctBlocks*m->nbChannels;
+   const int C = CHANNELS(m);
    for (i=0;i<m->nbPBands;i++)
    {
       int j;
       /* Band boundaries are multiplied by the interleave factor to index P directly */
-      for (j=B*pBands[i];j<B*pBands[i+1];j++)
+      for (j=C*pBands[i];j<C*pBands[i+1];j++)
          P[j] = MULT16_16_Q15(gains[i], P[j]);
       /*printf ("%f ", gain);*/
    }
    /* Zero everything past the last pitch band */
-   for (i=B*pBands[m->nbPBands];i<B*pBands[m->nbPBands+1];i++)
+   for (i=C*pBands[m->nbPBands];i<C*pBands[m->nbPBands+1];i++)
       P[i] = 0;
 }
 
 
 /* Quantisation of the residual */
-void quant_bands(const CELTMode *m, celt_norm_t *X, celt_norm_t *P, celt_mask_t *W, int total_bits, ec_enc *enc)
+void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, int total_bits, ec_enc *enc)
 {
-   int i, j, B, bits;
+   int i, j, bits;
    const celt_int16_t *eBands = m->eBands;
-   celt_word16_t alpha;
-   VARDECL(celt_norm_t, norm);
+   celt_norm_t * restrict norm;
+   VARDECL(celt_norm_t, _norm);
    VARDECL(int, pulses);
    VARDECL(int, offsets);
+   const int C = CHANNELS(m);
    SAVE_STACK;
 
-   B = m->nbMdctBlocks*m->nbChannels;
-   
-   ALLOC(norm, B*eBands[m->nbEBands+1], celt_norm_t);
+   ALLOC(_norm, C*eBands[m->nbEBands+1], celt_norm_t);
    ALLOC(pulses, m->nbEBands, int);
    ALLOC(offsets, m->nbEBands, int);
+   norm = _norm;
 
    for (i=0;i<m->nbEBands;i++)
       offsets[i] = 0;
@@ -318,53 +315,54 @@ void quant_bands(const CELTMode *m, celt_norm_t *X, celt_norm_t *P, celt_mask_t
       int q;
       celt_word16_t n;
       q = pulses[i];
-      n = SHL16(celt_sqrt(B*(eBands[i+1]-eBands[i])),11);
+      n = SHL16(celt_sqrt(C*(eBands[i+1]-eBands[i])),11);
 
       /* If pitch isn't available, use intra-frame prediction */
       if (eBands[i] >= m->pitchEnd || q<=0)
       {
          q -= 1;
-         alpha = 0;
          if (q<0)
-            intra_fold(X+B*eBands[i], B*(eBands[i+1]-eBands[i]), norm, P+B*eBands[i], B, eBands[i], eBands[m->nbEBands+1]);
+            intra_fold(X+C*eBands[i], eBands[i+1]-eBands[i], norm, P+C*eBands[i], C, eBands[i], eBands[m->nbEBands+1]);
          else
-            intra_prediction(X+B*eBands[i], W+B*eBands[i], B*(eBands[i+1]-eBands[i]), q, norm, P+B*eBands[i], B, eBands[i], enc);
-      } else {
-         alpha = QCONST16(.7f,15);
+            intra_prediction(X+C*eBands[i], W+C*eBands[i], eBands[i+1]-eBands[i], q, norm, P+C*eBands[i], C, eBands[i], enc);
       }
       
       if (q > 0)
       {
-         int nb_rotations = (B*(eBands[i+1]-eBands[i])+4*q)/(8*q);
-         exp_rotation(P+B*eBands[i], B*(eBands[i+1]-eBands[i]), -1, B, nb_rotations);
-         exp_rotation(X+B*eBands[i], B*(eBands[i+1]-eBands[i]), -1, B, nb_rotations);
-         alg_quant(X+B*eBands[i], W+B*eBands[i], B*(eBands[i+1]-eBands[i]), q, P+B*eBands[i], alpha, enc);
-         exp_rotation(X+B*eBands[i], B*(eBands[i+1]-eBands[i]), 1, B, nb_rotations);
+         int nb_rotations = q <= 2*C ? 2*C/q : 0;
+         if (nb_rotations != 0)
+         {
+            exp_rotation(P+C*eBands[i], C*(eBands[i+1]-eBands[i]), -1, C, nb_rotations);
+            exp_rotation(X+C*eBands[i], C*(eBands[i+1]-eBands[i]), -1, C, nb_rotations);
+         }
+         alg_quant(X+C*eBands[i], W+C*eBands[i], C*(eBands[i+1]-eBands[i]), q, P+C*eBands[i], enc);
+         if (nb_rotations != 0)
+            exp_rotation(X+C*eBands[i], C*(eBands[i+1]-eBands[i]), 1, C, nb_rotations);
       }
-      for (j=B*eBands[i];j<B*eBands[i+1];j++)
+      for (j=C*eBands[i];j<C*eBands[i+1];j++)
          norm[j] = MULT16_16_Q15(n,X[j]);
    }
-   for (i=B*eBands[m->nbEBands];i<B*eBands[m->nbEBands+1];i++)
+   for (i=C*eBands[m->nbEBands];i<C*eBands[m->nbEBands+1];i++)
       X[i] = 0;
    RESTORE_STACK;
 }
 
 /* Decoding of the residual */
-void unquant_bands(const CELTMode *m, celt_norm_t *X, celt_norm_t *P, int total_bits, ec_dec *dec)
+void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, int total_bits, ec_dec *dec)
 {
-   int i, j, B, bits;
+   int i, j, bits;
    const celt_int16_t *eBands = m->eBands;
-   celt_word16_t alpha;
-   VARDECL(celt_norm_t, norm);
+   celt_norm_t * restrict norm;
+   VARDECL(celt_norm_t, _norm);
    VARDECL(int, pulses);
    VARDECL(int, offsets);
+   const int C = CHANNELS(m);
    SAVE_STACK;
 
-   B = m->nbMdctBlocks*m->nbChannels;
-   
-   ALLOC(norm, B*eBands[m->nbEBands+1], celt_norm_t);
+   ALLOC(_norm, C*eBands[m->nbEBands+1], celt_norm_t);
    ALLOC(pulses, m->nbEBands, int);
    ALLOC(offsets, m->nbEBands, int);
+   norm = _norm;
 
    for (i=0;i<m->nbEBands;i++)
       offsets[i] = 0;
@@ -377,42 +375,41 @@ void unquant_bands(const CELTMode *m, celt_norm_t *X, celt_norm_t *P, int total_
       int q;
       celt_word16_t n;
       q = pulses[i];
-      n = SHL16(celt_sqrt(B*(eBands[i+1]-eBands[i])),11);
+      n = SHL16(celt_sqrt(C*(eBands[i+1]-eBands[i])),11);
 
       /* If pitch isn't available, use intra-frame prediction */
       if (eBands[i] >= m->pitchEnd || q<=0)
       {
          q -= 1;
-         alpha = 0;
          if (q<0)
-            intra_fold(X+B*eBands[i], B*(eBands[i+1]-eBands[i]), norm, P+B*eBands[i], B, eBands[i], eBands[m->nbEBands+1]);
+            intra_fold(X+C*eBands[i], eBands[i+1]-eBands[i], norm, P+C*eBands[i], C, eBands[i], eBands[m->nbEBands+1]);
          else
-            intra_unquant(X+B*eBands[i], B*(eBands[i+1]-eBands[i]), q, norm, P+B*eBands[i], B, eBands[i], dec);
-      } else {
-         alpha = QCONST16(.7f,15);
+            intra_unquant(X+C*eBands[i], eBands[i+1]-eBands[i], q, norm, P+C*eBands[i], C, eBands[i], dec);
       }
       
       if (q > 0)
       {
-         int nb_rotations = (B*(eBands[i+1]-eBands[i])+4*q)/(8*q);
-         exp_rotation(P+B*eBands[i], B*(eBands[i+1]-eBands[i]), -1, B, nb_rotations);
-         alg_unquant(X+B*eBands[i], B*(eBands[i+1]-eBands[i]), q, P+B*eBands[i], alpha, dec);
-         exp_rotation(X+B*eBands[i], B*(eBands[i+1]-eBands[i]), 1, B, nb_rotations);
+         int nb_rotations = q <= 2*C ? 2*C/q : 0;
+         if (nb_rotations != 0)
+            exp_rotation(P+C*eBands[i], C*(eBands[i+1]-eBands[i]), -1, C, nb_rotations);
+         alg_unquant(X+C*eBands[i], C*(eBands[i+1]-eBands[i]), q, P+C*eBands[i], dec);
+         if (nb_rotations != 0)
+            exp_rotation(X+C*eBands[i], C*(eBands[i+1]-eBands[i]), 1, C, nb_rotations);
       }
-      for (j=B*eBands[i];j<B*eBands[i+1];j++)
+      for (j=C*eBands[i];j<C*eBands[i+1];j++)
          norm[j] = MULT16_16_Q15(n,X[j]);
    }
-   for (i=B*eBands[m->nbEBands];i<B*eBands[m->nbEBands+1];i++)
+   for (i=C*eBands[m->nbEBands];i<C*eBands[m->nbEBands+1];i++)
       X[i] = 0;
    RESTORE_STACK;
 }
 
+#ifndef DISABLE_STEREO
 void stereo_mix(const CELTMode *m, celt_norm_t *X, const celt_ener_t *bank, int dir)
 {
-   int i, B, C;
+   int i;
    const celt_int16_t *eBands = m->eBands;
-   B = m->nbMdctBlocks;
-   C = m->nbChannels;
+   const int C = CHANNELS(m);
    for (i=0;i<m->nbEBands;i++)
    {
       int j;
@@ -420,14 +417,14 @@ void stereo_mix(const CELTMode *m, celt_norm_t *X, const celt_ener_t *bank, int
       celt_word16_t a1, a2;
       celt_word16_t norm;
 #ifdef FIXED_POINT
-      int shift = celt_ilog2(MAX32(bank[i*C], bank[i*C+1]))-13;
+      int shift = celt_zlog2(MAX32(bank[i*C], bank[i*C+1]))-13;
 #endif
       left = VSHR32(bank[i*C],shift);
       right = VSHR32(bank[i*C+1],shift);
       norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
       a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
       a2 = dir*DIV32_16(SHL32(EXTEND32(right),14),norm);
-      for (j=B*eBands[i];j<B*eBands[i+1];j++)
+      for (j=eBands[i];j<eBands[i+1];j++)
       {
          celt_norm_t r, l;
          l = X[j*C];
@@ -436,7 +433,7 @@ void stereo_mix(const CELTMode *m, celt_norm_t *X, const celt_ener_t *bank, int
          X[j*C+1] = MULT16_16_Q14(a1,r) - MULT16_16_Q14(a2,l);
       }
    }
-   for (i=B*C*eBands[m->nbEBands];i<B*C*eBands[m->nbEBands+1];i++)
+   for (i=C*eBands[m->nbEBands];i<C*eBands[m->nbEBands+1];i++)
       X[i] = 0;
-
 }
+#endif
\ No newline at end of file