Note some more platforms where float-approx is tested, fix a bug in the prediction...
[opus.git] / libcelt / celt.c
index 9052040..5d32fdc 100644 (file)
@@ -312,7 +312,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig_t * re
          mdct_forward(lookup, x, tmp, mode->window, overlap);
          /* Interleaving the sub-frames */
          for (j=0;j<N;j++)
-            out[C*j+c] = tmp[j];
+            out[j+c*N] = tmp[j];
       }
       RESTORE_STACK;
    } else {
@@ -336,7 +336,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig_t * re
             mdct_forward(lookup, x, tmp, mode->window, overlap);
             /* Interleaving the sub-frames */
             for (j=0;j<N;j++)
-               out[C*(j*B+b)+c] = tmp[j];
+               out[(j*B+b)+c*N*B] = tmp[j];
          }
       }
       RESTORE_STACK;
@@ -367,7 +367,7 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig_t
          ALLOC(tmp, N, celt_word32_t);
          /* De-interleaving the sub-frames */
          for (j=0;j<N;j++)
-            tmp[j] = X[C*j+c];
+            tmp[j] = X[j+c*N];
          /* Prevents problems from the imdct doing the overlap-add */
          CELT_MEMSET(x+N4, 0, N);
          mdct_backward(lookup, tmp, x, mode->window, overlap);
@@ -397,7 +397,7 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig_t
          {
             /* De-interleaving the sub-frames */
             for (j=0;j<N2;j++)
-               tmp[j] = X[C*(j*B+b)+c];
+               tmp[j] = X[(j*B+b)+c*N2*B];
             mdct_backward(lookup, tmp, x+N4+N2*b, mode->window, overlap);
          }
          if (transient_shift > 0)
@@ -566,46 +566,39 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
       }
    }
    CELT_COPY(st->in_mem, in+C*(2*N-2*N4-st->overlap), C*st->overlap);
-   
+
    /* Transient handling */
-   if (st->mode->nbShortMdcts > 1)
+   transient_time = -1;
+   transient_shift = 0;
+   shortBlocks = 0;
+
+   if (st->mode->nbShortMdcts > 1 && transient_analysis(in, N+st->overlap, C, &transient_time, &transient_shift))
    {
-      if (transient_analysis(in, N+st->overlap, C, &transient_time, &transient_shift))
-      {
 #ifndef FIXED_POINT
-         float gain_1;
+      float gain_1;
 #endif
-         /* Apply the inverse shaping window */
-         if (transient_shift)
-         {
+      /* Apply the inverse shaping window */
+      if (transient_shift)
+      {
 #ifdef FIXED_POINT
-            for (c=0;c<C;c++)
-               for (i=0;i<16;i++)
-                  in[C*(transient_time+i-16)+c] = MULT16_32_Q15(EXTRACT16(SHR32(celt_rcp(Q15ONE+MULT16_16(transientWindow[i],((1<<transient_shift)-1))),1)), in[C*(transient_time+i-16)+c]);
-            for (c=0;c<C;c++)
-               for (i=transient_time;i<N+st->overlap;i++)
-                  in[C*i+c] = SHR32(in[C*i+c], transient_shift);
+         for (c=0;c<C;c++)
+            for (i=0;i<16;i++)
+               in[C*(transient_time+i-16)+c] = MULT16_32_Q15(EXTRACT16(SHR32(celt_rcp(Q15ONE+MULT16_16(transientWindow[i],((1<<transient_shift)-1))),1)), in[C*(transient_time+i-16)+c]);
+         for (c=0;c<C;c++)
+            for (i=transient_time;i<N+st->overlap;i++)
+               in[C*i+c] = SHR32(in[C*i+c], transient_shift);
 #else
-            for (c=0;c<C;c++)
-               for (i=0;i<16;i++)
-                  in[C*(transient_time+i-16)+c] /= 1+transientWindow[i]*((1<<transient_shift)-1);
-            gain_1 = 1./(1<<transient_shift);
-            for (c=0;c<C;c++)
-               for (i=transient_time;i<N+st->overlap;i++)
-                  in[C*i+c] *= gain_1;
+         for (c=0;c<C;c++)
+            for (i=0;i<16;i++)
+               in[C*(transient_time+i-16)+c] /= 1+transientWindow[i]*((1<<transient_shift)-1);
+         gain_1 = 1./(1<<transient_shift);
+         for (c=0;c<C;c++)
+            for (i=transient_time;i<N+st->overlap;i++)
+               in[C*i+c] *= gain_1;
 #endif
-         }
-         shortBlocks = 1;
-         has_fold = 1;
-      } else {
-         transient_time = -1;
-         transient_shift = 0;
-         shortBlocks = 0;
       }
-   } else {
-      transient_time = -1;
-      transient_shift = 0;
-      shortBlocks = 0;
+      shortBlocks = 1;
+      has_fold = 1;
    }
 
    ALLOC(freq, C*N, celt_sig_t); /**< Interleaved signal MDCTs */
@@ -613,6 +606,7 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
    ALLOC(bandLogE,st->mode->nbEBands*C, celt_word16_t);
    /* Compute MDCTs */
    compute_mdcts(st->mode, shortBlocks, in, freq);
+
    if (shortBlocks && !transient_shift) 
    {
       celt_word32_t sum[4]={1,1,1,1};
@@ -622,7 +616,7 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
          m=0;
          do {
             celt_word32_t tmp=0;
-            for (i=m*C+c;i<N;i+=C*st->mode->nbShortMdcts)
+            for (i=m+c*N;i<(c+1)*N;i+=st->mode->nbShortMdcts)
                tmp += ABS32(freq[i]);
             sum[m++] += tmp;
          } while (m<st->mode->nbShortMdcts);
@@ -645,7 +639,7 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
       {
          for (c=0;c<C;c++)
             for (m=mdct_weight_pos+1;m<st->mode->nbShortMdcts;m++)
-               for (i=m*C+c;i<N;i+=C*st->mode->nbShortMdcts)
+               for (i=m+c*N;i<(c+1)*N;i+=st->mode->nbShortMdcts)
                   freq[i] = SHR32(freq[i],mdct_weight_shift);
       }
 #else
@@ -665,29 +659,23 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
       {
          for (c=0;c<C;c++)
             for (m=mdct_weight_pos+1;m<st->mode->nbShortMdcts;m++)
-               for (i=m*C+c;i<N;i+=C*st->mode->nbShortMdcts)
+               for (i=m+c*N;i<(c+1)*N;i+=st->mode->nbShortMdcts)
                   freq[i] = (1./(1<<mdct_weight_shift))*freq[i];
       }
 #endif
-      /*printf ("%f\n", short_ratio);*/
-      /*if (short_ratio < 1)
-         short_ratio = 1;
-      short_ratio = 1<<(int)floor(.5+log2(short_ratio));
-      if (short_ratio>4)
-         short_ratio = 4;*/
-   }/* else if (transient_shift)
-      printf ("8\n");
-      else printf ("1\n");*/
+   }
 
    compute_band_energies(st->mode, freq, bandE);
    for (i=0;i<st->mode->nbEBands*C;i++)
       bandLogE[i] = amp2Log(bandE[i]);
 
-   intra_ener = (st->force_intra || st->delayedIntra);
+   /* Don't use intra energy when we're operating at low bit-rate */
+   intra_ener = st->force_intra || (st->delayedIntra && nbCompressedBytes > st->mode->nbEBands);
    if (shortBlocks || intra_decision(bandLogE, st->oldBandE, st->mode->nbEBands))
       st->delayedIntra = 1;
    else
       st->delayedIntra = 0;
+
    /* Pitch analysis: we do it early to save on the peak stack space */
    /* Don't use pitch if there isn't enough data available yet, 
       or if we're using shortBlocks */
@@ -763,9 +751,14 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
       compute_band_energies(st->mode, freq, bandEp);
       normalise_bands(st->mode, freq, P, bandEp);
       pitch_power = bandEp[0]+bandEp[1]+bandEp[2];
-      /* Check if we can safely use the pitch (i.e. effective gain 
-         isn't too high) */
       curr_power = bandE[0]+bandE[1]+bandE[2];
+      if (C>1)
+      {
+         pitch_power += bandEp[0+st->mode->nbEBands]+bandEp[1+st->mode->nbEBands]+bandEp[2+st->mode->nbEBands];
+         curr_power += bandE[0+st->mode->nbEBands]+bandE[1+st->mode->nbEBands]+bandE[2+st->mode->nbEBands];
+      }
+      /* Check if we can safely use the pitch (i.e. effective gain
+         isn't too high) */
       if ((MULT16_32_Q15(QCONST16(.1f, 15),curr_power) + QCONST32(10.f,ENER_SHIFT) < pitch_power))
       {
          /* Pitch prediction */
@@ -883,7 +876,7 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
          int m;
          for (c=0;c<C;c++)
             for (m=mdct_weight_pos+1;m<st->mode->nbShortMdcts;m++)
-               for (i=m*C+c;i<N;i+=C*st->mode->nbShortMdcts)
+               for (i=m+c*N;i<(c+1)*N;i+=st->mode->nbShortMdcts)
 #ifdef FIXED_POINT
                   freq[i] = SHL32(freq[i], mdct_weight_shift);
 #else
@@ -1023,7 +1016,7 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
          {
             st->force_intra   = 1;
             st->pitch_permitted = 0;
-         } else if (value=1) {
+         } else if (value==1) {
             st->force_intra   = 0;
             st->pitch_permitted = 0;
          } else {
@@ -1237,7 +1230,7 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16_t * restrict
    while (offset+len >= MAX_PERIOD)
       offset -= pitch_index;
    compute_mdcts(st->mode, 0, st->out_mem+offset*C, freq);
-   for (i=0;i<N;i++)
+   for (i=0;i<C*N;i++)
       freq[i] = ADD32(EPSILON, MULT16_32_Q15(QCONST16(.9f,15),freq[i]));
 #endif
    
@@ -1407,7 +1400,7 @@ int celt_decode_float(CELTDecoder * restrict st, const unsigned char *data, int
       int m;
       for (c=0;c<C;c++)
          for (m=mdct_weight_pos+1;m<st->mode->nbShortMdcts;m++)
-            for (i=m*C+c;i<N;i+=C*st->mode->nbShortMdcts)
+            for (i=m+c*N;i<(c+1)*N;i+=st->mode->nbShortMdcts)
 #ifdef FIXED_POINT
                freq[i] = SHL32(freq[i], mdct_weight_shift);
 #else