Make it possible to ignore inverted phase stereo for downmix purposes
[opus.git] / celt / bands.c
index 5088ee8..bbe8a4c 100644 (file)
@@ -360,6 +360,30 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
    }
 }
 
+/* Compute the weights to use for optimizing normalized distortion across
+   channels: Ex and Ey are the celt_ener values of the two channels, and
+   w[0], w[1] receive the matching weights. We weight square distortion by
+   the amplitude, i.e. by the square root of the value we would have used to
+   minimize the MSE in the non-normalized domain. This roughly corresponds
+   to some quick-and-dirty perceptual experiments I ran to measure
+   inter-aural masking (there doesn't seem to be any published data on it). */
+static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2])
+{
+   celt_ener minE;
+#ifdef FIXED_POINT /* shift only exists in the fixed-point build */
+   int shift;
+#endif
+   minE = MIN32(Ex, Ey);
+   /* Adjustment to make the weights a bit more conservative. */
+   Ex = ADD32(Ex, minE/3);
+   Ey = ADD32(Ey, minE/3);
+#ifdef FIXED_POINT /* common shift, derived from the larger adjusted value */
+   shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14;
+#endif
+   w[0] = VSHR32(Ex, shift);
+   w[1] = VSHR32(Ey, shift);
+}
+
 static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
 {
    int i = bandID;
@@ -659,6 +683,7 @@ struct band_ctx {
    opus_uint32 seed;
    int arch;
    int theta_round;
+   int disable_inv;
 };
 
 struct split_ctx {
@@ -718,11 +743,18 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
       if (encode)
       {
          if (!stereo || ctx->theta_round == 0)
+         {
             itheta = (itheta*(opus_int32)qn+8192)>>14;
-         else if (ctx->theta_round < 0)
-            itheta = (itheta*(opus_int32)qn)>>14;
-         else
-            itheta = (itheta*(opus_int32)qn+16383)>>14;
+         } else {
+            int down;
+            /* Bias quantization towards itheta=0 and itheta=16384. */
+            int bias = itheta > 8192 ? 32767/qn : -32767/qn;
+            down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14));
+            if (ctx->theta_round < 0)
+               itheta = down;
+            else
+               itheta = down+1;
+         }
       }
       /* Entropy coding of the angle. We use a uniform pdf for the
          time split, a step for stereo, and a triangular one for the rest. */
@@ -801,7 +833,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
    } else if (stereo) {
       if (encode)
       {
-         inv = itheta > 8192;
+         inv = itheta > 8192 && !ctx->disable_inv;
          if (inv)
          {
             int j;
@@ -818,6 +850,9 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
             inv = ec_dec_bit_logp(ec, 2);
       } else
          inv = 0;
+      /* inv flag override to avoid problems with downmixing. */
+      if (ctx->disable_inv)
+         inv = 0;
       itheta = 0;
    }
    qalloc = ec_tell_frac(ec) - tell;
@@ -1005,7 +1040,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
          /* Finally do the actual quantization */
          if (encode)
          {
-            cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth);
+            cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth, ctx->arch);
          } else {
             cm = alg_unquant(X, N, K, spread, B, ec, gain);
          }
@@ -1330,13 +1365,25 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
    return cm;
 }
 
+static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo)
+{
+   const opus_int16 * OPUS_RESTRICT band_edges = m->eBands;
+   const int first_len = M*(band_edges[start+1]-band_edges[start]);
+   const int second_len = M*(band_edges[start+2]-band_edges[start+1]);
+   /* Repeat the tail of the first band's folding data just past its end so
+      the second band has enough to fold from. CELT-only mode copies nothing. */
+   const int extra = second_len-first_len;
+   OPUS_COPY(&norm[first_len], &norm[2*first_len - second_len], extra);
+   if (dual_stereo)
+      OPUS_COPY(&norm2[first_len], &norm2[2*first_len - second_len], extra);
+}
 
 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
       celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
       const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
       int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
       opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
-      opus_uint32 *seed, int complexity, int arch)
+      opus_uint32 *seed, int complexity, int arch, int disable_inv)
 {
    int i;
    opus_int32 remaining_bits;
@@ -1346,6 +1393,9 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
    VARDECL(celt_norm, _lowband_scratch);
    VARDECL(celt_norm, X_save);
    VARDECL(celt_norm, Y_save);
+   VARDECL(celt_norm, X_save2);
+   VARDECL(celt_norm, Y_save2);
+   VARDECL(celt_norm, norm_save2);
    int resynth_alloc;
    celt_norm *lowband_scratch;
    int B;
@@ -1386,6 +1436,9 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
       lowband_scratch = X_+M*eBands[m->nbEBands-1];
    ALLOC(X_save, resynth_alloc, celt_norm);
    ALLOC(Y_save, resynth_alloc, celt_norm);
+   ALLOC(X_save2, resynth_alloc, celt_norm);
+   ALLOC(Y_save2, resynth_alloc, celt_norm);
+   ALLOC(norm_save2, resynth_alloc, celt_norm);
 
    lowband_offset = 0;
    ctx.bandE = bandE;
@@ -1396,6 +1449,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
    ctx.seed = *seed;
    ctx.spread = spread;
    ctx.arch = arch;
+   ctx.disable_inv = disable_inv;
    ctx.resynth = resynth;
    ctx.theta_round = 0;
    for (i=start;i<end;i++)
@@ -1435,8 +1489,15 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
          b = 0;
       }
 
+#ifdef ENABLE_UPDATE_DRAFT
+      if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
+            lowband_offset = i;
+      if (i == start+1)
+         special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
+#else
       if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
             lowband_offset = i;
+#endif
 
       tf_change = tf_res[i];
       ctx.tf_change = tf_change;
@@ -1462,7 +1523,11 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
          fold_start = lowband_offset;
          while(M*eBands[--fold_start] > effective_lowband+norm_offset);
          fold_end = lowband_offset-1;
+#ifdef ENABLE_UPDATE_DRAFT
+         while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
+#else
          while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
+#endif
          x_cm = y_cm = 0;
          fold_i = fold_start; do {
            x_cm |= collapse_masks[fold_i*C+0];
@@ -1495,12 +1560,17 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
       } else {
          if (Y!=NULL)
          {
-            if (theta_rdo)
+            if (theta_rdo && i < intensity)
             {
-               ec_ctx ec_save;
-               struct band_ctx ctx_save;
+               ec_ctx ec_save, ec_save2;
+               struct band_ctx ctx_save, ctx_save2;
                opus_val32 dist0, dist1;
-               unsigned cm;
+               unsigned cm, cm2;
+               int nstart_bytes, nend_bytes, save_bytes;
+               unsigned char *bytes_buf;
+               unsigned char bytes_save[1275];
+               opus_val16 w[2];
+               compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
                /* Make a copy. */
                cm = x_cm|y_cm;
                ec_save = *ec;
@@ -1512,28 +1582,45 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
                x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
                      effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
                      last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
-               dist0 = celt_inner_prod(X_save, X, N, arch) + celt_inner_prod(Y_save, Y, N, arch);
+               dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
+
+               /* Save first result. */
+               cm2 = x_cm;
+               ec_save2 = *ec;
+               ctx_save2 = ctx;
+               OPUS_COPY(X_save2, X, N);
+               OPUS_COPY(Y_save2, Y, N);
+               if (!last)
+                  OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N);
+               nstart_bytes = ec_save.offs;
+               nend_bytes = ec_save.storage;
+               bytes_buf = ec_save.buf+nstart_bytes;
+               save_bytes = nend_bytes-nstart_bytes;
+               OPUS_COPY(bytes_save, bytes_buf, save_bytes);
+
                /* Restore */
                *ec = ec_save;
                ctx = ctx_save;
                OPUS_COPY(X, X_save, N);
                OPUS_COPY(Y, Y_save, N);
+               if (i == start+1)
+                  special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
                /* Encode and round up. */
                ctx.theta_round = 1;
                x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
                      effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
                      last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
-               dist1 = celt_inner_prod(X_save, X, N, arch) + celt_inner_prod(Y_save, Y, N, arch);
-               /* Restore */
-               *ec = ec_save;
-               ctx = ctx_save;
-               OPUS_COPY(X, X_save, N);
-               OPUS_COPY(Y, Y_save, N);
-               /* Encode with best choice. */
-               ctx.theta_round = dist0 >= dist1 ? -1 : 1;
-               x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
-                     effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
-                     last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
+               dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
+               if (dist0 >= dist1) {
+                  x_cm = cm2;
+                  *ec = ec_save2;
+                  ctx = ctx_save2;
+                  OPUS_COPY(X, X_save2, N);
+                  OPUS_COPY(Y, Y_save2, N);
+                  if (!last)
+                     OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N);
+                  OPUS_COPY(bytes_buf, bytes_save, save_bytes);
+               }
             } else {
                ctx.theta_round = 0;
                x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,