1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Copyright (c) 2008-2009 Gregory Maxwell
4 Written by Jean-Marc Valin and Gregory Maxwell */
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
17 - Neither the name of the Xiph.org Foundation nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
25 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 #include "stack_alloc.h"
44 #include "os_support.h"
48 /* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
49 with this approximation is important because it has an impact on the bit allocation */
50 static celt_int16 bitexact_cos(celt_int16 x)
54 tmp = (4096+((celt_int32)(x)*(x)))>>13;
58 x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
66 /* Compute the amplitude (sqrt energy) in each of the bands */
67 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bank, int end, int _C, int M)
70 const celt_int16 *eBands = m->eBands;
71 const int C = CHANNELS(_C);
72 N = M*m->shortMdctSize;
82 maxval = MAX32(maxval, X[j+c*N]);
83 maxval = MAX32(maxval, -X[j+c*N]);
84 } while (++j<M*eBands[i+1]);
88 int shift = celt_ilog2(maxval)-10;
90 sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)),
91 EXTRACT16(VSHR32(X[j+c*N],shift)));
92 } while (++j<M*eBands[i+1]);
93 /* We're adding one here to make damn sure we never end up with a pitch vector that's
94 larger than unity norm */
95 bank[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
97 bank[i+c*m->nbEBands] = EPSILON;
99 /*printf ("%f ", bank[i+c*m->nbEBands]);*/
105 /* Normalise each band such that the energy is one. */
106 void normalise_bands(const CELTMode *m, const celt_sig * restrict freq, celt_norm * restrict X, const celt_ener *bank, int end, int _C, int M)
109 const celt_int16 *eBands = m->eBands;
110 const int C = CHANNELS(_C);
111 N = M*m->shortMdctSize;
118 shift = celt_zlog2(bank[i+c*m->nbEBands])-13;
119 E = VSHR32(bank[i+c*m->nbEBands], shift);
120 g = EXTRACT16(celt_rcp(SHL32(E,3)));
122 X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
123 } while (++j<M*eBands[i+1]);
128 #else /* FIXED_POINT */
129 /* Compute the amplitude (sqrt energy) in each of the bands */
130 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bank, int end, int _C, int M)
133 const celt_int16 *eBands = m->eBands;
134 const int C = CHANNELS(_C);
135 N = M*m->shortMdctSize;
141 celt_word32 sum = 1e-10f;
142 for (j=M*eBands[i];j<M*eBands[i+1];j++)
143 sum += X[j+c*N]*X[j+c*N];
144 bank[i+c*m->nbEBands] = celt_sqrt(sum);
145 /*printf ("%f ", bank[i+c*m->nbEBands]);*/
151 /* Normalise each band such that the energy is one. */
152 void normalise_bands(const CELTMode *m, const celt_sig * restrict freq, celt_norm * restrict X, const celt_ener *bank, int end, int _C, int M)
155 const celt_int16 *eBands = m->eBands;
156 const int C = CHANNELS(_C);
157 N = M*m->shortMdctSize;
163 celt_word16 g = 1.f/(1e-10f+bank[i+c*m->nbEBands]);
164 for (j=M*eBands[i];j<M*eBands[i+1];j++)
165 X[j+c*N] = freq[j+c*N]*g;
170 #endif /* FIXED_POINT */
172 void renormalise_bands(const CELTMode *m, celt_norm * restrict X, int end, int _C, int M)
175 const celt_int16 *eBands = m->eBands;
176 const int C = CHANNELS(_C);
180 renormalise_vector(X+M*eBands[i]+c*M*m->shortMdctSize, M*eBands[i+1]-M*eBands[i], Q15ONE);
185 /* De-normalise the energy to produce the synthesis from the unit-energy bands */
186 void denormalise_bands(const CELTMode *m, const celt_norm * restrict X, celt_sig * restrict freq, const celt_ener *bank, int end, int _C, int M)
189 const celt_int16 *eBands = m->eBands;
190 const int C = CHANNELS(_C);
191 N = M*m->shortMdctSize;
192 celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels");
195 celt_sig * restrict f;
196 const celt_norm * restrict x;
202 celt_word32 g = SHR32(bank[i+c*m->nbEBands],1);
204 band_end = M*eBands[i+1];
206 *f++ = SHL32(MULT16_32_Q15(*x, g),2);
208 } while (++j<band_end);
210 for (i=M*eBands[m->nbEBands];i<N;i++)
215 static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bank, int bandID, int N)
220 celt_word16 left, right;
223 int shift = celt_zlog2(MAX32(bank[i], bank[i+m->nbEBands]))-13;
225 left = VSHR32(bank[i],shift);
226 right = VSHR32(bank[i+m->nbEBands],shift);
227 norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
228 a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
229 a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
235 X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r);
236 /* Side is not encoded, no need to calculate */
240 static void stereo_split(celt_norm *X, celt_norm *Y, int N)
246 l = MULT16_16_Q15(QCONST16(.70711f,15), X[j]);
247 r = MULT16_16_Q15(QCONST16(.70711f,15), Y[j]);
253 static void stereo_merge(celt_norm *X, celt_norm *Y, celt_word16 mid, celt_word16 side, int N)
261 celt_word32 t, lgain, rgain;
263 /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
265 xp = MAC16_16(xp, X[j], Y[j]);
266 /* mid and side are in Q15, not Q14 like X and Y */
268 side = SHR32(side, 1);
269 El = MULT16_16(mid, mid) + MULT16_16(side, side) - 2*xp;
270 Er = MULT16_16(mid, mid) + MULT16_16(side, side) + 2*xp;
277 kl = celt_ilog2(El)>>1;
278 kr = celt_ilog2(Er)>>1;
280 t = VSHR32(El, (kl-7)<<1);
281 lgain = celt_rsqrt_norm(t);
282 t = VSHR32(Er, (kr-7)<<1);
283 rgain = celt_rsqrt_norm(t);
297 X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
298 Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
302 /* Decide whether we should spread the pulses in the current frame */
303 int folding_decision(const CELTMode *m, celt_norm *X, int *average, int *last_decision, int end, int _C, int M)
306 int sum = 0, nbBands=0;
307 const int C = CHANNELS(_C);
308 const celt_int16 * restrict eBands = m->eBands;
311 N0 = M*m->shortMdctSize;
313 if (M*(eBands[end]-eBands[end-1]) <= 8)
321 celt_norm * restrict x = X+M*eBands[i]+c*N0;
322 N = M*(eBands[i+1]-eBands[i]);
325 /* Compute rough CDF of |x[j]| */
328 celt_word32 x2N; /* Q13 */
330 x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
331 if (x2N < QCONST16(0.25f,13))
333 if (x2N < QCONST16(0.0625f,13))
335 if (x2N < QCONST16(0.015625f,13))
339 tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
345 /* Recursive averaging */
346 sum = (sum+*average)>>1;
349 sum = (3*sum + ((*last_decision<<7) + 64) + 2)>>2;
350 /* decision and last_decision do not use the same encoding */
355 } else if (sum < 256)
359 } else if (sum < 384)
370 #ifdef MEASURE_NORM_MSE
374 int MSECount[30] = {0};
376 void dump_norm_mse(void)
379 for (i=0;i<nbMSEBands;i++)
381 printf ("%g ", MSE[i]/MSECount[i]);
386 void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C)
392 atexit(dump_norm_mse);
395 for (i=0;i<m->nbEBands;i++)
400 if (bandE0[i]<10 || (C==2 && bandE0[i+m->nbEBands]<1))
404 g = bandE[i+c*m->nbEBands]/(1e-15+bandE0[i+c*m->nbEBands]);
405 for (j=M*m->eBands[i];j<M*m->eBands[i+1];j++)
406 MSE[i] += (g*X[j+c*N]-X0[j+c*N])*(g*X[j+c*N]-X0[j+c*N]);
410 nbMSEBands = m->nbEBands;
415 static void interleave_vector(celt_norm *X, int N0, int stride)
418 VARDECL(celt_norm, tmp);
422 ALLOC(tmp, N, celt_norm);
423 for (i=0;i<stride;i++)
425 tmp[j*stride+i] = X[i*N0+j];
431 static void deinterleave_vector(celt_norm *X, int N0, int stride)
434 VARDECL(celt_norm, tmp);
438 ALLOC(tmp, N, celt_norm);
439 for (i=0;i<stride;i++)
441 tmp[i*N0+j] = X[j*stride+i];
447 void haar1(celt_norm *X, int N0, int stride)
451 for (i=0;i<stride;i++)
454 celt_norm tmp1, tmp2;
455 tmp1 = MULT16_16_Q15(QCONST16(.7070678f,15), X[stride*2*j+i]);
456 tmp2 = MULT16_16_Q15(QCONST16(.7070678f,15), X[stride*(2*j+1)+i]);
457 X[stride*2*j+i] = tmp1 + tmp2;
458 X[stride*(2*j+1)+i] = tmp1 - tmp2;
462 static int compute_qn(int N, int b, int offset, int stereo)
464 static const celt_int16 exp2_table8[8] =
465 {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
470 qb = (b+N2*offset)/N2;
471 if (qb > (b>>1)-(1<<BITRES))
472 qb = (b>>1)-(1<<BITRES);
479 if (qb<(1<<BITRES>>1)) {
482 qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
485 celt_assert(qn <= 256);
490 /* This function is responsible for encoding and decoding a band for both
491 the mono and stereo case. Even in the mono case, it can split the band
492 in two and transmit the energy difference with the two half-bands. It
493 can be called recursively so bands can end up being split in 8 parts. */
494 static void quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_norm *Y,
495 int N, int b, int spread, int B, int tf_change, celt_norm *lowband, int resynth, void *ec,
496 celt_int32 *remaining_bits, int LM, celt_norm *lowband_out, const celt_ener *bandE, int level,
497 celt_int32 *seed, celt_word16 gain, celt_norm *lowband_scratch)
509 celt_word16 mid=0, side=0;
514 split = stereo = Y != NULL;
516 /* Special case for one sample */
521 for (c=0;c<1+stereo;c++)
524 if (b>=1<<BITRES && *remaining_bits>=1<<BITRES)
529 ec_enc_bits((ec_enc*)ec, sign, 1);
531 sign = ec_dec_bits((ec_dec*)ec, 1);
533 *remaining_bits -= 1<<BITRES;
537 x[0] = sign ? -NORM_SCALING : NORM_SCALING;
541 lowband_out[0] = X[0];
545 if (!stereo && level == 0)
549 recombine = tf_change;
550 /* Band recombining to increase frequency resolution */
552 if (lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
556 lowband_scratch[j] = lowband[j];
557 lowband = lowband_scratch;
560 for (k=0;k<recombine;k++)
567 haar1(lowband, N_B, B);
570 /* Increasing the time resolution */
571 while ((N_B&1) == 0 && tf_change<0)
576 haar1(lowband, N_B, B);
585 /* Reorganize the samples in time order instead of frequency order */
589 deinterleave_vector(X, N_B, B0);
591 deinterleave_vector(lowband, N_B, B0);
595 /* If we need more than 32 bits, try splitting the band in two. */
596 if (!stereo && LM != -1 && b > 32<<BITRES && N>2)
598 if (LM>0 || (N&1)==0)
612 int mbits, sbits, delta;
616 /* Decide on the resolution to give to the split parameter theta */
617 offset = ((m->logN[i]+(LM<<BITRES))>>1) - (stereo ? QTHETA_OFFSET_STEREO : QTHETA_OFFSET);
618 qn = compute_qn(N, b, offset, stereo);
626 stereo_split(X, Y, N);
628 mid = vector_norm(X, N);
629 side = vector_norm(Y, N);
630 /* TODO: Renormalising X and Y *may* help fixed-point a bit at very high rate.
631 Let's do that at higher complexity */
632 /*mid = renormalise_vector(X, Q15ONE, N, 1);
633 side = renormalise_vector(Y, Q15ONE, N, 1);*/
635 /* theta is the atan() of the ratio between the (normalized)
636 side and mid. With just that parameter, we can re-scale both
637 mid and side because we know that 1) they have unit norm and
638 2) they are orthogonal. */
641 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
643 itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
646 itheta = (itheta*qn+8192)>>14;
649 /* Entropy coding of the angle. We use a uniform pdf for the
650 first stereo split but a triangular one for the rest. */
654 ec_enc_uint((ec_enc*)ec, itheta, qn+1);
656 itheta = ec_dec_uint((ec_dec*)ec, qn+1);
657 qalloc = log2_frac(qn+1,BITRES);
660 ft = ((qn>>1)+1)*((qn>>1)+1);
665 fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
666 fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
667 ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
669 ec_encode((ec_enc*)ec, fl, fl+fs, ft);
673 fm = ec_decode((ec_dec*)ec, ft);
675 if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
677 itheta = (isqrt32(8*(celt_uint32)fm + 1) - 1)>>1;
679 fl = itheta*(itheta + 1)>>1;
684 - isqrt32(8*(celt_uint32)(ft - fm - 1) + 1))>>1;
685 fs = qn + 1 - itheta;
686 fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
689 ec_dec_update((ec_dec*)ec, fl, fl+fs, ft);
691 qalloc = log2_frac(ft,BITRES) - log2_frac(fs,BITRES) + 1;
693 itheta = (celt_int32)itheta*16384/qn;
695 if (stereo && encode)
696 intensity_stereo(m, X, Y, bandE, i, N);
704 } else if (itheta == 16384)
710 imid = bitexact_cos(itheta);
711 iside = bitexact_cos(16384-itheta);
712 /* This is the mid vs side allocation that minimizes squared error
714 delta = (N-1)*(log2_frac(iside,BITRES+2)-log2_frac(imid,BITRES+2))>>2;
721 mid = (1.f/32768)*imid;
722 side = (1.f/32768)*iside;
725 /* This is a special case for N=2 that only works for stereo and takes
726 advantage of the fact that mid and side are orthogonal to encode
727 the side with just one bit. */
735 /* Only need one bit for the side */
736 if (itheta != 0 && itheta != 16384)
740 *remaining_bits -= qalloc+sbits;
748 /* Here we only need to encode a sign for the side */
749 sign = x2[0]*y2[1] - x2[1]*y2[0] > 0;
750 ec_enc_bits((ec_enc*)ec, sign, 1);
752 sign = ec_dec_bits((ec_dec*)ec, 1);
756 quant_band(encode, m, i, x2, NULL, N, mbits, spread, B, tf_change, lowband, resynth, ec, remaining_bits, LM, lowband_out, NULL, level, seed, gain, lowband_scratch);
762 X[0] = MULT16_16_Q15(mid, X[0]);
763 X[1] = MULT16_16_Q15(mid, X[1]);
764 Y[0] = MULT16_16_Q15(side, Y[0]);
765 Y[1] = MULT16_16_Q15(side, Y[1]);
767 X[0] = SUB16(tmp,Y[0]);
768 Y[0] = ADD16(tmp,Y[0]);
770 X[1] = SUB16(tmp,Y[1]);
771 Y[1] = ADD16(tmp,Y[1]);
774 /* "Normal" split code */
775 celt_norm *next_lowband2=NULL;
776 celt_norm *next_lowband_out1=NULL;
779 /* Give more bits to low-energy MDCTs than they would otherwise deserve */
783 mbits = (b-qalloc-delta)/2;
784 if (mbits > b-qalloc)
788 sbits = b-qalloc-mbits;
789 *remaining_bits -= qalloc;
791 if (lowband && !stereo)
792 next_lowband2 = lowband+N; /* >32-bit split case */
794 /* Only stereo needs to pass on lowband_out. Otherwise, it's
795 handled at the end */
797 next_lowband_out1 = lowband_out;
799 next_level = level+1;
801 quant_band(encode, m, i, X, NULL, N, mbits, spread, B, tf_change,
802 lowband, resynth, ec, remaining_bits, LM, next_lowband_out1,
803 NULL, next_level, seed, MULT16_16_P15(gain,mid), lowband_scratch);
804 quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, tf_change,
805 next_lowband2, resynth, ec, remaining_bits, LM, NULL,
806 NULL, next_level, seed, MULT16_16_P15(gain,side), NULL);
810 /* This is the basic no-split case */
811 q = bits2pulses(m, i, LM, b);
812 curr_bits = pulses2bits(m, i, LM, q);
813 *remaining_bits -= curr_bits;
815 /* Ensures we can never bust the budget */
816 while (*remaining_bits < 0 && q > 0)
818 *remaining_bits += curr_bits;
820 curr_bits = pulses2bits(m, i, LM, q);
821 *remaining_bits -= curr_bits;
824 /* Finally do the actual quantization */
826 alg_quant(X, N, q, spread, B, lowband, resynth, (ec_enc*)ec, seed, gain);
828 alg_unquant(X, N, q, spread, B, lowband, (ec_dec*)ec, seed, gain);
831 /* This code is used by the decoder and by the resynthesis-enabled encoder */
837 stereo_merge(X, Y, mid, side, N);
838 } else if (level == 0)
842 /* Undo the sample reorganization going from time order to frequency order */
844 interleave_vector(X, N_B, B0);
846 /* Undo time-freq changes that we did earlier */
849 for (k=0;k<time_divide;k++)
856 for (k=0;k<recombine;k++)
863 /* Scale output for later folding */
868 n = celt_sqrt(SHL32(EXTEND32(N0),22));
870 lowband_out[j] = MULT16_16_Q15(n,X[j]);
876 void quant_all_bands(int encode, const CELTMode *m, int start, int end, celt_norm *_X, celt_norm *_Y, const celt_ener *bandE, int *pulses, int shortBlocks, int fold, int *tf_res, int resynth, int total_bits, void *ec, int LM, int codedBands)
879 celt_int32 remaining_bits;
880 const celt_int16 * restrict eBands = m->eBands;
881 celt_norm * restrict norm;
882 VARDECL(celt_norm, _norm);
883 VARDECL(celt_norm, lowband_scratch);
888 int update_lowband = 1;
889 int C = _Y != NULL ? 2 : 1;
893 B = shortBlocks ? M : 1;
894 ALLOC(_norm, M*eBands[m->nbEBands], celt_norm);
895 ALLOC(lowband_scratch, M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]), celt_norm);
899 seed = ((ec_enc*)ec)->rng;
901 seed = ((ec_dec*)ec)->rng;
904 for (i=start;i<end;i++)
910 celt_norm * restrict X, * restrict Y;
918 N = M*eBands[i+1]-M*eBands[i];
920 tell = ec_enc_tell((ec_enc*)ec, BITRES);
922 tell = ec_dec_tell((ec_dec*)ec, BITRES);
924 /* Compute how many bits we want to allocate to this band */
927 remaining_bits = (total_bits<<BITRES)-tell-1;
928 if (i <= codedBands-1)
930 curr_balance = (codedBands-i);
931 if (curr_balance > 3)
933 curr_balance = balance / curr_balance;
934 b = IMIN(remaining_bits+1,pulses[i]+curr_balance);
940 /* Prevents ridiculous bit depths */
941 if (b > C*16*N<<BITRES)
944 if (M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband==NULL))
945 lowband = norm+M*eBands[i]-N;
947 tf_change = tf_res[i];
955 quant_band(encode, m, i, X, Y, N, b, fold, B, tf_change,
956 lowband, resynth, ec, &remaining_bits, LM,
957 norm+M*eBands[i], bandE, 0, &seed, Q15ONE, lowband_scratch);
959 balance += pulses[i] + tell;
961 /* Update the folding position only as long as we have 2 bit/sample depth */
962 update_lowband = (b>>BITRES)>2*N;