1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Copyright (c) 2007-2009 Timothy B. Terriberry
4 Written by Timothy B. Terriberry and Jean-Marc Valin */
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
21 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include "os_support.h"
41 /*Guaranteed to return a conservatively large estimate of the binary logarithm
42 with frac bits of fractional precision.
43 Tested for all possible 32-bit inputs with frac=4, where the maximum
44 overestimation is 0.06254243 bits.*/
45 int log2_frac(opus_uint32 val, int frac)
50 /*This is (val>>l-16), but guaranteed to round up, even if adding a bias
51 before the shift would cause overflow (e.g., for 0xFFFFxxxx).*/
52 if(l>16)val=(val>>l-16)+((val&(1<<l-16)-1)+(1<<l-16)-1>>l-16);
55 /*Note that we always need one iteration, since the rounding up above means
56 that we might need to adjust the integer part of the logarithm.*/
62 val=val*val+0x7FFF>>15;
65 /*If val is not exactly 0x8000, then we have to round up the remainder.*/
66 return l+(val>0x8000);
68 /*Exact powers of two require no rounding.*/
69 else return l-1<<frac;
72 #ifndef SMALL_FOOTPRINT
75 #define MASK32 (0xFFFFFFFF)
77 /*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/
78 static const opus_uint32 INV_TABLE[64]={
79 0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7,
80 0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF,
81 0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7,
82 0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF,
83 0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97,
84 0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF,
85 0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587,
86 0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF,
87 0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977,
88 0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF,
89 0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67,
90 0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F,
91 0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57,
92 0xD8FD8FD9,0x8D28AC43,0xDA6C0965,0xDB195E8F,
93 0x0FDBC091,0x61F2A4BB,0xDCFDCFDD,0x46FDD947,
94 0x56BE69C9,0xEB2FDEB3,0x26E978D5,0xEFDFBF7F,
96 0x0FE03F81,0xC9484E2B,0xE133F84D,0xE1A8C537,
97 0x077975B9,0x70586723,0xCD29C245,0xFAA11E6F,
98 0x0FE3C071,0x08B51D9B,0x8CE2CABD,0xBF937F27,
99 0xA8FE53A9,0x592FE593,0x2C0685B5,0x2EB11B5F,
100 0xFCD1E361,0x451AB30B,0x72CFE72D,0xDB35A717,
101 0xFB74A399,0xE80BFA03,0x0D516325,0x1BCB564F,
102 0xE02E4851,0xD962AE7B,0x10F8ED9D,0x95AEDD07,
103 0xE9DC0589,0xA18A4473,0xEA53FA95,0xEE936F3F,
104 0x90948F41,0xEAFEAFEB,0x3D137E0D,0xEF46C0F7,
105 0x028C1979,0x791064E3,0xC04FEC05,0xE115062F,
106 0x32385831,0x6E68575B,0xA10D387D,0x6FECF2E7,
107 0x3FB47F69,0xED4BFB53,0x74FED775,0xDB43BB1F,
108 0x87654321,0x9BA144CB,0x478BBCED,0xBFB912D7,
109 0x1FDCD759,0x14B2A7C3,0xCB125CE5,0x437B2E0F,
110 0x10FEF011,0xD2B3183B,0x386CAB5D,0xEF6AC0C7,
111 0x0E64C149,0x9A020A33,0xE6B41C55,0xFEFEFEFF*/
114 /*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact.
115 _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result
116 fits in 32 bits, but currently the table for multiplicative inverses is only
118 static inline opus_uint32 imusdiv32odd(opus_uint32 _a,opus_uint32 _b,
119 opus_uint32 _c,int _d){
120 return (_a*_b-_c)*INV_TABLE[_d]&MASK32;
123 /*Computes (_a*_b-_c)/_d when the quotient is known to be exact.
124 _d does not actually have to be even, but imusdiv32odd will be faster when
125 it's odd, so you should use that instead.
126 _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the
127 table for multiplicative inverses is only valid for _d<=256).
128 _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in
130 static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b,
131 opus_uint32 _c,int _d){
137 shift=EC_ILOG(_d^_d-1);
138 celt_assert(_d<=256);
139 inv=INV_TABLE[_d-1>>shift];
143 return (_a*(_b>>shift)-(_c>>shift)+
144 (_a*(_b&mask)+one-(_c&mask)>>shift)-1)*inv&MASK32;
147 #endif /* SMALL_FOOTPRINT */
149 /*Although derived separately, the pulse vector coding scheme is equivalent to
150 a Pyramid Vector Quantizer \cite{Fis86}.
151 Some additional notes about an early version appear at
152 http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
153 and the definitions of some terms have evolved since that was written.
155 The conversion from a pulse vector to an integer index (encoding) and back
156 (decoding) is governed by two related functions, V(N,K) and U(N,K).
158 V(N,K) = the number of combinations, with replacement, of N items, taken K
159 at a time, when a sign bit is added to each item taken at least once (i.e.,
160 the number of N-dimensional unit pulse vectors with K pulses).
161 One way to compute this is via
162 V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1,
163 where choose() is the binomial function.
164 A table of values for N<10 and K<10 looks like:
166 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
167 {1, 2, 2, 2, 2, 2, 2, 2, 2, 2},
168 {1, 4, 8, 12, 16, 20, 24, 28, 32, 36},
169 {1, 6, 18, 38, 66, 102, 146, 198, 258, 326},
170 {1, 8, 32, 88, 192, 360, 608, 952, 1408, 1992},
171 {1, 10, 50, 170, 450, 1002, 1970, 3530, 5890, 9290},
172 {1, 12, 72, 292, 912, 2364, 5336, 10836, 20256, 35436},
173 {1, 14, 98, 462, 1666, 4942, 12642, 28814, 59906, 115598},
174 {1, 16, 128, 688, 2816, 9424, 27008, 68464, 157184, 332688},
175 {1, 18, 162, 978, 4482, 16722, 53154, 148626, 374274, 864146}
178 U(N,K) = the number of such combinations wherein N-1 objects are taken at
181 U(N,K) = sum(k=0...K-1,V(N-1,k))
182 = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0.
183 The latter expression also makes clear that U(N,K) is half the number of such
184 combinations wherein the first object is taken at least once.
185 Although it may not be clear from either of these definitions, U(N,K) is the
186 natural function to work with when enumerating the pulse vector codebooks,
188 U(N,K) is not well-defined for N=0, but with the extension
189 U(0,K) = K>0 ? 0 : 1,
190 the function becomes symmetric: U(N,K) = U(K,N), with a similar table:
192 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
193 {0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
194 {0, 1, 3, 5, 7, 9, 11, 13, 15, 17},
195 {0, 1, 5, 13, 25, 41, 61, 85, 113, 145},
196 {0, 1, 7, 25, 63, 129, 231, 377, 575, 833},
197 {0, 1, 9, 41, 129, 321, 681, 1289, 2241, 3649},
198 {0, 1, 11, 61, 231, 681, 1683, 3653, 7183, 13073},
199 {0, 1, 13, 85, 377, 1289, 3653, 8989, 19825, 40081},
200 {0, 1, 15, 113, 575, 2241, 7183, 19825, 48639, 108545},
201 {0, 1, 17, 145, 833, 3649, 13073, 40081, 108545, 265729}
204 With this extension, V(N,K) may be written in terms of U(N,K):
205 V(N,K) = U(N,K) + U(N,K+1)
207 Thus U(N,K+1) represents the number of combinations where the first element
208 is positive or zero, and U(N,K) represents the number of combinations where
210 With a large enough table of U(N,K) values, we could write O(N) encoding
211 and O(min(N*log(K),N+K)) decoding routines, but such a table would be
212 prohibitively large for small embedded devices (K may be as large as 32767
213 for small N, and N may be as large as 200).
215 Both functions obey the same recurrence relation:
216 V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1),
217 U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1),
218 for all N>0, K>0, with different initial conditions at N=0 or K=0.
219 This allows us to construct a row of one of the tables above given the
220 previous row or the next row.
221 Thus we can derive O(NK) encoding and decoding routines with O(K) memory
222 using only addition and subtraction.
224 When encoding, we build up from the U(2,K) row and work our way forwards.
225 When decoding, we need to start at the U(N,K) row and work our way backwards,
226 which requires a means of computing U(N,K).
227 U(N,K) may be computed from two previous values with the same N:
228 U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2)
229 for all N>1, and since U(N,K) is symmetric, a similar relation holds for two
230 previous values with the same K:
231 U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K)
233 This allows us to construct an arbitrary row of the U(N,K) table by starting
234 with the first two values, which are constants.
235 This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K)
237 Similar relations can be derived for V(N,K), but are not used here.
239 For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree
240 polynomial for fixed N.
244 U(3,K) = (2*K-2)*K+1,
245 U(4,K) = (((4*K-6)*K+8)*K-3)/3,
246 U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3,
251 V(4,K) = 8*(K*K+2)*K/3,
252 V(5,K) = ((4*K*K+20)*K*K+6)/3,
254 This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for
255 small N (and indeed decoding is also O(N) for N<3).
258 author="Thomas R. Fischer",
259 title="A Pyramid Vector Quantizer",
260 journal="IEEE Transactions on Information Theory",
268 #ifndef SMALL_FOOTPRINT
271 static inline unsigned ucwrs1(int _k){
276 static inline unsigned ncwrs1(int _k){
281 Note that this may be called with _k=32768 (maxK[2]+1).*/
282 static inline unsigned ucwrs2(unsigned _k){
283 return _k?_k+(_k-1):0;
287 static inline opus_uint32 ncwrs2(int _k){
288 return _k?4*(opus_uint32)_k:1;
292 Note that this may be called with _k=32768 (maxK[3]+1).*/
293 static inline opus_uint32 ucwrs3(unsigned _k){
294 return _k?(2*(opus_uint32)_k-2)*_k+1:0;
298 static inline opus_uint32 ncwrs3(int _k){
299 return _k?2*(2*(unsigned)_k*(opus_uint32)_k+1):1;
303 static inline opus_uint32 ucwrs4(int _k){
304 return _k?imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1):0;
308 static inline opus_uint32 ncwrs4(int _k){
309 return _k?((_k*(opus_uint32)_k+2)*_k)/3<<3:1;
313 static inline opus_uint32 ucwrs5(int _k){
314 return _k?(((((_k-2)*(unsigned)_k+5)*(opus_uint32)_k-4)*_k)/3<<1)+1:0;
318 static inline opus_uint32 ncwrs5(int _k){
319 return _k?(((_k*(unsigned)_k+5)*(opus_uint32)_k*_k)/3<<2)+2:1;
322 #endif /* SMALL_FOOTPRINT */
324 /*Computes the next row/column of any recurrence that obeys the relation
325 u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
326 _ui0 is the base case for the new row/column.*/
327 static inline void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
330 /*This do-while will overrun the array if we don't have storage for at least
333 ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
340 /*Computes the previous row/column of any recurrence that obeys the relation
341 u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
342 _ui0 is the base case for the new row/column.*/
343 static inline void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){
346 /*This do-while will overrun the array if we don't have storage for at least
349 ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
356 /*Compute V(_n,_k), as well as U(_n,0..._k+1).
357 _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/
358 static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
363 /*We require storage at least 3 values (e.g., _k>0).*/
367 #ifndef SMALL_FOOTPRINT
371 /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
372 /*If _n==1, _u[i] should be 1 for i>1.*/
374 /*If _k==0, the following do-while loop will overflow the buffer.*/
379 for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
381 #ifndef SMALL_FOOTPRINT
385 _u[2]=n2m1=um1=(_n<<1)-1;
387 /*U(N,K) = ((2*N-1)*U(N,K-1)-U(N,K-2))/(K-1) + U(N,K-2)*/
388 _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2;
390 _u[k]=um1=imusdiv32odd(n2m1,um2,um1,k-1>>1)+um1;
393 #endif /* SMALL_FOOTPRINT */
394 return _u[_k]+_u[_k+1];
397 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
398 set of size 1 with associated sign bits.
399 _y: Returns the vector of pulses.*/
400 static inline void cwrsi1(int _k,opus_uint32 _i,int *_y){
406 #ifndef SMALL_FOOTPRINT
408 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
409 set of size 2 with associated sign bits.
410 _y: Returns the vector of pulses.*/
411 static inline void cwrsi2(int _k,opus_uint32 _i,int *_y){
427 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
428 set of size 3 with associated sign bits.
429 _y: Returns the vector of pulses.*/
430 static void cwrsi3(int _k,opus_uint32 _i,int *_y){
438 /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
439 _i<2147418113=U(3,32768)).*/
440 _k=_i>0?isqrt32(2*_i-1)+1>>1:0;
448 /*Returns the _i'th combination of _k elements (at most 1172) chosen from a set
449 of size 4 with associated sign bits.
450 _y: Returns the vector of pulses.*/
451 static void cwrsi4(int _k,opus_uint32 _i,int *_y){
461 /*We could solve a cubic for k here, but the form of the direct solution does
462 not lend itself well to exact integer arithmetic.
463 Instead we do a binary search on U(4,K).*/
473 else if(p>_i)kr=_k-1;
482 /*Returns the _i'th combination of _k elements (at most 238) chosen from a set
483 of size 5 with associated sign bits.
484 _y: Returns the vector of pulses.*/
485 static void cwrsi5(int _k,opus_uint32 _i,int *_y){
493 /* A binary search on U(5,K) avoids the need for 64-bit arithmetic */
504 else if(p>_i)kr=_k-1;
513 #endif /* SMALL_FOOTPRINT */
515 /*Returns the _i'th combination of _k elements chosen from a set of size _n
516 with associated sign bits.
517 _y: Returns the vector of pulses.
518 _u: Must contain entries [0..._k+1] of row _n of U() on input.
519 Its contents will be destructively modified.*/
520 static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
533 while(p>_i)p=_u[--_k];
543 /*Returns the index of the given combination of K elements chosen from a set
544 of size 1 with associated sign bits.
545 _y: The vector of pulses, whose sum of absolute values is K.
547 static inline opus_uint32 icwrs1(const int *_y,int *_k){
552 #ifndef SMALL_FOOTPRINT
554 /*Returns the index of the given combination of K elements chosen from a set
555 of size 2 with associated sign bits.
556 _y: The vector of pulses, whose sum of absolute values is K.
558 static inline opus_uint32 icwrs2(const int *_y,int *_k){
564 if(_y[0]<0)i+=ucwrs2(k+1U);
569 /*Returns the index of the given combination of K elements chosen from a set
570 of size 3 with associated sign bits.
571 _y: The vector of pulses, whose sum of absolute values is K.
573 static inline opus_uint32 icwrs3(const int *_y,int *_k){
579 if(_y[0]<0)i+=ucwrs3(k+1U);
584 /*Returns the index of the given combination of K elements chosen from a set
585 of size 4 with associated sign bits.
586 _y: The vector of pulses, whose sum of absolute values is K.
588 static inline opus_uint32 icwrs4(const int *_y,int *_k){
594 if(_y[0]<0)i+=ucwrs4(k+1);
599 /*Returns the index of the given combination of K elements chosen from a set
600 of size 5 with associated sign bits.
601 _y: The vector of pulses, whose sum of absolute values is K.
603 static inline opus_uint32 icwrs5(const int *_y,int *_k){
609 if(_y[0]<0)i+=ucwrs5(k+1);
613 #endif /* SMALL_FOOTPRINT */
615 /*Returns the index of the given combination of K elements chosen from a set
616 of size _n with associated sign bits.
617 _y: The vector of pulses, whose sum of absolute values must be _k.
618 _nc: Returns V(_n,_k).*/
619 opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
624 /*We can't unroll the first two iterations of the loop unless _n>=2.*/
627 for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1;
628 i=icwrs1(_y+_n-1,&k);
632 if(_y[j]<0)i+=_u[k+1];
637 if(_y[j]<0)i+=_u[k+1];
644 void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
646 /*_maxk==0 => there's nothing to do.*/
647 celt_assert(_maxk>0);
651 for (k=1;k<=_maxk;k++)
655 VARDECL(opus_uint32,u);
657 ALLOC(u,_maxk+2U,opus_uint32);
658 ncwrs_urow(_n,_maxk,u);
659 for(k=1;k<=_maxk;k++)
660 _bits[k]=log2_frac(u[k]+u[k+1],_frac);
664 #endif /* CUSTOM_MODES */
666 void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
673 celt_assert(ncwrs1(_k)==2);
674 ec_enc_bits(_enc,i,1);
676 #ifndef SMALL_FOOTPRINT
679 ec_enc_uint(_enc,i,ncwrs2(_k));
683 ec_enc_uint(_enc,i,ncwrs3(_k));
687 ec_enc_uint(_enc,i,ncwrs4(_k));
691 ec_enc_uint(_enc,i,ncwrs5(_k));
696 VARDECL(opus_uint32,u);
699 ALLOC(u,_k+2U,opus_uint32);
700 i=icwrs(_n,_k,&nc,_y,u);
701 ec_enc_uint(_enc,i,nc);
707 void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
717 celt_assert(ncwrs1(_k)==2);
718 cwrsi1(_k,ec_dec_bits(_dec,1),_y);
720 #ifndef SMALL_FOOTPRINT
721 case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;
722 case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break;
723 case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break;
724 case 5:cwrsi5(_k,ec_dec_uint(_dec,ncwrs5(_k)),_y);break;
728 VARDECL(opus_uint32,u);
730 ALLOC(u,_k+2U,opus_uint32);
731 cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);