1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Copyright (c) 2007-2009 Timothy B. Terriberry
4 Written by Timothy B. Terriberry and Jean-Marc Valin */
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
21 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include "os_support.h"
43 /*Guaranteed to return a conservatively large estimate of the binary logarithm
44 with frac bits of fractional precision.
45 Tested for all possible 32-bit inputs with frac=4, where the maximum
46 overestimation is 0.06254243 bits.*/
47 int log2_frac(opus_uint32 val, int frac)
52 /*This is (val>>l-16), but guaranteed to round up, even if adding a bias
53 before the shift would cause overflow (e.g., for 0xFFFFxxxx).*/
54 if(l>16)val=(val>>l-16)+((val&(1<<l-16)-1)+(1<<l-16)-1>>l-16);
57 /*Note that we always need one iteration, since the rounding up above means
58 that we might need to adjust the integer part of the logarithm.*/
64 val=val*val+0x7FFF>>15;
67 /*If val is not exactly 0x8000, then we have to round up the remainder.*/
68 return l+(val>0x8000);
70 /*Exact powers of two require no rounding.*/
71 else return l-1<<frac;
75 #ifndef SMALL_FOOTPRINT
77 #define MASK32 (0xFFFFFFFF)
79 /*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/
80 static const opus_uint32 INV_TABLE[53]={
81 0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7,
82 0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF,
83 0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7,
84 0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF,
85 0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97,
86 0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF,
87 0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587,
88 0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF,
89 0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977,
90 0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF,
91 0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67,
92 0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F,
93 0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57,
97 /*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact.
98 _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result
99 fits in 32 bits, but currently the table for multiplicative inverses is only
101 static inline opus_uint32 imusdiv32odd(opus_uint32 _a,opus_uint32 _b,
102 opus_uint32 _c,int _d){
104 return (_a*_b-_c)*INV_TABLE[_d]&MASK32;
107 /*Computes (_a*_b-_c)/_d when the quotient is known to be exact.
108 _d does not actually have to be even, but imusdiv32odd will be faster when
109 it's odd, so you should use that instead.
110 _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the
111 table for multiplicative inverses is only valid for _d<=54).
112 _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in
114 static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b,
115 opus_uint32 _c,int _d){
122 shift=EC_ILOG(_d^_d-1);
123 inv=INV_TABLE[_d-1>>shift];
127 return (_a*(_b>>shift)-(_c>>shift)+
128 (_a*(_b&mask)+one-(_c&mask)>>shift)-1)*inv&MASK32;
131 #endif /* SMALL_FOOTPRINT */
133 /*Although derived separately, the pulse vector coding scheme is equivalent to
134 a Pyramid Vector Quantizer \cite{Fis86}.
135 Some additional notes about an early version appear at
136 http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
137 and the definitions of some terms have evolved since that was written.
139 The conversion from a pulse vector to an integer index (encoding) and back
140 (decoding) is governed by two related functions, V(N,K) and U(N,K).
142 V(N,K) = the number of combinations, with replacement, of N items, taken K
143 at a time, when a sign bit is added to each item taken at least once (i.e.,
144 the number of N-dimensional unit pulse vectors with K pulses).
145 One way to compute this is via
146 V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1,
147 where choose() is the binomial function.
148 A table of values for N<10 and K<10 looks like:
150 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
151 {1, 2, 2, 2, 2, 2, 2, 2, 2, 2},
152 {1, 4, 8, 12, 16, 20, 24, 28, 32, 36},
153 {1, 6, 18, 38, 66, 102, 146, 198, 258, 326},
154 {1, 8, 32, 88, 192, 360, 608, 952, 1408, 1992},
155 {1, 10, 50, 170, 450, 1002, 1970, 3530, 5890, 9290},
156 {1, 12, 72, 292, 912, 2364, 5336, 10836, 20256, 35436},
157 {1, 14, 98, 462, 1666, 4942, 12642, 28814, 59906, 115598},
158 {1, 16, 128, 688, 2816, 9424, 27008, 68464, 157184, 332688},
159 {1, 18, 162, 978, 4482, 16722, 53154, 148626, 374274, 864146}
162 U(N,K) = the number of such combinations wherein N-1 objects are taken at
165 U(N,K) = sum(k=0...K-1,V(N-1,k))
166 = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0.
167 The latter expression also makes clear that U(N,K) is half the number of such
168 combinations wherein the first object is taken at least once.
169 Although it may not be clear from either of these definitions, U(N,K) is the
170 natural function to work with when enumerating the pulse vector codebooks,
172 U(N,K) is not well-defined for N=0, but with the extension
173 U(0,K) = K>0 ? 0 : 1,
174 the function becomes symmetric: U(N,K) = U(K,N), with a similar table:
176 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
177 {0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
178 {0, 1, 3, 5, 7, 9, 11, 13, 15, 17},
179 {0, 1, 5, 13, 25, 41, 61, 85, 113, 145},
180 {0, 1, 7, 25, 63, 129, 231, 377, 575, 833},
181 {0, 1, 9, 41, 129, 321, 681, 1289, 2241, 3649},
182 {0, 1, 11, 61, 231, 681, 1683, 3653, 7183, 13073},
183 {0, 1, 13, 85, 377, 1289, 3653, 8989, 19825, 40081},
184 {0, 1, 15, 113, 575, 2241, 7183, 19825, 48639, 108545},
185 {0, 1, 17, 145, 833, 3649, 13073, 40081, 108545, 265729}
188 With this extension, V(N,K) may be written in terms of U(N,K):
189 V(N,K) = U(N,K) + U(N,K+1)
191 Thus U(N,K+1) represents the number of combinations where the first element
192 is positive or zero, and U(N,K) represents the number of combinations where
194 With a large enough table of U(N,K) values, we could write O(N) encoding
195 and O(min(N*log(K),N+K)) decoding routines, but such a table would be
196 prohibitively large for small embedded devices (K may be as large as 32767
197 for small N, and N may be as large as 200).
199 Both functions obey the same recurrence relation:
200 V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1),
201 U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1),
202 for all N>0, K>0, with different initial conditions at N=0 or K=0.
203 This allows us to construct a row of one of the tables above given the
204 previous row or the next row.
205 Thus we can derive O(NK) encoding and decoding routines with O(K) memory
206 using only addition and subtraction.
208 When encoding, we build up from the U(2,K) row and work our way forwards.
209 When decoding, we need to start at the U(N,K) row and work our way backwards,
210 which requires a means of computing U(N,K).
211 U(N,K) may be computed from two previous values with the same N:
212 U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2)
213 for all N>1, and since U(N,K) is symmetric, a similar relation holds for two
214 previous values with the same K:
215 U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K)
217 This allows us to construct an arbitrary row of the U(N,K) table by starting
218 with the first two values, which are constants.
219 This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K)
221 Similar relations can be derived for V(N,K), but are not used here.
223 For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree
224 polynomial for fixed N.
228 U(3,K) = (2*K-2)*K+1,
229 U(4,K) = (((4*K-6)*K+8)*K-3)/3,
230 U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3,
235 V(4,K) = 8*(K*K+2)*K/3,
236 V(5,K) = ((4*K*K+20)*K*K+6)/3,
238 This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for
239 small N (and indeed decoding is also O(N) for N<3).
242 author="Thomas R. Fischer",
243 title="A Pyramid Vector Quantizer",
244 journal="IEEE Transactions on Information Theory",
252 #ifndef SMALL_FOOTPRINT
254 static inline unsigned ncwrs1(int _k){
259 Note that this may be called with _k=32768 (maxK[2]+1).*/
260 static inline unsigned ucwrs2(unsigned _k){
261 return _k?_k+(_k-1):0;
265 static inline opus_uint32 ncwrs2(int _k){
266 return _k?4*(opus_uint32)_k:1;
270 Note that this may be called with _k=32768 (maxK[3]+1).*/
271 static inline opus_uint32 ucwrs3(unsigned _k){
272 return _k?(2*(opus_uint32)_k-2)*_k+1:0;
276 static inline opus_uint32 ncwrs3(int _k){
277 return _k?2*(2*(unsigned)_k*(opus_uint32)_k+1):1;
281 static inline opus_uint32 ucwrs4(int _k){
282 return _k?imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1):0;
286 static inline opus_uint32 ncwrs4(int _k){
287 return _k?((_k*(opus_uint32)_k+2)*_k)/3<<3:1;
291 static inline opus_uint32 ucwrs5(int _k){
292 return _k?(((((_k-2)*(unsigned)_k+5)*(opus_uint32)_k-4)*_k)/3<<1)+1:0;
296 static inline opus_uint32 ncwrs5(int _k){
297 return _k?(((_k*(unsigned)_k+5)*(opus_uint32)_k*_k)/3<<2)+2:1;
300 #endif /* SMALL_FOOTPRINT */
302 /*Computes the next row/column of any recurrence that obeys the relation
303 u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
304 _ui0 is the base case for the new row/column.*/
305 static inline void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
308 /*This do-while will overrun the array if we don't have storage for at least
311 ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
318 /*Computes the previous row/column of any recurrence that obeys the relation
319 u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
320 _ui0 is the base case for the new row/column.*/
321 static inline void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){
324 /*This do-while will overrun the array if we don't have storage for at least
327 ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
334 /*Compute V(_n,_k), as well as U(_n,0..._k+1).
335 _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/
336 static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
341 /*We require storage at least 3 values (e.g., _k>0).*/
345 #ifndef SMALL_FOOTPRINT
346 /*_k>52 doesn't work in the false branch due to the limits of INV_TABLE,
347 but _k isn't tested here because k<=52 for n=7*/
351 /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
352 /*If _n==1, _u[i] should be 1 for i>1.*/
354 /*If _k==0, the following do-while loop will overflow the buffer.*/
359 for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
361 #ifndef SMALL_FOOTPRINT
365 _u[2]=n2m1=um1=(_n<<1)-1;
367 /*U(N,K) = ((2*N-1)*U(N,K-1)-U(N,K-2))/(K-1) + U(N,K-2)*/
368 _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2;
370 _u[k]=um1=imusdiv32odd(n2m1,um2,um1,k-1>>1)+um1;
373 #endif /* SMALL_FOOTPRINT */
374 return _u[_k]+_u[_k+1];
377 #ifndef SMALL_FOOTPRINT
379 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
380 set of size 1 with associated sign bits.
381 _y: Returns the vector of pulses.*/
382 static inline void cwrsi1(int _k,opus_uint32 _i,int *_y){
388 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
389 set of size 2 with associated sign bits.
390 _y: Returns the vector of pulses.*/
391 static inline void cwrsi2(int _k,opus_uint32 _i,int *_y){
407 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
408 set of size 3 with associated sign bits.
409 _y: Returns the vector of pulses.*/
410 static void cwrsi3(int _k,opus_uint32 _i,int *_y){
418 /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
419 _i<2147418113=U(3,32768)).*/
420 _k=_i>0?isqrt32(2*_i-1)+1>>1:0;
428 /*Returns the _i'th combination of _k elements (at most 1172) chosen from a set
429 of size 4 with associated sign bits.
430 _y: Returns the vector of pulses.*/
431 static void cwrsi4(int _k,opus_uint32 _i,int *_y){
441 /*We could solve a cubic for k here, but the form of the direct solution does
442 not lend itself well to exact integer arithmetic.
443 Instead we do a binary search on U(4,K).*/
453 else if(p>_i)kr=_k-1;
462 #endif /* SMALL_FOOTPRINT */
464 /*Returns the _i'th combination of _k elements chosen from a set of size _n
465 with associated sign bits.
466 _y: Returns the vector of pulses.
467 _u: Must contain entries [0..._k+1] of row _n of U() on input.
468 Its contents will be destructively modified.*/
469 static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
482 while(p>_i)p=_u[--_k];
491 /*Returns the index of the given combination of K elements chosen from a set
492 of size 1 with associated sign bits.
493 _y: The vector of pulses, whose sum of absolute values is K.
495 static inline opus_uint32 icwrs1(const int *_y,int *_k){
500 #ifndef SMALL_FOOTPRINT
502 /*Returns the index of the given combination of K elements chosen from a set
503 of size 2 with associated sign bits.
504 _y: The vector of pulses, whose sum of absolute values is K.
506 static inline opus_uint32 icwrs2(const int *_y,int *_k){
512 if(_y[0]<0)i+=ucwrs2(k+1U);
517 /*Returns the index of the given combination of K elements chosen from a set
518 of size 3 with associated sign bits.
519 _y: The vector of pulses, whose sum of absolute values is K.
521 static inline opus_uint32 icwrs3(const int *_y,int *_k){
527 if(_y[0]<0)i+=ucwrs3(k+1U);
532 /*Returns the index of the given combination of K elements chosen from a set
533 of size 4 with associated sign bits.
534 _y: The vector of pulses, whose sum of absolute values is K.
536 static inline opus_uint32 icwrs4(const int *_y,int *_k){
542 if(_y[0]<0)i+=ucwrs4(k+1);
547 /*Returns the index of the given combination of K elements chosen from a set
548 of size 5 with associated sign bits.
549 _y: The vector of pulses, whose sum of absolute values is K.
551 static inline opus_uint32 icwrs5(const int *_y,int *_k){
557 if(_y[0]<0)i+=ucwrs5(k+1);
561 #endif /* SMALL_FOOTPRINT */
563 /*Returns the index of the given combination of K elements chosen from a set
564 of size _n with associated sign bits.
565 _y: The vector of pulses, whose sum of absolute values must be _k.
566 _nc: Returns V(_n,_k).*/
567 opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
572 /*We can't unroll the first two iterations of the loop unless _n>=2.*/
575 for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1;
576 i=icwrs1(_y+_n-1,&k);
580 if(_y[j]<0)i+=_u[k+1];
585 if(_y[j]<0)i+=_u[k+1];
592 void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
594 /*_maxk==0 => there's nothing to do.*/
595 celt_assert(_maxk>0);
599 for (k=1;k<=_maxk;k++)
603 VARDECL(opus_uint32,u);
605 ALLOC(u,_maxk+2U,opus_uint32);
606 ncwrs_urow(_n,_maxk,u);
607 for(k=1;k<=_maxk;k++)
608 _bits[k]=log2_frac(u[k]+u[k+1],_frac);
612 #endif /* CUSTOM_MODES */
614 void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
616 #ifndef SMALL_FOOTPRINT
620 ec_enc_uint(_enc,i,ncwrs2(_k));
624 ec_enc_uint(_enc,i,ncwrs3(_k));
628 ec_enc_uint(_enc,i,ncwrs4(_k));
633 VARDECL(opus_uint32,u);
636 ALLOC(u,_k+2U,opus_uint32);
637 i=icwrs(_n,_k,&nc,_y,u);
638 ec_enc_uint(_enc,i,nc);
640 #ifndef SMALL_FOOTPRINT
646 void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
648 #ifndef SMALL_FOOTPRINT
650 case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;
651 case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break;
652 case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break;
656 VARDECL(opus_uint32,u);
658 ALLOC(u,_k+2U,opus_uint32);
659 cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
661 #ifndef SMALL_FOOTPRINT