1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Copyright (c) 2007-2009 Timothy B. Terriberry
4 Written by Timothy B. Terriberry and Jean-Marc Valin */
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
17 - Neither the name of the Xiph.org Foundation nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
25 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 #include "os_support.h"
45 /*Guaranteed to return a conservatively large estimate of the binary logarithm
46 with frac bits of fractional precision.
47 Tested for all possible 32-bit inputs with frac=4, where the maximum
48 overestimation is 0.06254243 bits.*/
49 int log2_frac(ec_uint32 val, int frac)
54 /*This is (val>>l-16), but guaranteed to round up, even if adding a bias
55 before the shift would cause overflow (e.g., for 0xFFFFxxxx).*/
56 if(l>16)val=(val>>l-16)+((val&(1<<l-16)-1)+(1<<l-16)-1>>l-16);
59 /*Note that we always need one iteration, since the rounding up above means
60 that we might need to adjust the integer part of the logarithm.*/
66 val=val*val+0x7FFF>>15;
69 /*If val is not exactly 0x8000, then we have to round up the remainder.*/
70 return l+(val>0x8000);
72 /*Exact powers of two require no rounding.*/
73 else return l-1<<frac;
76 #ifndef SMALL_FOOTPRINT
79 #define MASK32 (0xFFFFFFFF)
81 /*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/
82 static const celt_uint32 INV_TABLE[64]={
83 0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7,
84 0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF,
85 0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7,
86 0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF,
87 0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97,
88 0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF,
89 0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587,
90 0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF,
91 0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977,
92 0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF,
93 0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67,
94 0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F,
95 0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57,
96 0xD8FD8FD9,0x8D28AC43,0xDA6C0965,0xDB195E8F,
97 0x0FDBC091,0x61F2A4BB,0xDCFDCFDD,0x46FDD947,
98 0x56BE69C9,0xEB2FDEB3,0x26E978D5,0xEFDFBF7F,
100 0x0FE03F81,0xC9484E2B,0xE133F84D,0xE1A8C537,
101 0x077975B9,0x70586723,0xCD29C245,0xFAA11E6F,
102 0x0FE3C071,0x08B51D9B,0x8CE2CABD,0xBF937F27,
103 0xA8FE53A9,0x592FE593,0x2C0685B5,0x2EB11B5F,
104 0xFCD1E361,0x451AB30B,0x72CFE72D,0xDB35A717,
105 0xFB74A399,0xE80BFA03,0x0D516325,0x1BCB564F,
106 0xE02E4851,0xD962AE7B,0x10F8ED9D,0x95AEDD07,
107 0xE9DC0589,0xA18A4473,0xEA53FA95,0xEE936F3F,
108 0x90948F41,0xEAFEAFEB,0x3D137E0D,0xEF46C0F7,
109 0x028C1979,0x791064E3,0xC04FEC05,0xE115062F,
110 0x32385831,0x6E68575B,0xA10D387D,0x6FECF2E7,
111 0x3FB47F69,0xED4BFB53,0x74FED775,0xDB43BB1F,
112 0x87654321,0x9BA144CB,0x478BBCED,0xBFB912D7,
113 0x1FDCD759,0x14B2A7C3,0xCB125CE5,0x437B2E0F,
114 0x10FEF011,0xD2B3183B,0x386CAB5D,0xEF6AC0C7,
115 0x0E64C149,0x9A020A33,0xE6B41C55,0xFEFEFEFF*/
118 /*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact.
119 _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result
120 fits in 32 bits, but currently the table for multiplicative inverses is only
122 static inline celt_uint32 imusdiv32odd(celt_uint32 _a,celt_uint32 _b,
123 celt_uint32 _c,int _d){
124 return (_a*_b-_c)*INV_TABLE[_d]&MASK32;
127 /*Computes (_a*_b-_c)/_d when the quotient is known to be exact.
128 _d does not actually have to be even, but imusdiv32odd will be faster when
129 it's odd, so you should use that instead.
130 _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the
131 table for multiplicative inverses is only valid for _d<=256).
132 _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in
134 static inline celt_uint32 imusdiv32even(celt_uint32 _a,celt_uint32 _b,
135 celt_uint32 _c,int _d){
141 shift=EC_ILOG(_d^_d-1);
142 celt_assert(_d<=256);
143 inv=INV_TABLE[_d-1>>shift];
147 return (_a*(_b>>shift)-(_c>>shift)+
148 (_a*(_b&mask)+one-(_c&mask)>>shift)-1)*inv&MASK32;
151 #endif /* SMALL_FOOTPRINT */
153 /*Although derived separately, the pulse vector coding scheme is equivalent to
154 a Pyramid Vector Quantizer \cite{Fis86}.
155 Some additional notes about an early version appear at
156 http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
157 and the definitions of some terms have evolved since that was written.
159 The conversion from a pulse vector to an integer index (encoding) and back
160 (decoding) is governed by two related functions, V(N,K) and U(N,K).
162 V(N,K) = the number of combinations, with replacement, of N items, taken K
163 at a time, when a sign bit is added to each item taken at least once (i.e.,
164 the number of N-dimensional unit pulse vectors with K pulses).
165 One way to compute this is via
166 V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1,
167 where choose() is the binomial function.
168 A table of values for N<10 and K<10 looks like:
170 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
171 {1, 2, 2, 2, 2, 2, 2, 2, 2, 2},
172 {1, 4, 8, 12, 16, 20, 24, 28, 32, 36},
173 {1, 6, 18, 38, 66, 102, 146, 198, 258, 326},
174 {1, 8, 32, 88, 192, 360, 608, 952, 1408, 1992},
175 {1, 10, 50, 170, 450, 1002, 1970, 3530, 5890, 9290},
176 {1, 12, 72, 292, 912, 2364, 5336, 10836, 20256, 35436},
177 {1, 14, 98, 462, 1666, 4942, 12642, 28814, 59906, 115598},
178 {1, 16, 128, 688, 2816, 9424, 27008, 68464, 157184, 332688},
179 {1, 18, 162, 978, 4482, 16722, 53154, 148626, 374274, 864146}
182 U(N,K) = the number of such combinations wherein N-1 objects are taken at
185 U(N,K) = sum(k=0...K-1,V(N-1,k))
186 = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0.
187 The latter expression also makes clear that U(N,K) is half the number of such
188 combinations wherein the first object is taken at least once.
189 Although it may not be clear from either of these definitions, U(N,K) is the
190 natural function to work with when enumerating the pulse vector codebooks,
192 U(N,K) is not well-defined for N=0, but with the extension
193 U(0,K) = K>0 ? 0 : 1,
194 the function becomes symmetric: U(N,K) = U(K,N), with a similar table:
196 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
197 {0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
198 {0, 1, 3, 5, 7, 9, 11, 13, 15, 17},
199 {0, 1, 5, 13, 25, 41, 61, 85, 113, 145},
200 {0, 1, 7, 25, 63, 129, 231, 377, 575, 833},
201 {0, 1, 9, 41, 129, 321, 681, 1289, 2241, 3649},
202 {0, 1, 11, 61, 231, 681, 1683, 3653, 7183, 13073},
203 {0, 1, 13, 85, 377, 1289, 3653, 8989, 19825, 40081},
204 {0, 1, 15, 113, 575, 2241, 7183, 19825, 48639, 108545},
205 {0, 1, 17, 145, 833, 3649, 13073, 40081, 108545, 265729}
208 With this extension, V(N,K) may be written in terms of U(N,K):
209 V(N,K) = U(N,K) + U(N,K+1)
211 Thus U(N,K+1) represents the number of combinations where the first element
212 is positive or zero, and U(N,K) represents the number of combinations where
214 With a large enough table of U(N,K) values, we could write O(N) encoding
215 and O(min(N*log(K),N+K)) decoding routines, but such a table would be
216 prohibitively large for small embedded devices (K may be as large as 32767
217 for small N, and N may be as large as 200).
219 Both functions obey the same recurrence relation:
220 V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1),
221 U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1),
222 for all N>0, K>0, with different initial conditions at N=0 or K=0.
223 This allows us to construct a row of one of the tables above given the
224 previous row or the next row.
225 Thus we can derive O(NK) encoding and decoding routines with O(K) memory
226 using only addition and subtraction.
228 When encoding, we build up from the U(2,K) row and work our way forwards.
229 When decoding, we need to start at the U(N,K) row and work our way backwards,
230 which requires a means of computing U(N,K).
231 U(N,K) may be computed from two previous values with the same N:
232 U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2)
233 for all N>1, and since U(N,K) is symmetric, a similar relation holds for two
234 previous values with the same K:
235 U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K)
237 This allows us to construct an arbitrary row of the U(N,K) table by starting
238 with the first two values, which are constants.
239 This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K)
241 Similar relations can be derived for V(N,K), but are not used here.
243 For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree
244 polynomial for fixed N.
248 U(3,K) = (2*K-2)*K+1,
249 U(4,K) = (((4*K-6)*K+8)*K-3)/3,
250 U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3,
255 V(4,K) = 8*(K*K+2)*K/3,
256 V(5,K) = ((4*K*K+20)*K*K+6)/3,
258 This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for
259 small N (and indeed decoding is also O(N) for N<3).
262 author="Thomas R. Fischer",
263 title="A Pyramid Vector Quantizer",
264 journal="IEEE Transactions on Information Theory",
272 #ifndef SMALL_FOOTPRINT
275 static inline unsigned ucwrs1(int _k){
280 static inline unsigned ncwrs1(int _k){
285 Note that this may be called with _k=32768 (maxK[2]+1).*/
286 static inline unsigned ucwrs2(unsigned _k){
287 return _k?_k+(_k-1):0;
291 static inline celt_uint32 ncwrs2(int _k){
292 return _k?4*(celt_uint32)_k:1;
296 Note that this may be called with _k=32768 (maxK[3]+1).*/
297 static inline celt_uint32 ucwrs3(unsigned _k){
298 return _k?(2*(celt_uint32)_k-2)*_k+1:0;
302 static inline celt_uint32 ncwrs3(int _k){
303 return _k?2*(2*(unsigned)_k*(celt_uint32)_k+1):1;
307 static inline celt_uint32 ucwrs4(int _k){
308 return _k?imusdiv32odd(2*_k,(2*_k-3)*(celt_uint32)_k+4,3,1):0;
312 static inline celt_uint32 ncwrs4(int _k){
313 return _k?((_k*(celt_uint32)_k+2)*_k)/3<<3:1;
317 static inline celt_uint32 ucwrs5(int _k){
318 return _k?(((((_k-2)*(unsigned)_k+5)*(celt_uint32)_k-4)*_k)/3<<1)+1:0;
322 static inline celt_uint32 ncwrs5(int _k){
323 return _k?(((_k*(unsigned)_k+5)*(celt_uint32)_k*_k)/3<<2)+2:1;
326 #endif /* SMALL_FOOTPRINT */
328 /*Computes the next row/column of any recurrence that obeys the relation
329 u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
330 _ui0 is the base case for the new row/column.*/
331 static inline void unext(celt_uint32 *_ui,unsigned _len,celt_uint32 _ui0){
334 /*This do-while will overrun the array if we don't have storage for at least
337 ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
344 /*Computes the previous row/column of any recurrence that obeys the relation
345 u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
346 _ui0 is the base case for the new row/column.*/
347 static inline void uprev(celt_uint32 *_ui,unsigned _n,celt_uint32 _ui0){
350 /*This do-while will overrun the array if we don't have storage for at least
353 ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
360 /*Compute V(_n,_k), as well as U(_n,0..._k+1).
361 _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/
362 static celt_uint32 ncwrs_urow(unsigned _n,unsigned _k,celt_uint32 *_u){
367 /*We require storage at least 3 values (e.g., _k>0).*/
371 #ifndef SMALL_FOOTPRINT
375 /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
376 /*If _n==1, _u[i] should be 1 for i>1.*/
378 /*If _k==0, the following do-while loop will overflow the buffer.*/
383 for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
385 #ifndef SMALL_FOOTPRINT
389 _u[2]=n2m1=um1=(_n<<1)-1;
391 /*U(N,K) = ((2*N-1)*U(N,K-1)-U(N,K-2))/(K-1) + U(N,K-2)*/
392 _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2;
394 _u[k]=um1=imusdiv32odd(n2m1,um2,um1,k-1>>1)+um1;
397 #endif /* SMALL_FOOTPRINT */
398 return _u[_k]+_u[_k+1];
401 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
402 set of size 1 with associated sign bits.
403 _y: Returns the vector of pulses.*/
404 static inline void cwrsi1(int _k,celt_uint32 _i,int *_y){
410 #ifndef SMALL_FOOTPRINT
412 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
413 set of size 2 with associated sign bits.
414 _y: Returns the vector of pulses.*/
415 static inline void cwrsi2(int _k,celt_uint32 _i,int *_y){
431 /*Returns the _i'th combination of _k elements (at most 32767) chosen from a
432 set of size 3 with associated sign bits.
433 _y: Returns the vector of pulses.*/
434 static void cwrsi3(int _k,celt_uint32 _i,int *_y){
442 /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
443 _i<2147418113=U(3,32768)).*/
444 _k=_i>0?isqrt32(2*_i-1)+1>>1:0;
452 /*Returns the _i'th combination of _k elements (at most 1172) chosen from a set
453 of size 4 with associated sign bits.
454 _y: Returns the vector of pulses.*/
455 static void cwrsi4(int _k,celt_uint32 _i,int *_y){
465 /*We could solve a cubic for k here, but the form of the direct solution does
466 not lend itself well to exact integer arithmetic.
467 Instead we do a binary search on U(4,K).*/
477 else if(p>_i)kr=_k-1;
486 /*Returns the _i'th combination of _k elements (at most 238) chosen from a set
487 of size 5 with associated sign bits.
488 _y: Returns the vector of pulses.*/
489 static void cwrsi5(int _k,celt_uint32 _i,int *_y){
497 /* A binary search on U(5,K) avoids the need for 64-bit arithmetic */
508 else if(p>_i)kr=_k-1;
517 #endif /* SMALL_FOOTPRINT */
519 /*Returns the _i'th combination of _k elements chosen from a set of size _n
520 with associated sign bits.
521 _y: Returns the vector of pulses.
522 _u: Must contain entries [0..._k+1] of row _n of U() on input.
523 Its contents will be destructively modified.*/
524 static void cwrsi(int _n,int _k,celt_uint32 _i,int *_y,celt_uint32 *_u){
537 while(p>_i)p=_u[--_k];
547 /*Returns the index of the given combination of K elements chosen from a set
548 of size 1 with associated sign bits.
549 _y: The vector of pulses, whose sum of absolute values is K.
551 static inline celt_uint32 icwrs1(const int *_y,int *_k){
556 #ifndef SMALL_FOOTPRINT
558 /*Returns the index of the given combination of K elements chosen from a set
559 of size 2 with associated sign bits.
560 _y: The vector of pulses, whose sum of absolute values is K.
562 static inline celt_uint32 icwrs2(const int *_y,int *_k){
568 if(_y[0]<0)i+=ucwrs2(k+1U);
573 /*Returns the index of the given combination of K elements chosen from a set
574 of size 3 with associated sign bits.
575 _y: The vector of pulses, whose sum of absolute values is K.
577 static inline celt_uint32 icwrs3(const int *_y,int *_k){
583 if(_y[0]<0)i+=ucwrs3(k+1U);
588 /*Returns the index of the given combination of K elements chosen from a set
589 of size 4 with associated sign bits.
590 _y: The vector of pulses, whose sum of absolute values is K.
592 static inline celt_uint32 icwrs4(const int *_y,int *_k){
598 if(_y[0]<0)i+=ucwrs4(k+1);
603 /*Returns the index of the given combination of K elements chosen from a set
604 of size 5 with associated sign bits.
605 _y: The vector of pulses, whose sum of absolute values is K.
607 static inline celt_uint32 icwrs5(const int *_y,int *_k){
613 if(_y[0]<0)i+=ucwrs5(k+1);
617 #endif /* SMALL_FOOTPRINT */
619 /*Returns the index of the given combination of K elements chosen from a set
620 of size _n with associated sign bits.
621 _y: The vector of pulses, whose sum of absolute values must be _k.
622 _nc: Returns V(_n,_k).*/
623 celt_uint32 icwrs(int _n,int _k,celt_uint32 *_nc,const int *_y,
628 /*We can't unroll the first two iterations of the loop unless _n>=2.*/
631 for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1;
632 i=icwrs1(_y+_n-1,&k);
636 if(_y[j]<0)i+=_u[k+1];
641 if(_y[j]<0)i+=_u[k+1];
648 void get_required_bits(celt_int16 *_bits,int _n,int _maxk,int _frac){
650 /*_maxk==0 => there's nothing to do.*/
651 celt_assert(_maxk>0);
655 for (k=1;k<=_maxk;k++)
659 VARDECL(celt_uint32,u);
661 ALLOC(u,_maxk+2U,celt_uint32);
662 ncwrs_urow(_n,_maxk,u);
663 for(k=1;k<=_maxk;k++)
664 _bits[k]=log2_frac(u[k]+u[k+1],_frac);
668 #endif /* STATIC_MODES */
670 void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
677 celt_assert(ncwrs1(_k)==2);
678 ec_enc_bits(_enc,i,1);
680 #ifndef SMALL_FOOTPRINT
683 ec_enc_uint(_enc,i,ncwrs2(_k));
687 ec_enc_uint(_enc,i,ncwrs3(_k));
691 ec_enc_uint(_enc,i,ncwrs4(_k));
695 ec_enc_uint(_enc,i,ncwrs5(_k));
700 VARDECL(celt_uint32,u);
703 ALLOC(u,_k+2U,celt_uint32);
704 i=icwrs(_n,_k,&nc,_y,u);
705 ec_enc_uint(_enc,i,nc);
711 void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
721 celt_assert(ncwrs1(_k)==2);
722 cwrsi1(_k,ec_dec_bits(_dec,1),_y);
724 #ifndef SMALL_FOOTPRINT
725 case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;
726 case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break;
727 case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break;
728 case 5:cwrsi5(_k,ec_dec_uint(_dec,ncwrs5(_k)),_y);break;
732 VARDECL(celt_uint32,u);
734 ALLOC(u,_k+2U,celt_uint32);
735 cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);