Alexandre Lision | 744f742 | 2013-09-25 11:39:37 -0400 | [diff] [blame] | 1 | /* Copyright (c) 2007-2008 CSIRO |
| 2 | Copyright (c) 2007-2009 Xiph.Org Foundation |
| 3 | Copyright (c) 2007-2009 Timothy B. Terriberry |
| 4 | Written by Timothy B. Terriberry and Jean-Marc Valin */ |
| 5 | /* |
| 6 | Redistribution and use in source and binary forms, with or without |
| 7 | modification, are permitted provided that the following conditions |
| 8 | are met: |
| 9 | |
| 10 | - Redistributions of source code must retain the above copyright |
| 11 | notice, this list of conditions and the following disclaimer. |
| 12 | |
| 13 | - Redistributions in binary form must reproduce the above copyright |
| 14 | notice, this list of conditions and the following disclaimer in the |
| 15 | documentation and/or other materials provided with the distribution. |
| 16 | |
| 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 18 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
| 21 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 22 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 23 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 24 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 25 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 26 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 28 | */ |
| 29 | |
| 30 | #ifdef HAVE_CONFIG_H |
| 31 | #include "config.h" |
| 32 | #endif |
| 33 | |
| 34 | #include "os_support.h" |
| 35 | #include "cwrs.h" |
| 36 | #include "mathops.h" |
| 37 | #include "arch.h" |
| 38 | |
| 39 | #ifdef CUSTOM_MODES |
| 40 | |
| 41 | /*Guaranteed to return a conservatively large estimate of the binary logarithm |
| 42 | with frac bits of fractional precision. |
| 43 | Tested for all possible 32-bit inputs with frac=4, where the maximum |
| 44 | overestimation is 0.06254243 bits.*/ |
| 45 | int log2_frac(opus_uint32 val, int frac) |
| 46 | { |
| 47 | int l; |
| 48 | l=EC_ILOG(val); |
| 49 | if(val&(val-1)){ |
| 50 | /*This is (val>>l-16), but guaranteed to round up, even if adding a bias |
| 51 | before the shift would cause overflow (e.g., for 0xFFFFxxxx). |
| 52 | Doesn't work for val=0, but that case fails the test above.*/ |
| 53 | if(l>16)val=((val-1)>>(l-16))+1; |
| 54 | else val<<=16-l; |
| 55 | l=(l-1)<<frac; |
| 56 | /*Note that we always need one iteration, since the rounding up above means |
| 57 | that we might need to adjust the integer part of the logarithm.*/ |
| 58 | do{ |
| 59 | int b; |
| 60 | b=(int)(val>>16); |
| 61 | l+=b<<frac; |
| 62 | val=(val+b)>>b; |
| 63 | val=(val*val+0x7FFF)>>15; |
| 64 | } |
| 65 | while(frac-->0); |
| 66 | /*If val is not exactly 0x8000, then we have to round up the remainder.*/ |
| 67 | return l+(val>0x8000); |
| 68 | } |
| 69 | /*Exact powers of two require no rounding.*/ |
| 70 | else return (l-1)<<frac; |
| 71 | } |
| 72 | #endif |
| 73 | |
| 74 | #ifndef SMALL_FOOTPRINT |
| 75 | |
| 76 | #define MASK32 (0xFFFFFFFF) |
| 77 | |
| 78 | /*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/ |
| 79 | static const opus_uint32 INV_TABLE[53]={ |
| 80 | 0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7, |
| 81 | 0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF, |
| 82 | 0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7, |
| 83 | 0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF, |
| 84 | 0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97, |
| 85 | 0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF, |
| 86 | 0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587, |
| 87 | 0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF, |
| 88 | 0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977, |
| 89 | 0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF, |
| 90 | 0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67, |
| 91 | 0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F, |
| 92 | 0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57, |
| 93 | 0xD8FD8FD9, |
| 94 | }; |
| 95 | |
| 96 | /*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact. |
| 97 | _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result |
| 98 | fits in 32 bits, but currently the table for multiplicative inverses is only |
| 99 | valid for _d<=52.*/ |
| 100 | static inline opus_uint32 imusdiv32odd(opus_uint32 _a,opus_uint32 _b, |
| 101 | opus_uint32 _c,int _d){ |
| 102 | celt_assert(_d<=52); |
| 103 | return (_a*_b-_c)*INV_TABLE[_d]&MASK32; |
| 104 | } |
| 105 | |
| 106 | /*Computes (_a*_b-_c)/_d when the quotient is known to be exact. |
| 107 | _d does not actually have to be even, but imusdiv32odd will be faster when |
| 108 | it's odd, so you should use that instead. |
| 109 | _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the |
| 110 | table for multiplicative inverses is only valid for _d<=54). |
| 111 | _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in |
| 112 | 32 bits.*/ |
| 113 | static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b, |
| 114 | opus_uint32 _c,int _d){ |
| 115 | opus_uint32 inv; |
| 116 | int mask; |
| 117 | int shift; |
| 118 | int one; |
| 119 | celt_assert(_d>0); |
| 120 | celt_assert(_d<=54); |
| 121 | shift=EC_ILOG(_d^(_d-1)); |
| 122 | inv=INV_TABLE[(_d-1)>>shift]; |
| 123 | shift--; |
| 124 | one=1<<shift; |
| 125 | mask=one-1; |
| 126 | return (_a*(_b>>shift)-(_c>>shift)+ |
| 127 | ((_a*(_b&mask)+one-(_c&mask))>>shift)-1)*inv&MASK32; |
| 128 | } |
| 129 | |
| 130 | #endif /* SMALL_FOOTPRINT */ |
| 131 | |
| 132 | /*Although derived separately, the pulse vector coding scheme is equivalent to |
| 133 | a Pyramid Vector Quantizer \cite{Fis86}. |
| 134 | Some additional notes about an early version appear at |
| 135 | http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering |
| 136 | and the definitions of some terms have evolved since that was written. |
| 137 | |
| 138 | The conversion from a pulse vector to an integer index (encoding) and back |
| 139 | (decoding) is governed by two related functions, V(N,K) and U(N,K). |
| 140 | |
| 141 | V(N,K) = the number of combinations, with replacement, of N items, taken K |
| 142 | at a time, when a sign bit is added to each item taken at least once (i.e., |
| 143 | the number of N-dimensional unit pulse vectors with K pulses). |
| 144 | One way to compute this is via |
| 145 | V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1, |
| 146 | where choose() is the binomial function. |
| 147 | A table of values for N<10 and K<10 looks like: |
| 148 | V[10][10] = { |
| 149 | {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, |
| 150 | {1, 2, 2, 2, 2, 2, 2, 2, 2, 2}, |
| 151 | {1, 4, 8, 12, 16, 20, 24, 28, 32, 36}, |
| 152 | {1, 6, 18, 38, 66, 102, 146, 198, 258, 326}, |
| 153 | {1, 8, 32, 88, 192, 360, 608, 952, 1408, 1992}, |
| 154 | {1, 10, 50, 170, 450, 1002, 1970, 3530, 5890, 9290}, |
| 155 | {1, 12, 72, 292, 912, 2364, 5336, 10836, 20256, 35436}, |
| 156 | {1, 14, 98, 462, 1666, 4942, 12642, 28814, 59906, 115598}, |
| 157 | {1, 16, 128, 688, 2816, 9424, 27008, 68464, 157184, 332688}, |
| 158 | {1, 18, 162, 978, 4482, 16722, 53154, 148626, 374274, 864146} |
| 159 | }; |
| 160 | |
| 161 | U(N,K) = the number of such combinations wherein N-1 objects are taken at |
| 162 | most K-1 at a time. |
| 163 | This is given by |
| 164 | U(N,K) = sum(k=0...K-1,V(N-1,k)) |
| 165 | = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0. |
| 166 | The latter expression also makes clear that U(N,K) is half the number of such |
| 167 | combinations wherein the first object is taken at least once. |
| 168 | Although it may not be clear from either of these definitions, U(N,K) is the |
| 169 | natural function to work with when enumerating the pulse vector codebooks, |
| 170 | not V(N,K). |
| 171 | U(N,K) is not well-defined for N=0, but with the extension |
| 172 | U(0,K) = K>0 ? 0 : 1, |
| 173 | the function becomes symmetric: U(N,K) = U(K,N), with a similar table: |
| 174 | U[10][10] = { |
| 175 | {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, |
| 176 | {0, 1, 1, 1, 1, 1, 1, 1, 1, 1}, |
| 177 | {0, 1, 3, 5, 7, 9, 11, 13, 15, 17}, |
| 178 | {0, 1, 5, 13, 25, 41, 61, 85, 113, 145}, |
| 179 | {0, 1, 7, 25, 63, 129, 231, 377, 575, 833}, |
| 180 | {0, 1, 9, 41, 129, 321, 681, 1289, 2241, 3649}, |
| 181 | {0, 1, 11, 61, 231, 681, 1683, 3653, 7183, 13073}, |
| 182 | {0, 1, 13, 85, 377, 1289, 3653, 8989, 19825, 40081}, |
| 183 | {0, 1, 15, 113, 575, 2241, 7183, 19825, 48639, 108545}, |
| 184 | {0, 1, 17, 145, 833, 3649, 13073, 40081, 108545, 265729} |
| 185 | }; |
| 186 | |
| 187 | With this extension, V(N,K) may be written in terms of U(N,K): |
| 188 | V(N,K) = U(N,K) + U(N,K+1) |
| 189 | for all N>=0, K>=0. |
| 190 | Thus U(N,K+1) represents the number of combinations where the first element |
| 191 | is positive or zero, and U(N,K) represents the number of combinations where |
| 192 | it is negative. |
| 193 | With a large enough table of U(N,K) values, we could write O(N) encoding |
| 194 | and O(min(N*log(K),N+K)) decoding routines, but such a table would be |
| 195 | prohibitively large for small embedded devices (K may be as large as 32767 |
| 196 | for small N, and N may be as large as 200). |
| 197 | |
| 198 | Both functions obey the same recurrence relation: |
| 199 | V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1), |
| 200 | U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1), |
| 201 | for all N>0, K>0, with different initial conditions at N=0 or K=0. |
| 202 | This allows us to construct a row of one of the tables above given the |
| 203 | previous row or the next row. |
| 204 | Thus we can derive O(NK) encoding and decoding routines with O(K) memory |
| 205 | using only addition and subtraction. |
| 206 | |
| 207 | When encoding, we build up from the U(2,K) row and work our way forwards. |
| 208 | When decoding, we need to start at the U(N,K) row and work our way backwards, |
| 209 | which requires a means of computing U(N,K). |
| 210 | U(N,K) may be computed from two previous values with the same N: |
| 211 | U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2) |
| 212 | for all N>1, and since U(N,K) is symmetric, a similar relation holds for two |
| 213 | previous values with the same K: |
| 214 | U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K) |
| 215 | for all K>1. |
| 216 | This allows us to construct an arbitrary row of the U(N,K) table by starting |
| 217 | with the first two values, which are constants. |
| 218 | This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K) |
| 219 | multiplications. |
| 220 | Similar relations can be derived for V(N,K), but are not used here. |
| 221 | |
| 222 | For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree |
| 223 | polynomial for fixed N. |
| 224 | The first few are |
| 225 | U(1,K) = 1, |
| 226 | U(2,K) = 2*K-1, |
| 227 | U(3,K) = (2*K-2)*K+1, |
| 228 | U(4,K) = (((4*K-6)*K+8)*K-3)/3, |
| 229 | U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3, |
| 230 | and |
| 231 | V(1,K) = 2, |
| 232 | V(2,K) = 4*K, |
| 233 | V(3,K) = 4*K*K+2, |
| 234 | V(4,K) = 8*(K*K+2)*K/3, |
| 235 | V(5,K) = ((4*K*K+20)*K*K+6)/3, |
| 236 | for all K>0. |
| 237 | This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for |
| 238 | small N (and indeed decoding is also O(N) for N<3). |
| 239 | |
| 240 | @ARTICLE{Fis86, |
| 241 | author="Thomas R. Fischer", |
| 242 | title="A Pyramid Vector Quantizer", |
| 243 | journal="IEEE Transactions on Information Theory", |
| 244 | volume="IT-32", |
| 245 | number=4, |
| 246 | pages="568--583", |
| 247 | month=Jul, |
| 248 | year=1986 |
| 249 | }*/ |
| 250 | |
| 251 | #ifndef SMALL_FOOTPRINT |
| 252 | /*Compute U(2,_k). |
| 253 | Note that this may be called with _k=32768 (maxK[2]+1).*/ |
| 254 | static inline unsigned ucwrs2(unsigned _k){ |
| 255 | celt_assert(_k>0); |
| 256 | return _k+(_k-1); |
| 257 | } |
| 258 | |
| 259 | /*Compute V(2,_k).*/ |
| 260 | static inline opus_uint32 ncwrs2(int _k){ |
| 261 | celt_assert(_k>0); |
| 262 | return 4*(opus_uint32)_k; |
| 263 | } |
| 264 | |
| 265 | /*Compute U(3,_k). |
| 266 | Note that this may be called with _k=32768 (maxK[3]+1).*/ |
| 267 | static inline opus_uint32 ucwrs3(unsigned _k){ |
| 268 | celt_assert(_k>0); |
| 269 | return (2*(opus_uint32)_k-2)*_k+1; |
| 270 | } |
| 271 | |
| 272 | /*Compute V(3,_k).*/ |
| 273 | static inline opus_uint32 ncwrs3(int _k){ |
| 274 | celt_assert(_k>0); |
| 275 | return 2*(2*(unsigned)_k*(opus_uint32)_k+1); |
| 276 | } |
| 277 | |
| 278 | /*Compute U(4,_k).*/ |
| 279 | static inline opus_uint32 ucwrs4(int _k){ |
| 280 | celt_assert(_k>0); |
| 281 | return imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1); |
| 282 | } |
| 283 | |
| 284 | /*Compute V(4,_k).*/ |
| 285 | static inline opus_uint32 ncwrs4(int _k){ |
| 286 | celt_assert(_k>0); |
| 287 | return ((_k*(opus_uint32)_k+2)*_k)/3<<3; |
| 288 | } |
| 289 | |
| 290 | #endif /* SMALL_FOOTPRINT */ |
| 291 | |
| 292 | /*Computes the next row/column of any recurrence that obeys the relation |
| 293 | u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1]. |
| 294 | _ui0 is the base case for the new row/column.*/ |
| 295 | static inline void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){ |
| 296 | opus_uint32 ui1; |
| 297 | unsigned j; |
| 298 | /*This do-while will overrun the array if we don't have storage for at least |
| 299 | 2 values.*/ |
| 300 | j=1; do { |
| 301 | ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0); |
| 302 | _ui[j-1]=_ui0; |
| 303 | _ui0=ui1; |
| 304 | } while (++j<_len); |
| 305 | _ui[j-1]=_ui0; |
| 306 | } |
| 307 | |
| 308 | /*Computes the previous row/column of any recurrence that obeys the relation |
| 309 | u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1]. |
| 310 | _ui0 is the base case for the new row/column.*/ |
| 311 | static inline void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){ |
| 312 | opus_uint32 ui1; |
| 313 | unsigned j; |
| 314 | /*This do-while will overrun the array if we don't have storage for at least |
| 315 | 2 values.*/ |
| 316 | j=1; do { |
| 317 | ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0); |
| 318 | _ui[j-1]=_ui0; |
| 319 | _ui0=ui1; |
| 320 | } while (++j<_n); |
| 321 | _ui[j-1]=_ui0; |
| 322 | } |
| 323 | |
| 324 | /*Compute V(_n,_k), as well as U(_n,0..._k+1). |
| 325 | _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/ |
| 326 | static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){ |
| 327 | opus_uint32 um2; |
| 328 | unsigned len; |
| 329 | unsigned k; |
| 330 | len=_k+2; |
| 331 | /*We require storage at least 3 values (e.g., _k>0).*/ |
| 332 | celt_assert(len>=3); |
| 333 | _u[0]=0; |
| 334 | _u[1]=um2=1; |
| 335 | #ifndef SMALL_FOOTPRINT |
| 336 | /*_k>52 doesn't work in the false branch due to the limits of INV_TABLE, |
| 337 | but _k isn't tested here because k<=52 for n=7*/ |
| 338 | if(_n<=6) |
| 339 | #endif |
| 340 | { |
| 341 | /*If _n==0, _u[0] should be 1 and the rest should be 0.*/ |
| 342 | /*If _n==1, _u[i] should be 1 for i>1.*/ |
| 343 | celt_assert(_n>=2); |
| 344 | /*If _k==0, the following do-while loop will overflow the buffer.*/ |
| 345 | celt_assert(_k>0); |
| 346 | k=2; |
| 347 | do _u[k]=(k<<1)-1; |
| 348 | while(++k<len); |
| 349 | for(k=2;k<_n;k++)unext(_u+1,_k+1,1); |
| 350 | } |
| 351 | #ifndef SMALL_FOOTPRINT |
| 352 | else{ |
| 353 | opus_uint32 um1; |
| 354 | opus_uint32 n2m1; |
| 355 | _u[2]=n2m1=um1=(_n<<1)-1; |
| 356 | for(k=3;k<len;k++){ |
| 357 | /*U(N,K) = ((2*N-1)*U(N,K-1)-U(N,K-2))/(K-1) + U(N,K-2)*/ |
| 358 | _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2; |
| 359 | if(++k>=len)break; |
| 360 | _u[k]=um1=imusdiv32odd(n2m1,um2,um1,(k-1)>>1)+um1; |
| 361 | } |
| 362 | } |
| 363 | #endif /* SMALL_FOOTPRINT */ |
| 364 | return _u[_k]+_u[_k+1]; |
| 365 | } |
| 366 | |
| 367 | #ifndef SMALL_FOOTPRINT |
| 368 | |
| 369 | /*Returns the _i'th combination of _k elements (at most 32767) chosen from a |
| 370 | set of size 1 with associated sign bits. |
| 371 | _y: Returns the vector of pulses.*/ |
| 372 | static inline void cwrsi1(int _k,opus_uint32 _i,int *_y){ |
| 373 | int s; |
| 374 | s=-(int)_i; |
| 375 | _y[0]=(_k+s)^s; |
| 376 | } |
| 377 | |
| 378 | /*Returns the _i'th combination of _k elements (at most 32767) chosen from a |
| 379 | set of size 2 with associated sign bits. |
| 380 | _y: Returns the vector of pulses.*/ |
| 381 | static inline void cwrsi2(int _k,opus_uint32 _i,int *_y){ |
| 382 | opus_uint32 p; |
| 383 | int s; |
| 384 | int yj; |
| 385 | p=ucwrs2(_k+1U); |
| 386 | s=-(_i>=p); |
| 387 | _i-=p&s; |
| 388 | yj=_k; |
| 389 | _k=(_i+1)>>1; |
| 390 | p=_k?ucwrs2(_k):0; |
| 391 | _i-=p; |
| 392 | yj-=_k; |
| 393 | _y[0]=(yj+s)^s; |
| 394 | cwrsi1(_k,_i,_y+1); |
| 395 | } |
| 396 | |
| 397 | /*Returns the _i'th combination of _k elements (at most 32767) chosen from a |
| 398 | set of size 3 with associated sign bits. |
| 399 | _y: Returns the vector of pulses.*/ |
| 400 | static void cwrsi3(int _k,opus_uint32 _i,int *_y){ |
| 401 | opus_uint32 p; |
| 402 | int s; |
| 403 | int yj; |
| 404 | p=ucwrs3(_k+1U); |
| 405 | s=-(_i>=p); |
| 406 | _i-=p&s; |
| 407 | yj=_k; |
| 408 | /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all |
| 409 | _i<2147418113=U(3,32768)).*/ |
| 410 | _k=_i>0?(isqrt32(2*_i-1)+1)>>1:0; |
| 411 | p=_k?ucwrs3(_k):0; |
| 412 | _i-=p; |
| 413 | yj-=_k; |
| 414 | _y[0]=(yj+s)^s; |
| 415 | cwrsi2(_k,_i,_y+1); |
| 416 | } |
| 417 | |
| 418 | /*Returns the _i'th combination of _k elements (at most 1172) chosen from a set |
| 419 | of size 4 with associated sign bits. |
| 420 | _y: Returns the vector of pulses.*/ |
| 421 | static void cwrsi4(int _k,opus_uint32 _i,int *_y){ |
| 422 | opus_uint32 p; |
| 423 | int s; |
| 424 | int yj; |
| 425 | int kl; |
| 426 | int kr; |
| 427 | p=ucwrs4(_k+1); |
| 428 | s=-(_i>=p); |
| 429 | _i-=p&s; |
| 430 | yj=_k; |
| 431 | /*We could solve a cubic for k here, but the form of the direct solution does |
| 432 | not lend itself well to exact integer arithmetic. |
| 433 | Instead we do a binary search on U(4,K).*/ |
| 434 | kl=0; |
| 435 | kr=_k; |
| 436 | for(;;){ |
| 437 | _k=(kl+kr)>>1; |
| 438 | p=_k?ucwrs4(_k):0; |
| 439 | if(p<_i){ |
| 440 | if(_k>=kr)break; |
| 441 | kl=_k+1; |
| 442 | } |
| 443 | else if(p>_i)kr=_k-1; |
| 444 | else break; |
| 445 | } |
| 446 | _i-=p; |
| 447 | yj-=_k; |
| 448 | _y[0]=(yj+s)^s; |
| 449 | cwrsi3(_k,_i,_y+1); |
| 450 | } |
| 451 | |
| 452 | #endif /* SMALL_FOOTPRINT */ |
| 453 | |
| 454 | /*Returns the _i'th combination of _k elements chosen from a set of size _n |
| 455 | with associated sign bits. |
| 456 | _y: Returns the vector of pulses. |
| 457 | _u: Must contain entries [0..._k+1] of row _n of U() on input. |
| 458 | Its contents will be destructively modified.*/ |
| 459 | static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ |
| 460 | int j; |
| 461 | celt_assert(_n>0); |
| 462 | j=0; |
| 463 | do{ |
| 464 | opus_uint32 p; |
| 465 | int s; |
| 466 | int yj; |
| 467 | p=_u[_k+1]; |
| 468 | s=-(_i>=p); |
| 469 | _i-=p&s; |
| 470 | yj=_k; |
| 471 | p=_u[_k]; |
| 472 | while(p>_i)p=_u[--_k]; |
| 473 | _i-=p; |
| 474 | yj-=_k; |
| 475 | _y[j]=(yj+s)^s; |
| 476 | uprev(_u,_k+2,0); |
| 477 | } |
| 478 | while(++j<_n); |
| 479 | } |
| 480 | |
| 481 | /*Returns the index of the given combination of K elements chosen from a set |
| 482 | of size 1 with associated sign bits. |
| 483 | _y: The vector of pulses, whose sum of absolute values is K. |
| 484 | _k: Returns K.*/ |
| 485 | static inline opus_uint32 icwrs1(const int *_y,int *_k){ |
| 486 | *_k=abs(_y[0]); |
| 487 | return _y[0]<0; |
| 488 | } |
| 489 | |
| 490 | #ifndef SMALL_FOOTPRINT |
| 491 | |
| 492 | /*Returns the index of the given combination of K elements chosen from a set |
| 493 | of size 2 with associated sign bits. |
| 494 | _y: The vector of pulses, whose sum of absolute values is K. |
| 495 | _k: Returns K.*/ |
| 496 | static inline opus_uint32 icwrs2(const int *_y,int *_k){ |
| 497 | opus_uint32 i; |
| 498 | int k; |
| 499 | i=icwrs1(_y+1,&k); |
| 500 | i+=k?ucwrs2(k):0; |
| 501 | k+=abs(_y[0]); |
| 502 | if(_y[0]<0)i+=ucwrs2(k+1U); |
| 503 | *_k=k; |
| 504 | return i; |
| 505 | } |
| 506 | |
| 507 | /*Returns the index of the given combination of K elements chosen from a set |
| 508 | of size 3 with associated sign bits. |
| 509 | _y: The vector of pulses, whose sum of absolute values is K. |
| 510 | _k: Returns K.*/ |
| 511 | static inline opus_uint32 icwrs3(const int *_y,int *_k){ |
| 512 | opus_uint32 i; |
| 513 | int k; |
| 514 | i=icwrs2(_y+1,&k); |
| 515 | i+=k?ucwrs3(k):0; |
| 516 | k+=abs(_y[0]); |
| 517 | if(_y[0]<0)i+=ucwrs3(k+1U); |
| 518 | *_k=k; |
| 519 | return i; |
| 520 | } |
| 521 | |
| 522 | /*Returns the index of the given combination of K elements chosen from a set |
| 523 | of size 4 with associated sign bits. |
| 524 | _y: The vector of pulses, whose sum of absolute values is K. |
| 525 | _k: Returns K.*/ |
| 526 | static inline opus_uint32 icwrs4(const int *_y,int *_k){ |
| 527 | opus_uint32 i; |
| 528 | int k; |
| 529 | i=icwrs3(_y+1,&k); |
| 530 | i+=k?ucwrs4(k):0; |
| 531 | k+=abs(_y[0]); |
| 532 | if(_y[0]<0)i+=ucwrs4(k+1); |
| 533 | *_k=k; |
| 534 | return i; |
| 535 | } |
| 536 | |
| 537 | #endif /* SMALL_FOOTPRINT */ |
| 538 | |
| 539 | /*Returns the index of the given combination of K elements chosen from a set |
| 540 | of size _n with associated sign bits. |
| 541 | _y: The vector of pulses, whose sum of absolute values must be _k. |
| 542 | _nc: Returns V(_n,_k).*/ |
| 543 | static inline opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y, |
| 544 | opus_uint32 *_u){ |
| 545 | opus_uint32 i; |
| 546 | int j; |
| 547 | int k; |
| 548 | /*We can't unroll the first two iterations of the loop unless _n>=2.*/ |
| 549 | celt_assert(_n>=2); |
| 550 | _u[0]=0; |
| 551 | for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1; |
| 552 | i=icwrs1(_y+_n-1,&k); |
| 553 | j=_n-2; |
| 554 | i+=_u[k]; |
| 555 | k+=abs(_y[j]); |
| 556 | if(_y[j]<0)i+=_u[k+1]; |
| 557 | while(j-->0){ |
| 558 | unext(_u,_k+2,0); |
| 559 | i+=_u[k]; |
| 560 | k+=abs(_y[j]); |
| 561 | if(_y[j]<0)i+=_u[k+1]; |
| 562 | } |
| 563 | *_nc=_u[k]+_u[k+1]; |
| 564 | return i; |
| 565 | } |
| 566 | |
| 567 | #ifdef CUSTOM_MODES |
| 568 | void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ |
| 569 | int k; |
| 570 | /*_maxk==0 => there's nothing to do.*/ |
| 571 | celt_assert(_maxk>0); |
| 572 | _bits[0]=0; |
| 573 | if (_n==1) |
| 574 | { |
| 575 | for (k=1;k<=_maxk;k++) |
| 576 | _bits[k] = 1<<_frac; |
| 577 | } |
| 578 | else { |
| 579 | VARDECL(opus_uint32,u); |
| 580 | SAVE_STACK; |
| 581 | ALLOC(u,_maxk+2U,opus_uint32); |
| 582 | ncwrs_urow(_n,_maxk,u); |
| 583 | for(k=1;k<=_maxk;k++) |
| 584 | _bits[k]=log2_frac(u[k]+u[k+1],_frac); |
| 585 | RESTORE_STACK; |
| 586 | } |
| 587 | } |
| 588 | #endif /* CUSTOM_MODES */ |
| 589 | |
| 590 | void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ |
| 591 | opus_uint32 i; |
| 592 | celt_assert(_k>0); |
| 593 | #ifndef SMALL_FOOTPRINT |
| 594 | switch(_n){ |
| 595 | case 2:{ |
| 596 | i=icwrs2(_y,&_k); |
| 597 | ec_enc_uint(_enc,i,ncwrs2(_k)); |
| 598 | }break; |
| 599 | case 3:{ |
| 600 | i=icwrs3(_y,&_k); |
| 601 | ec_enc_uint(_enc,i,ncwrs3(_k)); |
| 602 | }break; |
| 603 | case 4:{ |
| 604 | i=icwrs4(_y,&_k); |
| 605 | ec_enc_uint(_enc,i,ncwrs4(_k)); |
| 606 | }break; |
| 607 | default: |
| 608 | { |
| 609 | #endif |
| 610 | VARDECL(opus_uint32,u); |
| 611 | opus_uint32 nc; |
| 612 | SAVE_STACK; |
| 613 | ALLOC(u,_k+2U,opus_uint32); |
| 614 | i=icwrs(_n,_k,&nc,_y,u); |
| 615 | ec_enc_uint(_enc,i,nc); |
| 616 | RESTORE_STACK; |
| 617 | #ifndef SMALL_FOOTPRINT |
| 618 | } |
| 619 | break; |
| 620 | } |
| 621 | #endif |
| 622 | } |
| 623 | |
| 624 | void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec) |
| 625 | { |
| 626 | celt_assert(_k>0); |
| 627 | #ifndef SMALL_FOOTPRINT |
| 628 | switch(_n){ |
| 629 | case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break; |
| 630 | case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break; |
| 631 | case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break; |
| 632 | default: |
| 633 | { |
| 634 | #endif |
| 635 | VARDECL(opus_uint32,u); |
| 636 | SAVE_STACK; |
| 637 | ALLOC(u,_k+2U,opus_uint32); |
| 638 | cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); |
| 639 | RESTORE_STACK; |
| 640 | #ifndef SMALL_FOOTPRINT |
| 641 | } |
| 642 | break; |
| 643 | } |
| 644 | #endif |
| 645 | } |