/* Copyright (C) 2002 Jean-Marc Valin
   File: stereo.c

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
| 31 | |
| 32 | #ifdef HAVE_CONFIG_H |
| 33 | #include "config.h" |
| 34 | #endif |
| 35 | |
| 36 | #include <speex/speex_stereo.h> |
| 37 | #include <speex/speex_callbacks.h> |
| 38 | #include "math_approx.h" |
| 39 | #include "vq.h" |
| 40 | #include <math.h> |
| 41 | #include "os_support.h" |
| 42 | |
| 43 | typedef struct RealSpeexStereoState { |
| 44 | spx_word32_t balance; /**< Left/right balance info */ |
| 45 | spx_word32_t e_ratio; /**< Ratio of energies: E(left+right)/[E(left)+E(right)] */ |
| 46 | spx_word32_t smooth_left; /**< Smoothed left channel gain */ |
| 47 | spx_word32_t smooth_right; /**< Smoothed right channel gain */ |
| 48 | spx_uint32_t reserved1; /**< Reserved for future use */ |
| 49 | spx_int32_t reserved2; /**< Reserved for future use */ |
| 50 | } RealSpeexStereoState; |
| 51 | |
| 52 | |
/*float e_ratio_quant[4] = {1, 1.26, 1.587, 2};*/

/* Quantisation tables for the stereo side information.
   e_ratio_quant holds the four reconstruction values selected by the 2-bit
   energy-ratio index, e_ratio_quant_bounds the three decision thresholds
   handed to scal_quant(). The fixed-point build additionally needs
   balance_bounds, the 31 thresholds used to pick the 5-bit balance index. */
#ifdef FIXED_POINT
static const spx_word16_t e_ratio_quant[4] = {
   8192, 10332, 13009, 16384
};
static const spx_word16_t e_ratio_quant_bounds[3] = {
   9257, 11665, 14696
};
static const spx_word16_t balance_bounds[31] = {
   18,   23,   30,    38,    49,    63,    81,    104,
   134,  172,  221,   284,   364,   468,   600,   771,
   990,  1271, 1632,  2096,  2691,  3455,  4436,  5696,
   7314, 9392, 12059, 15484, 19882, 25529, 32766
};
#else
static const float e_ratio_quant[4] = {
   .25f, .315f, .397f, .5f
};
static const float e_ratio_quant_bounds[3] = {
   0.2825f, 0.356f, 0.4485f
};
#endif
| 65 | |
/* This is an ugly compatibility hack that properly resets the stereo state
   in case it is compiled in fixed-point, but initialised with the deprecated
   floating point static initialiser.

   In the fixed-point build the state is reset whenever reserved1 does not
   hold the 0xdeadbeef marker written by speex_stereo_state_reset(). In the
   float build the macro does nothing.

   The expansion deliberately has no trailing semicolon: the caller supplies
   it, so the macro behaves as a single statement (including inside an
   if/else without braces). */
#ifdef FIXED_POINT
#define COMPATIBILITY_HACK(s) do {if ((s)->reserved1 != 0xdeadbeef) speex_stereo_state_reset((SpeexStereoState*)(s)); } while (0)
#else
#define COMPATIBILITY_HACK(s) do { } while (0)
#endif
| 74 | |
| 75 | EXPORT SpeexStereoState *speex_stereo_state_init() |
| 76 | { |
| 77 | SpeexStereoState *stereo = speex_alloc(sizeof(SpeexStereoState)); |
| 78 | speex_stereo_state_reset(stereo); |
| 79 | return stereo; |
| 80 | } |
| 81 | |
| 82 | EXPORT void speex_stereo_state_reset(SpeexStereoState *_stereo) |
| 83 | { |
| 84 | RealSpeexStereoState *stereo = (RealSpeexStereoState*)_stereo; |
| 85 | #ifdef FIXED_POINT |
| 86 | stereo->balance = 65536; |
| 87 | stereo->e_ratio = 16384; |
| 88 | stereo->smooth_left = 16384; |
| 89 | stereo->smooth_right = 16384; |
| 90 | stereo->reserved1 = 0xdeadbeef; |
| 91 | stereo->reserved2 = 0; |
| 92 | #else |
| 93 | stereo->balance = 1.0f; |
| 94 | stereo->e_ratio = .5f; |
| 95 | stereo->smooth_left = 1.f; |
| 96 | stereo->smooth_right = 1.f; |
| 97 | stereo->reserved1 = 0; |
| 98 | stereo->reserved2 = 0; |
| 99 | #endif |
| 100 | } |
| 101 | |
| 102 | EXPORT void speex_stereo_state_destroy(SpeexStereoState *stereo) |
| 103 | { |
| 104 | speex_free(stereo); |
| 105 | } |
| 106 | |
| 107 | #ifndef DISABLE_FLOAT_API |
| 108 | EXPORT void speex_encode_stereo(float *data, int frame_size, SpeexBits *bits) |
| 109 | { |
| 110 | int i, tmp; |
| 111 | float e_left=0, e_right=0, e_tot=0; |
| 112 | float balance, e_ratio; |
| 113 | for (i=0;i<frame_size;i++) |
| 114 | { |
| 115 | e_left += ((float)data[2*i])*data[2*i]; |
| 116 | e_right += ((float)data[2*i+1])*data[2*i+1]; |
| 117 | data[i] = .5*(((float)data[2*i])+data[2*i+1]); |
| 118 | e_tot += ((float)data[i])*data[i]; |
| 119 | } |
| 120 | balance=(e_left+1)/(e_right+1); |
| 121 | e_ratio = e_tot/(1+e_left+e_right); |
| 122 | |
| 123 | /*Quantization*/ |
| 124 | speex_bits_pack(bits, 14, 5); |
| 125 | speex_bits_pack(bits, SPEEX_INBAND_STEREO, 4); |
| 126 | |
| 127 | balance=4*log(balance); |
| 128 | |
| 129 | /*Pack sign*/ |
| 130 | if (balance>0) |
| 131 | speex_bits_pack(bits, 0, 1); |
| 132 | else |
| 133 | speex_bits_pack(bits, 1, 1); |
| 134 | balance=floor(.5+fabs(balance)); |
| 135 | if (balance>30) |
| 136 | balance=31; |
| 137 | |
| 138 | speex_bits_pack(bits, (int)balance, 5); |
| 139 | |
| 140 | /* FIXME: this is a hack */ |
| 141 | tmp=scal_quant(e_ratio*Q15_ONE, e_ratio_quant_bounds, 4); |
| 142 | speex_bits_pack(bits, tmp, 2); |
| 143 | } |
| 144 | #endif /* #ifndef DISABLE_FLOAT_API */ |
| 145 | |
| 146 | EXPORT void speex_encode_stereo_int(spx_int16_t *data, int frame_size, SpeexBits *bits) |
| 147 | { |
| 148 | int i, tmp; |
| 149 | spx_word32_t e_left=0, e_right=0, e_tot=0; |
| 150 | spx_word32_t balance, e_ratio; |
| 151 | spx_word32_t largest, smallest; |
| 152 | int balance_id; |
| 153 | #ifdef FIXED_POINT |
| 154 | int shift; |
| 155 | #endif |
| 156 | |
| 157 | /* In band marker */ |
| 158 | speex_bits_pack(bits, 14, 5); |
| 159 | /* Stereo marker */ |
| 160 | speex_bits_pack(bits, SPEEX_INBAND_STEREO, 4); |
| 161 | |
| 162 | for (i=0;i<frame_size;i++) |
| 163 | { |
| 164 | e_left += SHR32(MULT16_16(data[2*i],data[2*i]),8); |
| 165 | e_right += SHR32(MULT16_16(data[2*i+1],data[2*i+1]),8); |
| 166 | #ifdef FIXED_POINT |
| 167 | /* I think this is actually unbiased */ |
| 168 | data[i] = SHR16(data[2*i],1)+PSHR16(data[2*i+1],1); |
| 169 | #else |
| 170 | data[i] = .5*(((float)data[2*i])+data[2*i+1]); |
| 171 | #endif |
| 172 | e_tot += SHR32(MULT16_16(data[i],data[i]),8); |
| 173 | } |
| 174 | if (e_left > e_right) |
| 175 | { |
| 176 | speex_bits_pack(bits, 0, 1); |
| 177 | largest = e_left; |
| 178 | smallest = e_right; |
| 179 | } else { |
| 180 | speex_bits_pack(bits, 1, 1); |
| 181 | largest = e_right; |
| 182 | smallest = e_left; |
| 183 | } |
| 184 | |
| 185 | /* Balance quantization */ |
| 186 | #ifdef FIXED_POINT |
| 187 | shift = spx_ilog2(largest)-15; |
| 188 | largest = VSHR32(largest, shift-4); |
| 189 | smallest = VSHR32(smallest, shift); |
| 190 | balance = DIV32(largest, ADD32(smallest, 1)); |
| 191 | if (balance > 32767) |
| 192 | balance = 32767; |
| 193 | balance_id = scal_quant(EXTRACT16(balance), balance_bounds, 32); |
| 194 | #else |
| 195 | balance=(largest+1.)/(smallest+1.); |
| 196 | balance=4*log(balance); |
| 197 | balance_id=floor(.5+fabs(balance)); |
| 198 | if (balance_id>30) |
| 199 | balance_id=31; |
| 200 | #endif |
| 201 | |
| 202 | speex_bits_pack(bits, balance_id, 5); |
| 203 | |
| 204 | /* "coherence" quantisation */ |
| 205 | #ifdef FIXED_POINT |
| 206 | shift = spx_ilog2(e_tot); |
| 207 | e_tot = VSHR32(e_tot, shift-25); |
| 208 | e_left = VSHR32(e_left, shift-10); |
| 209 | e_right = VSHR32(e_right, shift-10); |
| 210 | e_ratio = DIV32(e_tot, e_left+e_right+1); |
| 211 | #else |
| 212 | e_ratio = e_tot/(1.+e_left+e_right); |
| 213 | #endif |
| 214 | |
| 215 | tmp=scal_quant(EXTRACT16(e_ratio), e_ratio_quant_bounds, 4); |
| 216 | /*fprintf (stderr, "%d %d %d %d\n", largest, smallest, balance_id, e_ratio);*/ |
| 217 | speex_bits_pack(bits, tmp, 2); |
| 218 | } |
| 219 | |
| 220 | #ifndef DISABLE_FLOAT_API |
| 221 | EXPORT void speex_decode_stereo(float *data, int frame_size, SpeexStereoState *_stereo) |
| 222 | { |
| 223 | int i; |
| 224 | spx_word32_t balance; |
| 225 | spx_word16_t e_left, e_right, e_ratio; |
| 226 | RealSpeexStereoState *stereo = (RealSpeexStereoState*)_stereo; |
| 227 | |
| 228 | COMPATIBILITY_HACK(stereo); |
| 229 | |
| 230 | balance=stereo->balance; |
| 231 | e_ratio=stereo->e_ratio; |
| 232 | |
| 233 | /* These two are Q14, with max value just below 2. */ |
| 234 | e_right = DIV32(QCONST32(1., 22), spx_sqrt(MULT16_32_Q15(e_ratio, ADD32(QCONST32(1., 16), balance)))); |
| 235 | e_left = SHR32(MULT16_16(spx_sqrt(balance), e_right), 8); |
| 236 | |
| 237 | for (i=frame_size-1;i>=0;i--) |
| 238 | { |
| 239 | spx_word16_t tmp=data[i]; |
| 240 | stereo->smooth_left = EXTRACT16(PSHR32(MAC16_16(MULT16_16(stereo->smooth_left, QCONST16(0.98, 15)), e_left, QCONST16(0.02, 15)), 15)); |
| 241 | stereo->smooth_right = EXTRACT16(PSHR32(MAC16_16(MULT16_16(stereo->smooth_right, QCONST16(0.98, 15)), e_right, QCONST16(0.02, 15)), 15)); |
| 242 | data[2*i] = (float)MULT16_16_P14(stereo->smooth_left, tmp); |
| 243 | data[2*i+1] = (float)MULT16_16_P14(stereo->smooth_right, tmp); |
| 244 | } |
| 245 | } |
| 246 | #endif /* #ifndef DISABLE_FLOAT_API */ |
| 247 | |
| 248 | EXPORT void speex_decode_stereo_int(spx_int16_t *data, int frame_size, SpeexStereoState *_stereo) |
| 249 | { |
| 250 | int i; |
| 251 | spx_word32_t balance; |
| 252 | spx_word16_t e_left, e_right, e_ratio; |
| 253 | RealSpeexStereoState *stereo = (RealSpeexStereoState*)_stereo; |
| 254 | |
| 255 | COMPATIBILITY_HACK(stereo); |
| 256 | |
| 257 | balance=stereo->balance; |
| 258 | e_ratio=stereo->e_ratio; |
| 259 | |
| 260 | /* These two are Q14, with max value just below 2. */ |
| 261 | e_right = DIV32(QCONST32(1., 22), spx_sqrt(MULT16_32_Q15(e_ratio, ADD32(QCONST32(1., 16), balance)))); |
| 262 | e_left = SHR32(MULT16_16(spx_sqrt(balance), e_right), 8); |
| 263 | |
| 264 | for (i=frame_size-1;i>=0;i--) |
| 265 | { |
| 266 | spx_int16_t tmp=data[i]; |
| 267 | stereo->smooth_left = EXTRACT16(PSHR32(MAC16_16(MULT16_16(stereo->smooth_left, QCONST16(0.98, 15)), e_left, QCONST16(0.02, 15)), 15)); |
| 268 | stereo->smooth_right = EXTRACT16(PSHR32(MAC16_16(MULT16_16(stereo->smooth_right, QCONST16(0.98, 15)), e_right, QCONST16(0.02, 15)), 15)); |
| 269 | data[2*i] = (spx_int16_t)MULT16_16_P14(stereo->smooth_left, tmp); |
| 270 | data[2*i+1] = (spx_int16_t)MULT16_16_P14(stereo->smooth_right, tmp); |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | EXPORT int speex_std_stereo_request_handler(SpeexBits *bits, void *state, void *data) |
| 275 | { |
| 276 | RealSpeexStereoState *stereo; |
| 277 | spx_word16_t sign=1, dexp; |
| 278 | int tmp; |
| 279 | |
| 280 | stereo = (RealSpeexStereoState*)data; |
| 281 | |
| 282 | COMPATIBILITY_HACK(stereo); |
| 283 | |
| 284 | if (speex_bits_unpack_unsigned(bits, 1)) |
| 285 | sign=-1; |
| 286 | dexp = speex_bits_unpack_unsigned(bits, 5); |
| 287 | #ifndef FIXED_POINT |
| 288 | stereo->balance = exp(sign*.25*dexp); |
| 289 | #else |
| 290 | stereo->balance = spx_exp(MULT16_16(sign, SHL16(dexp, 9))); |
| 291 | #endif |
| 292 | tmp = speex_bits_unpack_unsigned(bits, 2); |
| 293 | stereo->e_ratio = e_ratio_quant[tmp]; |
| 294 | |
| 295 | return 0; |
| 296 | } |