Tristan Matthews | 0a329cc | 2013-07-17 13:20:14 -0400 | [diff] [blame] | 1 | /* Copyright (C) 2003-2006 Jean-Marc Valin |
| 2 | |
| 3 | File: mdf.c |
| 4 | Echo canceller based on the MDF algorithm (see below) |
| 5 | |
| 6 | Redistribution and use in source and binary forms, with or without |
| 7 | modification, are permitted provided that the following conditions are |
| 8 | met: |
| 9 | |
| 10 | 1. Redistributions of source code must retain the above copyright notice, |
| 11 | this list of conditions and the following disclaimer. |
| 12 | |
| 13 | 2. Redistributions in binary form must reproduce the above copyright |
| 14 | notice, this list of conditions and the following disclaimer in the |
| 15 | documentation and/or other materials provided with the distribution. |
| 16 | |
| 17 | 3. The name of the author may not be used to endorse or promote products |
| 18 | derived from this software without specific prior written permission. |
| 19 | |
| 20 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| 21 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| 22 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 23 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, |
| 24 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| 25 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
| 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 30 | POSSIBILITY OF SUCH DAMAGE. |
| 31 | */ |
| 32 | |
| 33 | /* |
| 34 | The echo canceller is based on the MDF algorithm described in: |
| 35 | |
| 36 | J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter, |
| 37 | IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2, |
| 38 | February 1990. |
| 39 | |
| 40 | We use the Alternatively Updated MDF (AUMDF) variant. Robustness to |
| 41 | double-talk is achieved using a variable learning rate as described in: |
| 42 | |
| 43 | Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo |
| 44 | Cancellation With Double-Talk. IEEE Transactions on Audio, |
| 45 | Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007. |
| 46 | http://people.xiph.org/~jm/papers/valin_taslp2006.pdf |
| 47 | |
| 48 | There is no explicit double-talk detection, but a continuous variation |
| 49 | in the learning rate based on residual echo, double-talk and background |
| 50 | noise. |
| 51 | |
| 52 | About the fixed-point version: |
| 53 | All the signals are represented with 16-bit words. The filter weights |
| 54 | are represented with 32-bit words, but only the top 16 bits are used |
| 55 | in most cases. The lower 16 bits are completely unreliable (due to the |
| 56 | fact that the update is done only on the top bits), but help in the |
| 57 | adaptation -- probably by removing a "threshold effect" due to |
| 58 | quantization (rounding going to zero) when the gradient is small. |
| 59 | |
   Another kludge that seems to work well: when performing the weight
   update, we only move halfway toward the "goal"; this seems to
   reduce the effect of quantization noise in the update phase. This
   can be seen as applying a gradient descent on a "soft constraint"
   instead of having a hard constraint.
| 65 | |
| 66 | */ |
| 67 | |
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stddef.h>

#include "arch.h"
#include "speex/speex_echo.h"
#include "fftwrap.h"
#include "pseudofloat.h"
#include "math_approx.h"
#include "os_support.h"
| 78 | |
| 79 | #ifndef M_PI |
| 80 | #define M_PI 3.14159265358979323846 |
| 81 | #endif |
| 82 | |
| 83 | #ifdef FIXED_POINT |
| 84 | #define WEIGHT_SHIFT 11 |
| 85 | #define NORMALIZE_SCALEDOWN 5 |
| 86 | #define NORMALIZE_SCALEUP 3 |
| 87 | #else |
| 88 | #define WEIGHT_SHIFT 0 |
| 89 | #endif |
| 90 | |
| 91 | /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk |
| 92 | and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */ |
| 93 | #define TWO_PATH |
| 94 | |
| 95 | #ifdef FIXED_POINT |
| 96 | static const spx_float_t MIN_LEAK = {20972, -22}; |
| 97 | |
| 98 | /* Constants for the two-path filter */ |
| 99 | static const spx_float_t VAR1_SMOOTH = {23593, -16}; |
| 100 | static const spx_float_t VAR2_SMOOTH = {23675, -15}; |
| 101 | static const spx_float_t VAR1_UPDATE = {16384, -15}; |
| 102 | static const spx_float_t VAR2_UPDATE = {16384, -16}; |
| 103 | static const spx_float_t VAR_BACKTRACK = {16384, -12}; |
| 104 | #define TOP16(x) ((x)>>16) |
| 105 | |
| 106 | #else |
| 107 | |
| 108 | static const spx_float_t MIN_LEAK = .005f; |
| 109 | |
| 110 | /* Constants for the two-path filter */ |
| 111 | static const spx_float_t VAR1_SMOOTH = .36f; |
| 112 | static const spx_float_t VAR2_SMOOTH = .7225f; |
| 113 | static const spx_float_t VAR1_UPDATE = .5f; |
| 114 | static const spx_float_t VAR2_UPDATE = .25f; |
| 115 | static const spx_float_t VAR_BACKTRACK = 4.f; |
| 116 | #define TOP16(x) (x) |
| 117 | #endif |
| 118 | |
| 119 | |
| 120 | #define PLAYBACK_DELAY 2 |
| 121 | |
| 122 | void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); |
| 123 | |
| 124 | |
| 125 | /** Speex echo cancellation state. */ |
| 126 | struct SpeexEchoState_ { |
| 127 | int frame_size; /**< Number of samples processed each time */ |
| 128 | int window_size; |
| 129 | int M; |
| 130 | int cancel_count; |
| 131 | int adapted; |
| 132 | int saturated; |
| 133 | int screwed_up; |
| 134 | spx_int32_t sampling_rate; |
| 135 | spx_word16_t spec_average; |
| 136 | spx_word16_t beta0; |
| 137 | spx_word16_t beta_max; |
| 138 | spx_word32_t sum_adapt; |
| 139 | spx_word16_t leak_estimate; |
| 140 | |
| 141 | spx_word16_t *e; /* scratch */ |
| 142 | spx_word16_t *x; /* Far-end input buffer (2N) */ |
| 143 | spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */ |
| 144 | spx_word16_t *input; /* scratch */ |
| 145 | spx_word16_t *y; /* scratch */ |
| 146 | spx_word16_t *last_y; |
| 147 | spx_word16_t *Y; /* scratch */ |
| 148 | spx_word16_t *E; |
| 149 | spx_word32_t *PHI; /* scratch */ |
| 150 | spx_word32_t *W; /* (Background) filter weights */ |
| 151 | #ifdef TWO_PATH |
| 152 | spx_word16_t *foreground; /* Foreground filter weights */ |
| 153 | spx_word32_t Davg1; /* 1st recursive average of the residual power difference */ |
| 154 | spx_word32_t Davg2; /* 2nd recursive average of the residual power difference */ |
| 155 | spx_float_t Dvar1; /* Estimated variance of 1st estimator */ |
| 156 | spx_float_t Dvar2; /* Estimated variance of 2nd estimator */ |
| 157 | #endif |
| 158 | spx_word32_t *power; /* Power of the far-end signal */ |
| 159 | spx_float_t *power_1;/* Inverse power of far-end */ |
| 160 | spx_word16_t *wtmp; /* scratch */ |
| 161 | #ifdef FIXED_POINT |
| 162 | spx_word16_t *wtmp2; /* scratch */ |
| 163 | #endif |
| 164 | spx_word32_t *Rf; /* scratch */ |
| 165 | spx_word32_t *Yf; /* scratch */ |
| 166 | spx_word32_t *Xf; /* scratch */ |
| 167 | spx_word32_t *Eh; |
| 168 | spx_word32_t *Yh; |
| 169 | spx_float_t Pey; |
| 170 | spx_float_t Pyy; |
| 171 | spx_word16_t *window; |
| 172 | spx_word16_t *prop; |
| 173 | void *fft_table; |
| 174 | spx_word16_t memX, memD, memE; |
| 175 | spx_word16_t preemph; |
| 176 | spx_word16_t notch_radius; |
| 177 | spx_mem_t notch_mem[2]; |
| 178 | |
| 179 | /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ |
| 180 | spx_int16_t *play_buf; |
| 181 | int play_buf_pos; |
| 182 | int play_buf_started; |
| 183 | }; |
| 184 | |
| 185 | static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) |
| 186 | { |
| 187 | int i; |
| 188 | spx_word16_t den2; |
| 189 | #ifdef FIXED_POINT |
| 190 | den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius)); |
| 191 | #else |
| 192 | den2 = radius*radius + .7*(1-radius)*(1-radius); |
| 193 | #endif |
| 194 | /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ |
| 195 | for (i=0;i<len;i++) |
| 196 | { |
| 197 | spx_word16_t vin = in[i]; |
| 198 | spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); |
| 199 | #ifdef FIXED_POINT |
| 200 | mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1); |
| 201 | #else |
| 202 | mem[0] = mem[1] + 2*(-vin + radius*vout); |
| 203 | #endif |
| 204 | mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout); |
| 205 | out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767); |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | /* This inner product is slightly different from the codec version because of fixed-point */ |
| 210 | static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) |
| 211 | { |
| 212 | spx_word32_t sum=0; |
| 213 | len >>= 1; |
| 214 | while(len--) |
| 215 | { |
| 216 | spx_word32_t part=0; |
| 217 | part = MAC16_16(part,*x++,*y++); |
| 218 | part = MAC16_16(part,*x++,*y++); |
| 219 | /* HINT: If you had a 40-bit accumulator, you could shift only at the end */ |
| 220 | sum = ADD32(sum,SHR32(part,6)); |
| 221 | } |
| 222 | return sum; |
| 223 | } |
| 224 | |
| 225 | /** Compute power spectrum of a half-complex (packed) vector */ |
| 226 | static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N) |
| 227 | { |
| 228 | int i, j; |
| 229 | ps[0]=MULT16_16(X[0],X[0]); |
| 230 | for (i=1,j=1;i<N-1;i+=2,j++) |
| 231 | { |
| 232 | ps[j] = MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]); |
| 233 | } |
| 234 | ps[j]=MULT16_16(X[i],X[i]); |
| 235 | } |
| 236 | |
| 237 | /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */ |
| 238 | #ifdef FIXED_POINT |
| 239 | static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) |
| 240 | { |
| 241 | int i,j; |
| 242 | spx_word32_t tmp1=0,tmp2=0; |
| 243 | for (j=0;j<M;j++) |
| 244 | { |
| 245 | tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N])); |
| 246 | } |
| 247 | acc[0] = PSHR32(tmp1,WEIGHT_SHIFT); |
| 248 | for (i=1;i<N-1;i+=2) |
| 249 | { |
| 250 | tmp1 = tmp2 = 0; |
| 251 | for (j=0;j<M;j++) |
| 252 | { |
| 253 | tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1]))); |
| 254 | tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1])); |
| 255 | } |
| 256 | acc[i] = PSHR32(tmp1,WEIGHT_SHIFT); |
| 257 | acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT); |
| 258 | } |
| 259 | tmp1 = tmp2 = 0; |
| 260 | for (j=0;j<M;j++) |
| 261 | { |
| 262 | tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1])); |
| 263 | } |
| 264 | acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); |
| 265 | } |
| 266 | static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M) |
| 267 | { |
| 268 | int i,j; |
| 269 | spx_word32_t tmp1=0,tmp2=0; |
| 270 | for (j=0;j<M;j++) |
| 271 | { |
| 272 | tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]); |
| 273 | } |
| 274 | acc[0] = PSHR32(tmp1,WEIGHT_SHIFT); |
| 275 | for (i=1;i<N-1;i+=2) |
| 276 | { |
| 277 | tmp1 = tmp2 = 0; |
| 278 | for (j=0;j<M;j++) |
| 279 | { |
| 280 | tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1])); |
| 281 | tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]); |
| 282 | } |
| 283 | acc[i] = PSHR32(tmp1,WEIGHT_SHIFT); |
| 284 | acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT); |
| 285 | } |
| 286 | tmp1 = tmp2 = 0; |
| 287 | for (j=0;j<M;j++) |
| 288 | { |
| 289 | tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]); |
| 290 | } |
| 291 | acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); |
| 292 | } |
| 293 | |
| 294 | #else |
| 295 | static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) |
| 296 | { |
| 297 | int i,j; |
| 298 | for (i=0;i<N;i++) |
| 299 | acc[i] = 0; |
| 300 | for (j=0;j<M;j++) |
| 301 | { |
| 302 | acc[0] += X[0]*Y[0]; |
| 303 | for (i=1;i<N-1;i+=2) |
| 304 | { |
| 305 | acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]); |
| 306 | acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]); |
| 307 | } |
| 308 | acc[i] += X[i]*Y[i]; |
| 309 | X += N; |
| 310 | Y += N; |
| 311 | } |
| 312 | } |
| 313 | #define spectral_mul_accum16 spectral_mul_accum |
| 314 | #endif |
| 315 | |
| 316 | /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */ |
| 317 | static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N) |
| 318 | { |
| 319 | int i, j; |
| 320 | spx_float_t W; |
| 321 | W = FLOAT_AMULT(p, w[0]); |
| 322 | prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0])); |
| 323 | for (i=1,j=1;i<N-1;i+=2,j++) |
| 324 | { |
| 325 | W = FLOAT_AMULT(p, w[j]); |
| 326 | prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1])); |
| 327 | prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1])); |
| 328 | } |
| 329 | W = FLOAT_AMULT(p, w[j]); |
| 330 | prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i])); |
| 331 | } |
| 332 | |
| 333 | static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop) |
| 334 | { |
| 335 | int i, j; |
| 336 | spx_word16_t max_sum = 1; |
| 337 | spx_word32_t prop_sum = 1; |
| 338 | for (i=0;i<M;i++) |
| 339 | { |
| 340 | spx_word32_t tmp = 1; |
| 341 | for (j=0;j<N;j++) |
| 342 | tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18))); |
| 343 | #ifdef FIXED_POINT |
| 344 | /* Just a security in case an overflow were to occur */ |
| 345 | tmp = MIN32(ABS32(tmp), 536870912); |
| 346 | #endif |
| 347 | prop[i] = spx_sqrt(tmp); |
| 348 | if (prop[i] > max_sum) |
| 349 | max_sum = prop[i]; |
| 350 | } |
| 351 | for (i=0;i<M;i++) |
| 352 | { |
| 353 | prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum); |
| 354 | prop_sum += EXTEND32(prop[i]); |
| 355 | } |
| 356 | for (i=0;i<M;i++) |
| 357 | { |
| 358 | prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum); |
| 359 | /*printf ("%f ", prop[i]);*/ |
| 360 | } |
| 361 | /*printf ("\n");*/ |
| 362 | } |
| 363 | |
| 364 | #ifdef DUMP_ECHO_CANCEL_DATA |
| 365 | #include <stdio.h> |
| 366 | static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL; |
| 367 | |
| 368 | static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len) |
| 369 | { |
| 370 | if (!(rFile && pFile && oFile)) |
| 371 | { |
| 372 | speex_fatal("Dump files not open"); |
| 373 | } |
| 374 | fwrite(rec, sizeof(spx_int16_t), len, rFile); |
| 375 | fwrite(play, sizeof(spx_int16_t), len, pFile); |
| 376 | fwrite(out, sizeof(spx_int16_t), len, oFile); |
| 377 | } |
| 378 | #endif |
| 379 | |
| 380 | /** Creates a new echo canceller state */ |
| 381 | SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) |
| 382 | { |
| 383 | int i,N,M; |
| 384 | SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); |
| 385 | |
| 386 | #ifdef DUMP_ECHO_CANCEL_DATA |
| 387 | if (rFile || pFile || oFile) |
| 388 | speex_fatal("Opening dump files twice"); |
| 389 | rFile = fopen("aec_rec.sw", "wb"); |
| 390 | pFile = fopen("aec_play.sw", "wb"); |
| 391 | oFile = fopen("aec_out.sw", "wb"); |
| 392 | #endif |
| 393 | |
| 394 | st->frame_size = frame_size; |
| 395 | st->window_size = 2*frame_size; |
| 396 | N = st->window_size; |
| 397 | M = st->M = (filter_length+st->frame_size-1)/frame_size; |
| 398 | st->cancel_count=0; |
| 399 | st->sum_adapt = 0; |
| 400 | st->saturated = 0; |
| 401 | st->screwed_up = 0; |
| 402 | /* This is the default sampling rate */ |
| 403 | st->sampling_rate = 8000; |
| 404 | st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); |
| 405 | #ifdef FIXED_POINT |
| 406 | st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); |
| 407 | st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); |
| 408 | #else |
| 409 | st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; |
| 410 | st->beta_max = (.5f*st->frame_size)/st->sampling_rate; |
| 411 | #endif |
| 412 | st->leak_estimate = 0; |
| 413 | |
| 414 | st->fft_table = spx_fft_init(N); |
| 415 | |
| 416 | st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); |
| 417 | st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); |
| 418 | st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t)); |
| 419 | st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); |
| 420 | st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); |
| 421 | st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); |
| 422 | st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); |
| 423 | st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); |
| 424 | st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); |
| 425 | st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); |
| 426 | |
| 427 | st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t)); |
| 428 | st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); |
| 429 | st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); |
| 430 | st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); |
| 431 | #ifdef TWO_PATH |
| 432 | st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t)); |
| 433 | #endif |
| 434 | st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); |
| 435 | st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); |
| 436 | st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t)); |
| 437 | st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); |
| 438 | st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t)); |
| 439 | st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); |
| 440 | #ifdef FIXED_POINT |
| 441 | st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); |
| 442 | for (i=0;i<N>>1;i++) |
| 443 | { |
| 444 | st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1)); |
| 445 | st->window[N-i-1] = st->window[i]; |
| 446 | } |
| 447 | #else |
| 448 | for (i=0;i<N;i++) |
| 449 | st->window[i] = .5-.5*cos(2*M_PI*i/N); |
| 450 | #endif |
| 451 | for (i=0;i<=st->frame_size;i++) |
| 452 | st->power_1[i] = FLOAT_ONE; |
| 453 | for (i=0;i<N*M;i++) |
| 454 | st->W[i] = 0; |
| 455 | { |
| 456 | spx_word32_t sum = 0; |
| 457 | /* Ratio of ~10 between adaptation rate of first and last block */ |
| 458 | spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1); |
| 459 | st->prop[0] = QCONST16(.7, 15); |
| 460 | sum = EXTEND32(st->prop[0]); |
| 461 | for (i=1;i<M;i++) |
| 462 | { |
| 463 | st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay); |
| 464 | sum = ADD32(sum, EXTEND32(st->prop[i])); |
| 465 | } |
| 466 | for (i=M-1;i>=0;i--) |
| 467 | { |
| 468 | st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); |
| 469 | } |
| 470 | } |
| 471 | |
| 472 | st->memX=st->memD=st->memE=0; |
| 473 | st->preemph = QCONST16(.9,15); |
| 474 | if (st->sampling_rate<12000) |
| 475 | st->notch_radius = QCONST16(.9, 15); |
| 476 | else if (st->sampling_rate<24000) |
| 477 | st->notch_radius = QCONST16(.982, 15); |
| 478 | else |
| 479 | st->notch_radius = QCONST16(.992, 15); |
| 480 | |
| 481 | st->notch_mem[0] = st->notch_mem[1] = 0; |
| 482 | st->adapted = 0; |
| 483 | st->Pey = st->Pyy = FLOAT_ONE; |
| 484 | |
| 485 | #ifdef TWO_PATH |
| 486 | st->Davg1 = st->Davg2 = 0; |
| 487 | st->Dvar1 = st->Dvar2 = FLOAT_ZERO; |
| 488 | #endif |
| 489 | |
| 490 | st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); |
| 491 | st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; |
| 492 | st->play_buf_started = 0; |
| 493 | |
| 494 | return st; |
| 495 | } |
| 496 | |
| 497 | /** Resets echo canceller state */ |
| 498 | void speex_echo_state_reset(SpeexEchoState *st) |
| 499 | { |
| 500 | int i, M, N; |
| 501 | st->cancel_count=0; |
| 502 | st->screwed_up = 0; |
| 503 | N = st->window_size; |
| 504 | M = st->M; |
| 505 | for (i=0;i<N*M;i++) |
| 506 | st->W[i] = 0; |
| 507 | #ifdef TWO_PATH |
| 508 | for (i=0;i<N*M;i++) |
| 509 | st->foreground[i] = 0; |
| 510 | #endif |
| 511 | for (i=0;i<N*(M+1);i++) |
| 512 | st->X[i] = 0; |
| 513 | for (i=0;i<=st->frame_size;i++) |
| 514 | { |
| 515 | st->power[i] = 0; |
| 516 | st->power_1[i] = FLOAT_ONE; |
| 517 | st->Eh[i] = 0; |
| 518 | st->Yh[i] = 0; |
| 519 | } |
| 520 | for (i=0;i<st->frame_size;i++) |
| 521 | { |
| 522 | st->last_y[i] = 0; |
| 523 | } |
| 524 | for (i=0;i<N;i++) |
| 525 | { |
| 526 | st->E[i] = 0; |
| 527 | st->x[i] = 0; |
| 528 | } |
| 529 | st->notch_mem[0] = st->notch_mem[1] = 0; |
| 530 | st->memX=st->memD=st->memE=0; |
| 531 | |
| 532 | st->saturated = 0; |
| 533 | st->adapted = 0; |
| 534 | st->sum_adapt = 0; |
| 535 | st->Pey = st->Pyy = FLOAT_ONE; |
| 536 | #ifdef TWO_PATH |
| 537 | st->Davg1 = st->Davg2 = 0; |
| 538 | st->Dvar1 = st->Dvar2 = FLOAT_ZERO; |
| 539 | #endif |
| 540 | for (i=0;i<3*st->frame_size;i++) |
| 541 | st->play_buf[i] = 0; |
| 542 | st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; |
| 543 | st->play_buf_started = 0; |
| 544 | |
| 545 | } |
| 546 | |
| 547 | /** Destroys an echo canceller state */ |
| 548 | void speex_echo_state_destroy(SpeexEchoState *st) |
| 549 | { |
| 550 | spx_fft_destroy(st->fft_table); |
| 551 | |
| 552 | speex_free(st->e); |
| 553 | speex_free(st->x); |
| 554 | speex_free(st->input); |
| 555 | speex_free(st->y); |
| 556 | speex_free(st->last_y); |
| 557 | speex_free(st->Yf); |
| 558 | speex_free(st->Rf); |
| 559 | speex_free(st->Xf); |
| 560 | speex_free(st->Yh); |
| 561 | speex_free(st->Eh); |
| 562 | |
| 563 | speex_free(st->X); |
| 564 | speex_free(st->Y); |
| 565 | speex_free(st->E); |
| 566 | speex_free(st->W); |
| 567 | #ifdef TWO_PATH |
| 568 | speex_free(st->foreground); |
| 569 | #endif |
| 570 | speex_free(st->PHI); |
| 571 | speex_free(st->power); |
| 572 | speex_free(st->power_1); |
| 573 | speex_free(st->window); |
| 574 | speex_free(st->prop); |
| 575 | speex_free(st->wtmp); |
| 576 | #ifdef FIXED_POINT |
| 577 | speex_free(st->wtmp2); |
| 578 | #endif |
| 579 | speex_free(st->play_buf); |
| 580 | speex_free(st); |
| 581 | |
| 582 | #ifdef DUMP_ECHO_CANCEL_DATA |
| 583 | fclose(rFile); |
| 584 | fclose(pFile); |
| 585 | fclose(oFile); |
| 586 | rFile = pFile = oFile = NULL; |
| 587 | #endif |
| 588 | } |
| 589 | |
| 590 | void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) |
| 591 | { |
| 592 | int i; |
| 593 | /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/ |
| 594 | st->play_buf_started = 1; |
| 595 | if (st->play_buf_pos>=st->frame_size) |
| 596 | { |
| 597 | speex_echo_cancellation(st, rec, st->play_buf, out); |
| 598 | st->play_buf_pos -= st->frame_size; |
| 599 | for (i=0;i<st->play_buf_pos;i++) |
| 600 | st->play_buf[i] = st->play_buf[i+st->frame_size]; |
| 601 | } else { |
| 602 | speex_warning("No playback frame available (your application is buggy and/or got xruns)"); |
| 603 | if (st->play_buf_pos!=0) |
| 604 | { |
| 605 | speex_warning("internal playback buffer corruption?"); |
| 606 | st->play_buf_pos = 0; |
| 607 | } |
| 608 | for (i=0;i<st->frame_size;i++) |
| 609 | out[i] = rec[i]; |
| 610 | } |
| 611 | } |
| 612 | |
| 613 | void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) |
| 614 | { |
| 615 | /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ |
| 616 | if (!st->play_buf_started) |
| 617 | { |
| 618 | speex_warning("discarded first playback frame"); |
| 619 | return; |
| 620 | } |
| 621 | if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size) |
| 622 | { |
| 623 | int i; |
| 624 | for (i=0;i<st->frame_size;i++) |
| 625 | st->play_buf[st->play_buf_pos+i] = play[i]; |
| 626 | st->play_buf_pos += st->frame_size; |
| 627 | if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size) |
| 628 | { |
| 629 | speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)"); |
| 630 | for (i=0;i<st->frame_size;i++) |
| 631 | st->play_buf[st->play_buf_pos+i] = play[i]; |
| 632 | st->play_buf_pos += st->frame_size; |
| 633 | } |
| 634 | } else { |
| 635 | speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)"); |
| 636 | } |
| 637 | } |
| 638 | |
| 639 | /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ |
| 640 | void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) |
| 641 | { |
| 642 | speex_echo_cancellation(st, in, far_end, out); |
| 643 | } |
| 644 | |
| 645 | /** Performs echo cancellation on a frame */ |
| 646 | void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) |
| 647 | { |
| 648 | int i,j; |
| 649 | int N,M; |
| 650 | spx_word32_t Syy,See,Sxx,Sdd, Sff; |
| 651 | #ifdef TWO_PATH |
| 652 | spx_word32_t Dbf; |
| 653 | int update_foreground; |
| 654 | #endif |
| 655 | spx_word32_t Sey; |
| 656 | spx_word16_t ss, ss_1; |
| 657 | spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE; |
| 658 | spx_float_t alpha, alpha_1; |
| 659 | spx_word16_t RER; |
| 660 | spx_word32_t tmp32; |
| 661 | |
| 662 | N = st->window_size; |
| 663 | M = st->M; |
| 664 | st->cancel_count++; |
| 665 | #ifdef FIXED_POINT |
| 666 | ss=DIV32_16(11469,M); |
| 667 | ss_1 = SUB16(32767,ss); |
| 668 | #else |
| 669 | ss=.35/M; |
| 670 | ss_1 = 1-ss; |
| 671 | #endif |
| 672 | |
| 673 | /* Apply a notch filter to make sure DC doesn't end up causing problems */ |
| 674 | filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem); |
| 675 | /* Copy input data to buffer and apply pre-emphasis */ |
| 676 | for (i=0;i<st->frame_size;i++) |
| 677 | { |
| 678 | spx_word32_t tmp32; |
| 679 | tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); |
| 680 | #ifdef FIXED_POINT |
| 681 | /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ |
| 682 | if (tmp32 > 32767) |
| 683 | { |
| 684 | tmp32 = 32767; |
| 685 | st->saturated = M+1; |
| 686 | } |
| 687 | if (tmp32 < -32767) |
| 688 | { |
| 689 | tmp32 = -32767; |
| 690 | st->saturated = M+1; |
| 691 | } |
| 692 | #endif |
| 693 | st->x[i+st->frame_size] = EXTRACT16(tmp32); |
| 694 | st->memX = far_end[i]; |
| 695 | |
| 696 | tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); |
| 697 | #ifdef FIXED_POINT |
| 698 | if (tmp32 > 32767) |
| 699 | { |
| 700 | tmp32 = 32767; |
| 701 | if (st->saturated == 0) |
| 702 | st->saturated = 1; |
| 703 | } |
| 704 | if (tmp32 < -32767) |
| 705 | { |
| 706 | tmp32 = -32767; |
| 707 | if (st->saturated == 0) |
| 708 | st->saturated = 1; |
| 709 | } |
| 710 | #endif |
| 711 | st->memD = st->input[i]; |
| 712 | st->input[i] = tmp32; |
| 713 | } |
| 714 | |
| 715 | /* Shift memory: this could be optimized eventually*/ |
| 716 | for (j=M-1;j>=0;j--) |
| 717 | { |
| 718 | for (i=0;i<N;i++) |
| 719 | st->X[(j+1)*N+i] = st->X[j*N+i]; |
| 720 | } |
| 721 | |
| 722 | /* Convert x (far end) to frequency domain */ |
| 723 | spx_fft(st->fft_table, st->x, &st->X[0]); |
| 724 | for (i=0;i<N;i++) |
| 725 | st->last_y[i] = st->x[i]; |
| 726 | Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); |
| 727 | for (i=0;i<st->frame_size;i++) |
| 728 | st->x[i] = st->x[i+st->frame_size]; |
| 729 | /* From here on, the top part of x is used as scratch space */ |
| 730 | |
| 731 | #ifdef TWO_PATH |
| 732 | /* Compute foreground filter */ |
| 733 | spectral_mul_accum16(st->X, st->foreground, st->Y, N, M); |
| 734 | spx_ifft(st->fft_table, st->Y, st->e); |
| 735 | for (i=0;i<st->frame_size;i++) |
| 736 | st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]); |
| 737 | Sff = mdf_inner_prod(st->e, st->e, st->frame_size); |
| 738 | #endif |
| 739 | |
| 740 | /* Adjust proportional adaption rate */ |
| 741 | mdf_adjust_prop (st->W, N, M, st->prop); |
| 742 | /* Compute weight gradient */ |
| 743 | if (st->saturated == 0) |
| 744 | { |
| 745 | for (j=M-1;j>=0;j--) |
| 746 | { |
| 747 | weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); |
| 748 | for (i=0;i<N;i++) |
| 749 | st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); |
| 750 | |
| 751 | } |
| 752 | } else { |
| 753 | st->saturated--; |
| 754 | } |
| 755 | |
| 756 | /* Update weight to prevent circular convolution (MDF / AUMDF) */ |
| 757 | for (j=0;j<M;j++) |
| 758 | { |
| 759 | /* This is a variant of the Alternatively Updated MDF (AUMDF) */ |
| 760 | /* Remove the "if" to make this an MDF filter */ |
| 761 | if (j==0 || st->cancel_count%(M-1) == j-1) |
| 762 | { |
| 763 | #ifdef FIXED_POINT |
| 764 | for (i=0;i<N;i++) |
| 765 | st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16)); |
| 766 | spx_ifft(st->fft_table, st->wtmp2, st->wtmp); |
| 767 | for (i=0;i<st->frame_size;i++) |
| 768 | { |
| 769 | st->wtmp[i]=0; |
| 770 | } |
| 771 | for (i=st->frame_size;i<N;i++) |
| 772 | { |
| 773 | st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); |
| 774 | } |
| 775 | spx_fft(st->fft_table, st->wtmp, st->wtmp2); |
| 776 | /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ |
| 777 | for (i=0;i<N;i++) |
| 778 | st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); |
| 779 | #else |
| 780 | spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); |
| 781 | for (i=st->frame_size;i<N;i++) |
| 782 | { |
| 783 | st->wtmp[i]=0; |
| 784 | } |
| 785 | spx_fft(st->fft_table, st->wtmp, &st->W[j*N]); |
| 786 | #endif |
| 787 | } |
| 788 | } |
| 789 | |
| 790 | /* Compute filter response Y */ |
| 791 | spectral_mul_accum(st->X, st->W, st->Y, N, M); |
| 792 | spx_ifft(st->fft_table, st->Y, st->y); |
| 793 | |
| 794 | #ifdef TWO_PATH |
| 795 | /* Difference in response, this is used to estimate the variance of our residual power estimate */ |
| 796 | for (i=0;i<st->frame_size;i++) |
| 797 | st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]); |
| 798 | Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size); |
| 799 | #endif |
| 800 | |
| 801 | for (i=0;i<st->frame_size;i++) |
| 802 | st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); |
| 803 | See = mdf_inner_prod(st->e, st->e, st->frame_size); |
| 804 | #ifndef TWO_PATH |
| 805 | Sff = See; |
| 806 | #endif |
| 807 | |
| 808 | #ifdef TWO_PATH |
| 809 | /* Logic for updating the foreground filter */ |
| 810 | |
| 811 | /* For two time windows, compute the mean of the energy difference, as well as the variance */ |
| 812 | st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See))); |
| 813 | st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See))); |
| 814 | st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf))); |
| 815 | st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf))); |
| 816 | |
| 817 | /* Equivalent float code: |
| 818 | st->Davg1 = .6*st->Davg1 + .4*(Sff-See); |
| 819 | st->Davg2 = .85*st->Davg2 + .15*(Sff-See); |
| 820 | st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf; |
| 821 | st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf; |
| 822 | */ |
| 823 | |
| 824 | update_foreground = 0; |
| 825 | /* Check if we have a statistically significant reduction in the residual echo */ |
| 826 | /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */ |
| 827 | if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf))) |
| 828 | update_foreground = 1; |
| 829 | else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1)))) |
| 830 | update_foreground = 1; |
| 831 | else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2)))) |
| 832 | update_foreground = 1; |
| 833 | |
| 834 | /* Do we update? */ |
| 835 | if (update_foreground) |
| 836 | { |
| 837 | st->Davg1 = st->Davg2 = 0; |
| 838 | st->Dvar1 = st->Dvar2 = FLOAT_ZERO; |
| 839 | /* Copy background filter to foreground filter */ |
| 840 | for (i=0;i<N*M;i++) |
| 841 | st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); |
| 842 | /* Apply a smooth transition so as to not introduce blocking artifacts */ |
| 843 | for (i=0;i<st->frame_size;i++) |
| 844 | st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); |
| 845 | } else { |
| 846 | int reset_background=0; |
| 847 | /* Otherwise, check if the background filter is significantly worse */ |
| 848 | if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf)))) |
| 849 | reset_background = 1; |
| 850 | if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1))) |
| 851 | reset_background = 1; |
| 852 | if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2))) |
| 853 | reset_background = 1; |
| 854 | if (reset_background) |
| 855 | { |
| 856 | /* Copy foreground filter to background filter */ |
| 857 | for (i=0;i<N*M;i++) |
| 858 | st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); |
| 859 | /* We also need to copy the output so as to get correct adaptation */ |
| 860 | for (i=0;i<st->frame_size;i++) |
| 861 | st->y[i+st->frame_size] = st->e[i+st->frame_size]; |
| 862 | for (i=0;i<st->frame_size;i++) |
| 863 | st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]); |
| 864 | See = Sff; |
| 865 | st->Davg1 = st->Davg2 = 0; |
| 866 | st->Dvar1 = st->Dvar2 = FLOAT_ZERO; |
| 867 | } |
| 868 | } |
| 869 | #endif |
| 870 | |
| 871 | /* Compute error signal (for the output with de-emphasis) */ |
| 872 | for (i=0;i<st->frame_size;i++) |
| 873 | { |
| 874 | spx_word32_t tmp_out; |
| 875 | #ifdef TWO_PATH |
| 876 | tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size])); |
| 877 | #else |
| 878 | tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); |
| 879 | #endif |
| 880 | /* Saturation */ |
| 881 | if (tmp_out>32767) |
| 882 | tmp_out = 32767; |
| 883 | else if (tmp_out<-32768) |
| 884 | tmp_out = -32768; |
| 885 | tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); |
| 886 | /* This is an arbitrary test for saturation in the microphone signal */ |
| 887 | if (in[i] <= -32000 || in[i] >= 32000) |
| 888 | { |
| 889 | tmp_out = 0; |
| 890 | if (st->saturated == 0) |
| 891 | st->saturated = 1; |
| 892 | } |
| 893 | out[i] = (spx_int16_t)tmp_out; |
| 894 | st->memE = tmp_out; |
| 895 | } |
| 896 | |
| 897 | #ifdef DUMP_ECHO_CANCEL_DATA |
| 898 | dump_audio(in, far_end, out, st->frame_size); |
| 899 | #endif |
| 900 | |
| 901 | /* Compute error signal (filter update version) */ |
| 902 | for (i=0;i<st->frame_size;i++) |
| 903 | { |
| 904 | st->e[i+st->frame_size] = st->e[i]; |
| 905 | st->e[i] = 0; |
| 906 | } |
| 907 | |
| 908 | /* Compute a bunch of correlations */ |
| 909 | Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size); |
| 910 | Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); |
| 911 | Sdd = mdf_inner_prod(st->input, st->input, st->frame_size); |
| 912 | |
| 913 | /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ |
| 914 | |
| 915 | /* Do some sanity check */ |
| 916 | if (!(Syy>=0 && Sxx>=0 && See >= 0) |
| 917 | #ifndef FIXED_POINT |
| 918 | || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9) |
| 919 | #endif |
| 920 | ) |
| 921 | { |
| 922 | /* Things have gone really bad */ |
| 923 | st->screwed_up += 50; |
| 924 | for (i=0;i<st->frame_size;i++) |
| 925 | out[i] = 0; |
| 926 | } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) |
| 927 | { |
| 928 | /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */ |
| 929 | st->screwed_up++; |
| 930 | } else { |
| 931 | /* Everything's fine */ |
| 932 | st->screwed_up=0; |
| 933 | } |
| 934 | if (st->screwed_up>=50) |
| 935 | { |
| 936 | speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now."); |
| 937 | speex_echo_state_reset(st); |
| 938 | return; |
| 939 | } |
| 940 | |
| 941 | /* Add a small noise floor to make sure not to have problems when dividing */ |
| 942 | See = MAX32(See, SHR32(MULT16_16(N, 100),6)); |
| 943 | |
| 944 | /* Convert error to frequency domain */ |
| 945 | spx_fft(st->fft_table, st->e, st->E); |
| 946 | for (i=0;i<st->frame_size;i++) |
| 947 | st->y[i] = 0; |
| 948 | spx_fft(st->fft_table, st->y, st->Y); |
| 949 | |
| 950 | /* Compute power spectrum of far end (X), error (E) and filter response (Y) */ |
| 951 | power_spectrum(st->E, st->Rf, N); |
| 952 | power_spectrum(st->Y, st->Yf, N); |
| 953 | power_spectrum(st->X, st->Xf, N); |
| 954 | |
| 955 | /* Smooth far end energy estimate over time */ |
| 956 | for (j=0;j<=st->frame_size;j++) |
| 957 | st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); |
| 958 | |
| 959 | /* Enable this to compute the power based only on the tail (it would need to be computed more |
| 960 | efficiently to make this really useful) */ |
| 961 | if (0) |
| 962 | { |
| 963 | float scale2 = .5f/M; |
| 964 | for (j=0;j<=st->frame_size;j++) |
| 965 | st->power[j] = 100; |
| 966 | for (i=0;i<M;i++) |
| 967 | { |
| 968 | power_spectrum(&st->X[i*N], st->Xf, N); |
| 969 | for (j=0;j<=st->frame_size;j++) |
| 970 | st->power[j] += scale2*st->Xf[j]; |
| 971 | } |
| 972 | } |
| 973 | |
| 974 | /* Compute filtered spectra and (cross-)correlations */ |
| 975 | for (j=st->frame_size;j>=0;j--) |
| 976 | { |
| 977 | spx_float_t Eh, Yh; |
| 978 | Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]); |
| 979 | Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]); |
| 980 | Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh)); |
| 981 | Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh)); |
| 982 | #ifdef FIXED_POINT |
| 983 | st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]); |
| 984 | st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]); |
| 985 | #else |
| 986 | st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j]; |
| 987 | st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j]; |
| 988 | #endif |
| 989 | } |
| 990 | |
| 991 | Pyy = FLOAT_SQRT(Pyy); |
| 992 | Pey = FLOAT_DIVU(Pey,Pyy); |
| 993 | |
| 994 | /* Compute correlation update rate */ |
| 995 | tmp32 = MULT16_32_Q15(st->beta0,Syy); |
| 996 | if (tmp32 > MULT16_32_Q15(st->beta_max,See)) |
| 997 | tmp32 = MULT16_32_Q15(st->beta_max,See); |
| 998 | alpha = FLOAT_DIV32(tmp32, See); |
| 999 | alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha); |
| 1000 | /* Update correlations (recursive average) */ |
| 1001 | st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey)); |
| 1002 | st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy)); |
| 1003 | if (FLOAT_LT(st->Pyy, FLOAT_ONE)) |
| 1004 | st->Pyy = FLOAT_ONE; |
| 1005 | /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */ |
| 1006 | if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy))) |
| 1007 | st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy); |
| 1008 | if (FLOAT_GT(st->Pey, st->Pyy)) |
| 1009 | st->Pey = st->Pyy; |
| 1010 | /* leak_estimate is the linear regression result */ |
| 1011 | st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); |
| 1012 | /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */ |
| 1013 | if (st->leak_estimate > 16383) |
| 1014 | st->leak_estimate = 32767; |
| 1015 | else |
| 1016 | st->leak_estimate = SHL16(st->leak_estimate,1); |
| 1017 | /*printf ("%f\n", st->leak_estimate);*/ |
| 1018 | |
| 1019 | /* Compute Residual to Error Ratio */ |
| 1020 | #ifdef FIXED_POINT |
| 1021 | tmp32 = MULT16_32_Q15(st->leak_estimate,Syy); |
| 1022 | tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1))); |
| 1023 | /* Check for y in e (lower bound on RER) */ |
| 1024 | { |
| 1025 | spx_float_t bound = PSEUDOFLOAT(Sey); |
| 1026 | bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy))); |
| 1027 | if (FLOAT_GT(bound, PSEUDOFLOAT(See))) |
| 1028 | tmp32 = See; |
| 1029 | else if (tmp32 < FLOAT_EXTRACT32(bound)) |
| 1030 | tmp32 = FLOAT_EXTRACT32(bound); |
| 1031 | } |
| 1032 | if (tmp32 > SHR32(See,1)) |
| 1033 | tmp32 = SHR32(See,1); |
| 1034 | RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15)); |
| 1035 | #else |
| 1036 | RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See; |
| 1037 | /* Check for y in e (lower bound on RER) */ |
| 1038 | if (RER < Sey*Sey/(1+See*Syy)) |
| 1039 | RER = Sey*Sey/(1+See*Syy); |
| 1040 | if (RER > .5) |
| 1041 | RER = .5; |
| 1042 | #endif |
| 1043 | |
| 1044 | /* We consider that the filter has had minimal adaptation if the following is true*/ |
| 1045 | if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy)) |
| 1046 | { |
| 1047 | st->adapted = 1; |
| 1048 | } |
| 1049 | |
| 1050 | if (st->adapted) |
| 1051 | { |
| 1052 | /* Normal learning rate calculation once we're past the minimal adaptation phase */ |
| 1053 | for (i=0;i<=st->frame_size;i++) |
| 1054 | { |
| 1055 | spx_word32_t r, e; |
| 1056 | /* Compute frequency-domain adaptation mask */ |
| 1057 | r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3)); |
| 1058 | e = SHL32(st->Rf[i],3)+1; |
| 1059 | #ifdef FIXED_POINT |
| 1060 | if (r>SHR32(e,1)) |
| 1061 | r = SHR32(e,1); |
| 1062 | #else |
| 1063 | if (r>.5*e) |
| 1064 | r = .5*e; |
| 1065 | #endif |
| 1066 | r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e))); |
| 1067 | /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/ |
| 1068 | st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16); |
| 1069 | } |
| 1070 | } else { |
| 1071 | /* Temporary adaption rate if filter is not yet adapted enough */ |
| 1072 | spx_word16_t adapt_rate=0; |
| 1073 | |
| 1074 | if (Sxx > SHR32(MULT16_16(N, 1000),6)) |
| 1075 | { |
| 1076 | tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); |
| 1077 | #ifdef FIXED_POINT |
| 1078 | if (tmp32 > SHR32(See,2)) |
| 1079 | tmp32 = SHR32(See,2); |
| 1080 | #else |
| 1081 | if (tmp32 > .25*See) |
| 1082 | tmp32 = .25*See; |
| 1083 | #endif |
| 1084 | adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15)); |
| 1085 | } |
| 1086 | for (i=0;i<=st->frame_size;i++) |
| 1087 | st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1); |
| 1088 | |
| 1089 | |
| 1090 | /* How much have we adapted so far? */ |
| 1091 | st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); |
| 1092 | } |
| 1093 | |
| 1094 | /* Save residual echo so it can be used by the nonlinear processor */ |
| 1095 | if (st->adapted) |
| 1096 | { |
| 1097 | /* If the filter is adapted, take the filtered echo */ |
| 1098 | for (i=0;i<st->frame_size;i++) |
| 1099 | st->last_y[i] = st->last_y[st->frame_size+i]; |
| 1100 | for (i=0;i<st->frame_size;i++) |
| 1101 | st->last_y[st->frame_size+i] = in[i]-out[i]; |
| 1102 | } else { |
| 1103 | /* If filter isn't adapted yet, all we can do is take the far end signal directly */ |
| 1104 | /* moved earlier: for (i=0;i<N;i++) |
| 1105 | st->last_y[i] = st->x[i];*/ |
| 1106 | } |
| 1107 | |
| 1108 | } |
| 1109 | |
| 1110 | /* Compute spectrum of estimated echo for use in an echo post-filter */ |
| 1111 | void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len) |
| 1112 | { |
| 1113 | int i; |
| 1114 | spx_word16_t leak2; |
| 1115 | int N; |
| 1116 | |
| 1117 | N = st->window_size; |
| 1118 | |
| 1119 | /* Apply hanning window (should pre-compute it)*/ |
| 1120 | for (i=0;i<N;i++) |
| 1121 | st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); |
| 1122 | |
| 1123 | /* Compute power spectrum of the echo */ |
| 1124 | spx_fft(st->fft_table, st->y, st->Y); |
| 1125 | power_spectrum(st->Y, residual_echo, N); |
| 1126 | |
| 1127 | #ifdef FIXED_POINT |
| 1128 | if (st->leak_estimate > 16383) |
| 1129 | leak2 = 32767; |
| 1130 | else |
| 1131 | leak2 = SHL16(st->leak_estimate, 1); |
| 1132 | #else |
| 1133 | if (st->leak_estimate>.5) |
| 1134 | leak2 = 1; |
| 1135 | else |
| 1136 | leak2 = 2*st->leak_estimate; |
| 1137 | #endif |
| 1138 | /* Estimate residual echo */ |
| 1139 | for (i=0;i<=st->frame_size;i++) |
| 1140 | residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]); |
| 1141 | |
| 1142 | } |
| 1143 | |
| 1144 | int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) |
| 1145 | { |
| 1146 | switch(request) |
| 1147 | { |
| 1148 | |
| 1149 | case SPEEX_ECHO_GET_FRAME_SIZE: |
| 1150 | (*(int*)ptr) = st->frame_size; |
| 1151 | break; |
| 1152 | case SPEEX_ECHO_SET_SAMPLING_RATE: |
| 1153 | st->sampling_rate = (*(int*)ptr); |
| 1154 | st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); |
| 1155 | #ifdef FIXED_POINT |
| 1156 | st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); |
| 1157 | st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); |
| 1158 | #else |
| 1159 | st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; |
| 1160 | st->beta_max = (.5f*st->frame_size)/st->sampling_rate; |
| 1161 | #endif |
| 1162 | if (st->sampling_rate<12000) |
| 1163 | st->notch_radius = QCONST16(.9, 15); |
| 1164 | else if (st->sampling_rate<24000) |
| 1165 | st->notch_radius = QCONST16(.982, 15); |
| 1166 | else |
| 1167 | st->notch_radius = QCONST16(.992, 15); |
| 1168 | break; |
| 1169 | case SPEEX_ECHO_GET_SAMPLING_RATE: |
| 1170 | (*(int*)ptr) = st->sampling_rate; |
| 1171 | break; |
| 1172 | default: |
| 1173 | speex_warning_int("Unknown speex_echo_ctl request: ", request); |
| 1174 | return -1; |
| 1175 | } |
| 1176 | return 0; |
| 1177 | } |