blob: d97e678a59d84da518ee76382ca7ca2434267ed5 [file] [log] [blame]
Nanang Izzuddin3cbb0652008-06-10 14:09:37 +00001/* Copyright (C) 2003-2008 Jean-Marc Valin
2
3 File: mdf.c
4 Echo canceller based on the MDF algorithm (see below)
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
8 met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
12
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 3. The name of the author may not be used to endorse or promote products
18 derived from this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31*/
32
33/*
34 The echo canceller is based on the MDF algorithm described in:
35
36 J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
37 IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
38 February 1990.
39
40 We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
41 double-talk is achieved using a variable learning rate as described in:
42
43 Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
44 Cancellation With Double-Talk. IEEE Transactions on Audio,
45 Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
46 http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
47
48 There is no explicit double-talk detection, but a continuous variation
49 in the learning rate based on residual echo, double-talk and background
50 noise.
51
52 About the fixed-point version:
53 All the signals are represented with 16-bit words. The filter weights
54 are represented with 32-bit words, but only the top 16 bits are used
55 in most cases. The lower 16 bits are completely unreliable (due to the
56 fact that the update is done only on the top bits), but help in the
57 adaptation -- probably by removing a "threshold effect" due to
58 quantization (rounding going to zero) when the gradient is small.
59
60 Another kludge that seems to work good: when performing the weight
61 update, we only move half the way toward the "goal" this seems to
62 reduce the effect of quantization noise in the update phase. This
63 can be seen as applying a gradient descent on a "soft constraint"
64 instead of having a hard constraint.
65
66*/
67
68#ifdef HAVE_CONFIG_H
69#include "config.h"
70#endif
71
72#include "arch.h"
73#include "speex/speex_echo.h"
74#include "fftwrap.h"
75#include "pseudofloat.h"
76#include "math_approx.h"
77#include "os_support.h"
78
79#ifndef M_PI
80#define M_PI 3.14159265358979323846
81#endif
82
83#ifdef FIXED_POINT
84#define WEIGHT_SHIFT 11
85#define NORMALIZE_SCALEDOWN 5
86#define NORMALIZE_SCALEUP 3
87#else
88#define WEIGHT_SHIFT 0
89#endif
90
91#ifdef FIXED_POINT
92#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
93#else
94#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
95#endif
96
97/* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
98 and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
99#define TWO_PATH
100
101#ifdef FIXED_POINT
102static const spx_float_t MIN_LEAK = {20972, -22};
103
104/* Constants for the two-path filter */
105static const spx_float_t VAR1_SMOOTH = {23593, -16};
106static const spx_float_t VAR2_SMOOTH = {23675, -15};
107static const spx_float_t VAR1_UPDATE = {16384, -15};
108static const spx_float_t VAR2_UPDATE = {16384, -16};
109static const spx_float_t VAR_BACKTRACK = {16384, -12};
110#define TOP16(x) ((x)>>16)
111
112#else
113
114static const spx_float_t MIN_LEAK = .005f;
115
116/* Constants for the two-path filter */
117static const spx_float_t VAR1_SMOOTH = .36f;
118static const spx_float_t VAR2_SMOOTH = .7225f;
119static const spx_float_t VAR1_UPDATE = .5f;
120static const spx_float_t VAR2_UPDATE = .25f;
121static const spx_float_t VAR_BACKTRACK = 4.f;
122#define TOP16(x) (x)
123#endif
124
125
126#define PLAYBACK_DELAY 2
127
128void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
129
130
131/** Speex echo cancellation state. */
132struct SpeexEchoState_ {
133 int frame_size; /**< Number of samples processed each time */
134 int window_size;
135 int M;
136 int cancel_count;
137 int adapted;
138 int saturated;
139 int screwed_up;
140 int C; /** Number of input channels (microphones) */
141 int K; /** Number of output channels (loudspeakers) */
142 spx_int32_t sampling_rate;
143 spx_word16_t spec_average;
144 spx_word16_t beta0;
145 spx_word16_t beta_max;
146 spx_word32_t sum_adapt;
147 spx_word16_t leak_estimate;
148
149 spx_word16_t *e; /* scratch */
150 spx_word16_t *x; /* Far-end input buffer (2N) */
151 spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */
152 spx_word16_t *input; /* scratch */
153 spx_word16_t *y; /* scratch */
154 spx_word16_t *last_y;
155 spx_word16_t *Y; /* scratch */
156 spx_word16_t *E;
157 spx_word32_t *PHI; /* scratch */
158 spx_word32_t *W; /* (Background) filter weights */
159#ifdef TWO_PATH
160 spx_word16_t *foreground; /* Foreground filter weights */
161 spx_word32_t Davg1; /* 1st recursive average of the residual power difference */
162 spx_word32_t Davg2; /* 2nd recursive average of the residual power difference */
163 spx_float_t Dvar1; /* Estimated variance of 1st estimator */
164 spx_float_t Dvar2; /* Estimated variance of 2nd estimator */
165#endif
166 spx_word32_t *power; /* Power of the far-end signal */
167 spx_float_t *power_1;/* Inverse power of far-end */
168 spx_word16_t *wtmp; /* scratch */
169#ifdef FIXED_POINT
170 spx_word16_t *wtmp2; /* scratch */
171#endif
172 spx_word32_t *Rf; /* scratch */
173 spx_word32_t *Yf; /* scratch */
174 spx_word32_t *Xf; /* scratch */
175 spx_word32_t *Eh;
176 spx_word32_t *Yh;
177 spx_float_t Pey;
178 spx_float_t Pyy;
179 spx_word16_t *window;
180 spx_word16_t *prop;
181 void *fft_table;
182 spx_word16_t *memX, *memD, *memE;
183 spx_word16_t preemph;
184 spx_word16_t notch_radius;
185 spx_mem_t *notch_mem;
186
187 /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
188 spx_int16_t *play_buf;
189 int play_buf_pos;
190 int play_buf_started;
191};
192
193static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
194{
195 int i;
196 spx_word16_t den2;
197#ifdef FIXED_POINT
198 den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
199#else
200 den2 = radius*radius + .7*(1-radius)*(1-radius);
201#endif
202 /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
203 for (i=0;i<len;i++)
204 {
205 spx_word16_t vin = in[i*stride];
206 spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
207#ifdef FIXED_POINT
208 mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
209#else
210 mem[0] = mem[1] + 2*(-vin + radius*vout);
211#endif
212 mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
213 out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
214 }
215}
216
217/* This inner product is slightly different from the codec version because of fixed-point */
218static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
219{
220 spx_word32_t sum=0;
221 len >>= 1;
222 while(len--)
223 {
224 spx_word32_t part=0;
225 part = MAC16_16(part,*x++,*y++);
226 part = MAC16_16(part,*x++,*y++);
227 /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
228 sum = ADD32(sum,SHR32(part,6));
229 }
230 return sum;
231}
232
233/** Compute power spectrum of a half-complex (packed) vector */
234static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
235{
236 int i, j;
237 ps[0]=MULT16_16(X[0],X[0]);
238 for (i=1,j=1;i<N-1;i+=2,j++)
239 {
240 ps[j] = MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
241 }
242 ps[j]=MULT16_16(X[i],X[i]);
243}
244
245/** Compute power spectrum of a half-complex (packed) vector and accumulate */
246static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
247{
248 int i, j;
249 ps[0]+=MULT16_16(X[0],X[0]);
250 for (i=1,j=1;i<N-1;i+=2,j++)
251 {
252 ps[j] += MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
253 }
254 ps[j]+=MULT16_16(X[i],X[i]);
255}
256
257/** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
258#ifdef FIXED_POINT
259static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
260{
261 int i,j;
262 spx_word32_t tmp1=0,tmp2=0;
263 for (j=0;j<M;j++)
264 {
265 tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
266 }
267 acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
268 for (i=1;i<N-1;i+=2)
269 {
270 tmp1 = tmp2 = 0;
271 for (j=0;j<M;j++)
272 {
273 tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
274 tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
275 }
276 acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
277 acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
278 }
279 tmp1 = tmp2 = 0;
280 for (j=0;j<M;j++)
281 {
282 tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
283 }
284 acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
285}
286static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
287{
288 int i,j;
289 spx_word32_t tmp1=0,tmp2=0;
290 for (j=0;j<M;j++)
291 {
292 tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
293 }
294 acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
295 for (i=1;i<N-1;i+=2)
296 {
297 tmp1 = tmp2 = 0;
298 for (j=0;j<M;j++)
299 {
300 tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
301 tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
302 }
303 acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
304 acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
305 }
306 tmp1 = tmp2 = 0;
307 for (j=0;j<M;j++)
308 {
309 tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
310 }
311 acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
312}
313
314#else
315static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
316{
317 int i,j;
318 for (i=0;i<N;i++)
319 acc[i] = 0;
320 for (j=0;j<M;j++)
321 {
322 acc[0] += X[0]*Y[0];
323 for (i=1;i<N-1;i+=2)
324 {
325 acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
326 acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
327 }
328 acc[i] += X[i]*Y[i];
329 X += N;
330 Y += N;
331 }
332}
333#define spectral_mul_accum16 spectral_mul_accum
334#endif
335
336/** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
337static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
338{
339 int i, j;
340 spx_float_t W;
341 W = FLOAT_AMULT(p, w[0]);
342 prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));
343 for (i=1,j=1;i<N-1;i+=2,j++)
344 {
345 W = FLOAT_AMULT(p, w[j]);
346 prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
347 prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
348 }
349 W = FLOAT_AMULT(p, w[j]);
350 prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
351}
352
353static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
354{
355 int i, j, p;
356 spx_word16_t max_sum = 1;
357 spx_word32_t prop_sum = 1;
358 for (i=0;i<M;i++)
359 {
360 spx_word32_t tmp = 1;
361 for (p=0;p<P;p++)
362 for (j=0;j<N;j++)
363 tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
364#ifdef FIXED_POINT
365 /* Just a security in case an overflow were to occur */
366 tmp = MIN32(ABS32(tmp), 536870912);
367#endif
368 prop[i] = spx_sqrt(tmp);
369 if (prop[i] > max_sum)
370 max_sum = prop[i];
371 }
372 for (i=0;i<M;i++)
373 {
374 prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum);
375 prop_sum += EXTEND32(prop[i]);
376 }
377 for (i=0;i<M;i++)
378 {
379 prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum);
380 /*printf ("%f ", prop[i]);*/
381 }
382 /*printf ("\n");*/
383}
384
385#ifdef DUMP_ECHO_CANCEL_DATA
386#include <stdio.h>
387static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL;
388
389static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len)
390{
391 if (!(rFile && pFile && oFile))
392 {
393 speex_fatal("Dump files not open");
394 }
395 fwrite(rec, sizeof(spx_int16_t), len, rFile);
396 fwrite(play, sizeof(spx_int16_t), len, pFile);
397 fwrite(out, sizeof(spx_int16_t), len, oFile);
398}
399#endif
400
401/** Creates a new echo canceller state */
402EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
403{
404 return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
405}
406
407EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
408{
409 int i,N,M, C, K;
410 SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
411
412 st->K = nb_speakers;
413 st->C = nb_mic;
414 C=st->C;
415 K=st->K;
416#ifdef DUMP_ECHO_CANCEL_DATA
417 if (rFile || pFile || oFile)
418 speex_fatal("Opening dump files twice");
419 rFile = fopen("aec_rec.sw", "wb");
420 pFile = fopen("aec_play.sw", "wb");
421 oFile = fopen("aec_out.sw", "wb");
422#endif
423
424 st->frame_size = frame_size;
425 st->window_size = 2*frame_size;
426 N = st->window_size;
427 M = st->M = (filter_length+st->frame_size-1)/frame_size;
428 st->cancel_count=0;
429 st->sum_adapt = 0;
430 st->saturated = 0;
431 st->screwed_up = 0;
432 /* This is the default sampling rate */
433 st->sampling_rate = 8000;
434 st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
435#ifdef FIXED_POINT
436 st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
437 st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
438#else
439 st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
440 st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
441#endif
442 st->leak_estimate = 0;
443
444 st->fft_table = spx_fft_init(N);
445
446 st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
447 st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
448 st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
449 st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
450 st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
451 st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
452 st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
453 st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
454 st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
455 st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
456
457 st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
458 st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
459 st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
460 st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
461#ifdef TWO_PATH
462 st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
463#endif
464 st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
465 st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
466 st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
467 st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
468 st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
469 st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
470#ifdef FIXED_POINT
471 st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
472 for (i=0;i<N>>1;i++)
473 {
474 st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
475 st->window[N-i-1] = st->window[i];
476 }
477#else
478 for (i=0;i<N;i++)
479 st->window[i] = .5-.5*cos(2*M_PI*i/N);
480#endif
481 for (i=0;i<=st->frame_size;i++)
482 st->power_1[i] = FLOAT_ONE;
483 for (i=0;i<N*M*K*C;i++)
484 st->W[i] = 0;
485 {
486 spx_word32_t sum = 0;
487 /* Ratio of ~10 between adaptation rate of first and last block */
488 spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
489 st->prop[0] = QCONST16(.7, 15);
490 sum = EXTEND32(st->prop[0]);
491 for (i=1;i<M;i++)
492 {
493 st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
494 sum = ADD32(sum, EXTEND32(st->prop[i]));
495 }
496 for (i=M-1;i>=0;i--)
497 {
498 st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
499 }
500 }
501
502 st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
503 st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
504 st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
505 st->preemph = QCONST16(.9,15);
506 if (st->sampling_rate<12000)
507 st->notch_radius = QCONST16(.9, 15);
508 else if (st->sampling_rate<24000)
509 st->notch_radius = QCONST16(.982, 15);
510 else
511 st->notch_radius = QCONST16(.992, 15);
512
513 st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
514 st->adapted = 0;
515 st->Pey = st->Pyy = FLOAT_ONE;
516
517#ifdef TWO_PATH
518 st->Davg1 = st->Davg2 = 0;
519 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
520#endif
521
522 st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
523 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
524 st->play_buf_started = 0;
525
526 return st;
527}
528
529/** Resets echo canceller state */
530EXPORT void speex_echo_state_reset(SpeexEchoState *st)
531{
532 int i, M, N, C, K;
533 st->cancel_count=0;
534 st->screwed_up = 0;
535 N = st->window_size;
536 M = st->M;
537 C=st->C;
538 K=st->K;
539 for (i=0;i<N*M;i++)
540 st->W[i] = 0;
541#ifdef TWO_PATH
542 for (i=0;i<N*M;i++)
543 st->foreground[i] = 0;
544#endif
545 for (i=0;i<N*(M+1);i++)
546 st->X[i] = 0;
547 for (i=0;i<=st->frame_size;i++)
548 {
549 st->power[i] = 0;
550 st->power_1[i] = FLOAT_ONE;
551 st->Eh[i] = 0;
552 st->Yh[i] = 0;
553 }
554 for (i=0;i<st->frame_size;i++)
555 {
556 st->last_y[i] = 0;
557 }
558 for (i=0;i<N*C;i++)
559 {
560 st->E[i] = 0;
561 }
562 for (i=0;i<N*K;i++)
563 {
564 st->x[i] = 0;
565 }
566 for (i=0;i<2*C;i++)
567 st->notch_mem[i] = 0;
568 for (i=0;i<C;i++)
569 st->memD[i]=st->memE[i]=0;
570 for (i=0;i<K;i++)
571 st->memX[i]=0;
572
573 st->saturated = 0;
574 st->adapted = 0;
575 st->sum_adapt = 0;
576 st->Pey = st->Pyy = FLOAT_ONE;
577#ifdef TWO_PATH
578 st->Davg1 = st->Davg2 = 0;
579 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
580#endif
581 for (i=0;i<3*st->frame_size;i++)
582 st->play_buf[i] = 0;
583 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
584 st->play_buf_started = 0;
585
586}
587
588/** Destroys an echo canceller state */
589EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
590{
591 spx_fft_destroy(st->fft_table);
592
593 speex_free(st->e);
594 speex_free(st->x);
595 speex_free(st->input);
596 speex_free(st->y);
597 speex_free(st->last_y);
598 speex_free(st->Yf);
599 speex_free(st->Rf);
600 speex_free(st->Xf);
601 speex_free(st->Yh);
602 speex_free(st->Eh);
603
604 speex_free(st->X);
605 speex_free(st->Y);
606 speex_free(st->E);
607 speex_free(st->W);
608#ifdef TWO_PATH
609 speex_free(st->foreground);
610#endif
611 speex_free(st->PHI);
612 speex_free(st->power);
613 speex_free(st->power_1);
614 speex_free(st->window);
615 speex_free(st->prop);
616 speex_free(st->wtmp);
617#ifdef FIXED_POINT
618 speex_free(st->wtmp2);
619#endif
620 speex_free(st->play_buf);
621 speex_free(st);
622
623#ifdef DUMP_ECHO_CANCEL_DATA
624 fclose(rFile);
625 fclose(pFile);
626 fclose(oFile);
627 rFile = pFile = oFile = NULL;
628#endif
629}
630
631EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
632{
633 int i;
634 /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
635 st->play_buf_started = 1;
636 if (st->play_buf_pos>=st->frame_size)
637 {
638 speex_echo_cancellation(st, rec, st->play_buf, out);
639 st->play_buf_pos -= st->frame_size;
640 for (i=0;i<st->play_buf_pos;i++)
641 st->play_buf[i] = st->play_buf[i+st->frame_size];
642 } else {
643 speex_warning("No playback frame available (your application is buggy and/or got xruns)");
644 if (st->play_buf_pos!=0)
645 {
646 speex_warning("internal playback buffer corruption?");
647 st->play_buf_pos = 0;
648 }
649 for (i=0;i<st->frame_size;i++)
650 out[i] = rec[i];
651 }
652}
653
654EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
655{
656 /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
657 if (!st->play_buf_started)
658 {
659 speex_warning("discarded first playback frame");
660 return;
661 }
662 if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size)
663 {
664 int i;
665 for (i=0;i<st->frame_size;i++)
666 st->play_buf[st->play_buf_pos+i] = play[i];
667 st->play_buf_pos += st->frame_size;
668 if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size)
669 {
670 speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
671 for (i=0;i<st->frame_size;i++)
672 st->play_buf[st->play_buf_pos+i] = play[i];
673 st->play_buf_pos += st->frame_size;
674 }
675 } else {
676 speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
677 }
678}
679
680/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
681EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
682{
683 speex_echo_cancellation(st, in, far_end, out);
684}
685
686/** Performs echo cancellation on a frame */
687EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
688{
689 int i,j, chan, speak;
690 int N,M, C, K;
691 spx_word32_t Syy,See,Sxx,Sdd, Sff;
692#ifdef TWO_PATH
693 spx_word32_t Dbf;
694 int update_foreground;
695#endif
696 spx_word32_t Sey;
697 spx_word16_t ss, ss_1;
698 spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
699 spx_float_t alpha, alpha_1;
700 spx_word16_t RER;
701 spx_word32_t tmp32;
702
703 N = st->window_size;
704 M = st->M;
705 C = st->C;
706 K = st->K;
707
708 st->cancel_count++;
709#ifdef FIXED_POINT
710 ss=DIV32_16(11469,M);
711 ss_1 = SUB16(32767,ss);
712#else
713 ss=.35/M;
714 ss_1 = 1-ss;
715#endif
716
717 for (chan = 0; chan < C; chan++)
718 {
719 /* Apply a notch filter to make sure DC doesn't end up causing problems */
720 filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
721 /* Copy input data to buffer and apply pre-emphasis */
722 /* Copy input data to buffer */
723 for (i=0;i<st->frame_size;i++)
724 {
725 spx_word32_t tmp32;
726 /* FIXME: This core has changed a bit, need to merge properly */
727 tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
728#ifdef FIXED_POINT
729 if (tmp32 > 32767)
730 {
731 tmp32 = 32767;
732 if (st->saturated == 0)
733 st->saturated = 1;
734 }
735 if (tmp32 < -32767)
736 {
737 tmp32 = -32767;
738 if (st->saturated == 0)
739 st->saturated = 1;
740 }
741#endif
742 st->memD[chan] = st->input[chan*st->frame_size+i];
743 st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
744 }
745 }
746
747 for (speak = 0; speak < K; speak++)
748 {
749 for (i=0;i<st->frame_size;i++)
750 {
751 spx_word32_t tmp32;
752 st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
753 tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
754#ifdef FIXED_POINT
755 /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
756 if (tmp32 > 32767)
757 {
758 tmp32 = 32767;
759 st->saturated = M+1;
760 }
761 if (tmp32 < -32767)
762 {
763 tmp32 = -32767;
764 st->saturated = M+1;
765 }
766#endif
767 st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
768 st->memX[speak] = far_end[i*K+speak];
769 }
770 }
771
772 for (speak = 0; speak < K; speak++)
773 {
774 /* Shift memory: this could be optimized eventually*/
775 for (j=M-1;j>=0;j--)
776 {
777 for (i=0;i<N;i++)
778 st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
779 }
780 /* Convert x (echo input) to frequency domain */
781 spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
782 }
783
784 Sxx = 0;
785 for (speak = 0; speak < K; speak++)
786 {
787 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
788 power_spectrum_accum(st->X+speak*N, st->Xf, N);
789 }
790
791 Sff = 0;
792 for (chan = 0; chan < C; chan++)
793 {
794#ifdef TWO_PATH
795 /* Compute foreground filter */
796 spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
797 spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
798 for (i=0;i<st->frame_size;i++)
799 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
800 Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
801#endif
802 }
803
804 /* Adjust proportional adaption rate */
805 /* FIXME: Adjust that for C, K*/
806 if (st->adapted)
807 mdf_adjust_prop (st->W, N, M, C*K, st->prop);
808 /* Compute weight gradient */
809 if (st->saturated == 0)
810 {
811 for (chan = 0; chan < C; chan++)
812 {
813 for (speak = 0; speak < K; speak++)
814 {
815 for (j=M-1;j>=0;j--)
816 {
817 weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
818 for (i=0;i<N;i++)
819 st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
820 }
821 }
822 }
823 } else {
824 st->saturated--;
825 }
826
827 /* FIXME: MC conversion required */
828 /* Update weight to prevent circular convolution (MDF / AUMDF) */
829 for (chan = 0; chan < C; chan++)
830 {
831 for (speak = 0; speak < K; speak++)
832 {
833 for (j=0;j<M;j++)
834 {
835 /* This is a variant of the Alternatively Updated MDF (AUMDF) */
836 /* Remove the "if" to make this an MDF filter */
837 if (j==0 || st->cancel_count%(M-1) == j-1)
838 {
839#ifdef FIXED_POINT
840 for (i=0;i<N;i++)
841 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
842 spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
843 for (i=0;i<st->frame_size;i++)
844 {
845 st->wtmp[i]=0;
846 }
847 for (i=st->frame_size;i<N;i++)
848 {
849 st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
850 }
851 spx_fft(st->fft_table, st->wtmp, st->wtmp2);
852 /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
853 for (i=0;i<N;i++)
854 st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
855#else
856 spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
857 for (i=st->frame_size;i<N;i++)
858 {
859 st->wtmp[i]=0;
860 }
861 spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
862#endif
863 }
864 }
865 }
866 }
867
868 /* So we can use power_spectrum_accum */
869 for (i=0;i<=st->frame_size;i++)
870 st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
871
872 Dbf = 0;
873 See = 0;
874#ifdef TWO_PATH
875 /* Difference in response, this is used to estimate the variance of our residual power estimate */
876 for (chan = 0; chan < C; chan++)
877 {
878 spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
879 spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
880 for (i=0;i<st->frame_size;i++)
881 st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
882 Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
883 for (i=0;i<st->frame_size;i++)
884 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
885 See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
886 }
887#endif
888
889#ifndef TWO_PATH
890 Sff = See;
891#endif
892
893#ifdef TWO_PATH
894 /* Logic for updating the foreground filter */
895
896 /* For two time windows, compute the mean of the energy difference, as well as the variance */
897 st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
898 st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
899 st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
900 st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
901
902 /* Equivalent float code:
903 st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
904 st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
905 st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
906 st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
907 */
908
909 update_foreground = 0;
910 /* Check if we have a statistically significant reduction in the residual echo */
911 /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
912 if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
913 update_foreground = 1;
914 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
915 update_foreground = 1;
916 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
917 update_foreground = 1;
918
919 /* Do we update? */
920 if (update_foreground)
921 {
922 st->Davg1 = st->Davg2 = 0;
923 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
924 /* Copy background filter to foreground filter */
925 for (i=0;i<N*M*C*K;i++)
926 st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
927 /* Apply a smooth transition so as to not introduce blocking artifacts */
928 for (chan = 0; chan < C; chan++)
929 for (i=0;i<st->frame_size;i++)
930 st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
931 } else {
932 int reset_background=0;
933 /* Otherwise, check if the background filter is significantly worse */
934 if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
935 reset_background = 1;
936 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
937 reset_background = 1;
938 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
939 reset_background = 1;
940 if (reset_background)
941 {
942 /* Copy foreground filter to background filter */
943 for (i=0;i<N*M*C*K;i++)
944 st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
945 /* We also need to copy the output so as to get correct adaptation */
946 for (chan = 0; chan < C; chan++)
947 {
948 for (i=0;i<st->frame_size;i++)
949 st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
950 for (i=0;i<st->frame_size;i++)
951 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
952 }
953 See = Sff;
954 st->Davg1 = st->Davg2 = 0;
955 st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
956 }
957 }
958#endif
959
960 Sey = Syy = Sdd = 0;
961 for (chan = 0; chan < C; chan++)
962 {
963 /* Compute error signal (for the output with de-emphasis) */
964 for (i=0;i<st->frame_size;i++)
965 {
966 spx_word32_t tmp_out;
967#ifdef TWO_PATH
968 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
969#else
970 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
971#endif
972 tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
973 /* This is an arbitrary test for saturation in the microphone signal */
974 if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
975 {
976 if (st->saturated == 0)
977 st->saturated = 1;
978 }
979 out[i*C+chan] = WORD2INT(tmp_out);
980 st->memE[chan] = tmp_out;
981 }
982
983#ifdef DUMP_ECHO_CANCEL_DATA
984 dump_audio(in, far_end, out, st->frame_size);
985#endif
986
987 /* Compute error signal (filter update version) */
988 for (i=0;i<st->frame_size;i++)
989 {
990 st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
991 st->e[chan*N+i] = 0;
992 }
993
994 /* Compute a bunch of correlations */
995 /* FIXME: bad merge */
996 Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
997 Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
998 Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
999
1000 /* Convert error to frequency domain */
1001 spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
1002 for (i=0;i<st->frame_size;i++)
1003 st->y[i+chan*N] = 0;
1004 spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
1005
1006 /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
1007 power_spectrum_accum(st->E+chan*N, st->Rf, N);
1008 power_spectrum_accum(st->Y+chan*N, st->Yf, N);
1009
1010 }
1011
1012 /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
1013
1014 /* Do some sanity check */
1015 if (!(Syy>=0 && Sxx>=0 && See >= 0)
1016#ifndef FIXED_POINT
1017 || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
1018#endif
1019 )
1020 {
1021 /* Things have gone really bad */
1022 st->screwed_up += 50;
1023 for (i=0;i<st->frame_size*C;i++)
1024 out[i] = 0;
1025 } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
1026 {
1027 /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
1028 st->screwed_up++;
1029 } else {
1030 /* Everything's fine */
1031 st->screwed_up=0;
1032 }
1033 if (st->screwed_up>=50)
1034 {
1035 speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
1036 speex_echo_state_reset(st);
1037 return;
1038 }
1039
1040 /* Add a small noise floor to make sure not to have problems when dividing */
1041 See = MAX32(See, SHR32(MULT16_16(N, 100),6));
1042
1043 for (speak = 0; speak < K; speak++)
1044 {
1045 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
1046 power_spectrum_accum(st->X+speak*N, st->Xf, N);
1047 }
1048
1049
1050 /* Smooth far end energy estimate over time */
1051 for (j=0;j<=st->frame_size;j++)
1052 st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
1053
1054 /* Compute filtered spectra and (cross-)correlations */
1055 for (j=st->frame_size;j>=0;j--)
1056 {
1057 spx_float_t Eh, Yh;
1058 Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
1059 Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
1060 Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
1061 Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
1062#ifdef FIXED_POINT
1063 st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
1064 st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
1065#else
1066 st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
1067 st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
1068#endif
1069 }
1070
1071 Pyy = FLOAT_SQRT(Pyy);
1072 Pey = FLOAT_DIVU(Pey,Pyy);
1073
1074 /* Compute correlation updatete rate */
1075 tmp32 = MULT16_32_Q15(st->beta0,Syy);
1076 if (tmp32 > MULT16_32_Q15(st->beta_max,See))
1077 tmp32 = MULT16_32_Q15(st->beta_max,See);
1078 alpha = FLOAT_DIV32(tmp32, See);
1079 alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
1080 /* Update correlations (recursive average) */
1081 st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
1082 st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
1083 if (FLOAT_LT(st->Pyy, FLOAT_ONE))
1084 st->Pyy = FLOAT_ONE;
1085 /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
1086 if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
1087 st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
1088 if (FLOAT_GT(st->Pey, st->Pyy))
1089 st->Pey = st->Pyy;
1090 /* leak_estimate is the linear regression result */
1091 st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
1092 /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
1093 if (st->leak_estimate > 16383)
1094 st->leak_estimate = 32767;
1095 else
1096 st->leak_estimate = SHL16(st->leak_estimate,1);
1097 /*printf ("%f\n", st->leak_estimate);*/
1098
1099 /* Compute Residual to Error Ratio */
1100#ifdef FIXED_POINT
1101 tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
1102 tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
1103 /* Check for y in e (lower bound on RER) */
1104 {
1105 spx_float_t bound = PSEUDOFLOAT(Sey);
1106 bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
1107 if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
1108 tmp32 = See;
1109 else if (tmp32 < FLOAT_EXTRACT32(bound))
1110 tmp32 = FLOAT_EXTRACT32(bound);
1111 }
1112 if (tmp32 > SHR32(See,1))
1113 tmp32 = SHR32(See,1);
1114 RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
1115#else
1116 RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
1117 /* Check for y in e (lower bound on RER) */
1118 if (RER < Sey*Sey/(1+See*Syy))
1119 RER = Sey*Sey/(1+See*Syy);
1120 if (RER > .5)
1121 RER = .5;
1122#endif
1123
1124 /* We consider that the filter has had minimal adaptation if the following is true*/
1125 if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
1126 {
1127 st->adapted = 1;
1128 }
1129
1130 if (st->adapted)
1131 {
1132 /* Normal learning rate calculation once we're past the minimal adaptation phase */
1133 for (i=0;i<=st->frame_size;i++)
1134 {
1135 spx_word32_t r, e;
1136 /* Compute frequency-domain adaptation mask */
1137 r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
1138 e = SHL32(st->Rf[i],3)+1;
1139#ifdef FIXED_POINT
1140 if (r>SHR32(e,1))
1141 r = SHR32(e,1);
1142#else
1143 if (r>.5*e)
1144 r = .5*e;
1145#endif
1146 r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1147 /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
1148 st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
1149 }
1150 } else {
1151 /* Temporary adaption rate if filter is not yet adapted enough */
1152 spx_word16_t adapt_rate=0;
1153
1154 if (Sxx > SHR32(MULT16_16(N, 1000),6))
1155 {
1156 tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1157#ifdef FIXED_POINT
1158 if (tmp32 > SHR32(See,2))
1159 tmp32 = SHR32(See,2);
1160#else
1161 if (tmp32 > .25*See)
1162 tmp32 = .25*See;
1163#endif
1164 adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1165 }
1166 for (i=0;i<=st->frame_size;i++)
1167 st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
1168
1169
1170 /* How much have we adapted so far? */
1171 st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
1172 }
1173
1174 /* FIXME: MC conversion required */
1175 for (i=0;i<st->frame_size;i++)
1176 st->last_y[i] = st->last_y[st->frame_size+i];
1177 if (st->adapted)
1178 {
1179 /* If the filter is adapted, take the filtered echo */
1180 for (i=0;i<st->frame_size;i++)
1181 st->last_y[st->frame_size+i] = in[i]-out[i];
1182 } else {
1183 /* If filter isn't adapted yet, all we can do is take the far end signal directly */
1184 /* moved earlier: for (i=0;i<N;i++)
1185 st->last_y[i] = st->x[i];*/
1186 }
1187
1188}
1189
1190/* Compute spectrum of estimated echo for use in an echo post-filter */
1191void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
1192{
1193 int i;
1194 spx_word16_t leak2;
1195 int N;
1196
1197 N = st->window_size;
1198
1199 /* Apply hanning window (should pre-compute it)*/
1200 for (i=0;i<N;i++)
1201 st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
1202
1203 /* Compute power spectrum of the echo */
1204 spx_fft(st->fft_table, st->y, st->Y);
1205 power_spectrum(st->Y, residual_echo, N);
1206
1207#ifdef FIXED_POINT
1208 if (st->leak_estimate > 16383)
1209 leak2 = 32767;
1210 else
1211 leak2 = SHL16(st->leak_estimate, 1);
1212#else
1213 if (st->leak_estimate>.5)
1214 leak2 = 1;
1215 else
1216 leak2 = 2*st->leak_estimate;
1217#endif
1218 /* Estimate residual echo */
1219 for (i=0;i<=st->frame_size;i++)
1220 residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
1221
1222}
1223
1224EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1225{
1226 switch(request)
1227 {
1228
1229 case SPEEX_ECHO_GET_FRAME_SIZE:
1230 (*(int*)ptr) = st->frame_size;
1231 break;
1232 case SPEEX_ECHO_SET_SAMPLING_RATE:
1233 st->sampling_rate = (*(int*)ptr);
1234 st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
1235#ifdef FIXED_POINT
1236 st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
1237 st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
1238#else
1239 st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
1240 st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
1241#endif
1242 if (st->sampling_rate<12000)
1243 st->notch_radius = QCONST16(.9, 15);
1244 else if (st->sampling_rate<24000)
1245 st->notch_radius = QCONST16(.982, 15);
1246 else
1247 st->notch_radius = QCONST16(.992, 15);
1248 break;
1249 case SPEEX_ECHO_GET_SAMPLING_RATE:
1250 (*(int*)ptr) = st->sampling_rate;
1251 break;
1252 case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
1253 /*FIXME: Implement this for multiple channels */
1254 *((spx_int32_t *)ptr) = st->M * st->frame_size;
1255 break;
1256 case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
1257 {
1258 int M = st->M, N = st->window_size, n = st->frame_size, i, j;
1259 spx_int32_t *filt = (spx_int32_t *) ptr;
1260 for(j=0;j<M;j++)
1261 {
1262 /*FIXME: Implement this for multiple channels */
1263#ifdef FIXED_POINT
1264 for (i=0;i<N;i++)
1265 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
1266 spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
1267#else
1268 spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
1269#endif
1270 for(i=0;i<n;i++)
1271 filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
1272 }
1273 }
1274 break;
1275 default:
1276 speex_warning_int("Unknown speex_echo_ctl request: ", request);
1277 return -1;
1278 }
1279 return 0;
1280}