blob: 85a6efb4a4aca6f1f430a564a0cf04fd0d438c1f [file] [log] [blame]
Tristan Matthews0a329cc2013-07-17 13:20:14 -04001/* $Id$ */
2/*
3 * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
4 * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20#include <pjmedia/types.h>
21#include <pjmedia/alaw_ulaw.h>
22#include <pjmedia/errno.h>
23#include <pjmedia/frame.h>
24#include <pjmedia/silencedet.h>
25#include <pj/array.h>
26#include <pj/assert.h>
27#include <pj/lock.h>
28#include <pj/log.h>
29#include <pj/os.h>
30#include <pj/pool.h>
31
32#include "echo_internal.h"
33
/* File name tag passed to PJ_LOG() / TRACE_() by the code in this file. */
#define THIS_FILE "echo_suppress.c"

/* Maximum float constant. Used as the initial "worst possible" value for
 * best_corr and min_factor[], so that any measured value compares lower.
 */
#define MAX_FLOAT (float)1.701411e38

/* The effective learn duration (in seconds) before we declare that learning
 * is complete. The actual learning duration itself may be longer depending
 * on the conversation pattern (e.g. we can't detect echo if speaker is only
 * playing silence).
 */
#define MAX_CALC_DURATION_SEC 3

/* The internal audio segment length, in milliseconds. 10ms should be good
 * and there should be no need to change it.
 */
#define SEGMENT_PTIME 10

/* The length of the template signal in milliseconds. The longer the template,
 * the better correlation will be found, at the expense of more processing
 * and longer learning time.
 */
#define TEMPLATE_PTIME 200

/* How long to look back in the past to see if either mic or speaker is
 * active.
 */
#define SIGNAL_LOOKUP_MSEC 200

/* The minimum level value to be considered as talking, in uLaw complement
 * (0-255).
 */
#define MIN_SIGNAL_ULAW 35

/* The period (in seconds) on which the ES will analyze its effectiveness,
 * and it may trigger soft-reset to force recalculation.
 */
#define CHECK_PERIOD 30

/* Maximum signal level of average echo residue (in uLaw complement). When
 * the residue value exceeds this value, we force the ES to re-learn.
 */
#define MAX_RESIDUE 2.5


/* Trace logging of talk-state transitions. Disabled by default (expands to
 * nothing); change "#if 0" to "#if 1" to route it to PJ_LOG at level 5.
 */
#if 0
# define TRACE_(expr) PJ_LOG(5,expr)
#else
# define TRACE_(expr)
#endif
83
84PJ_INLINE(float) FABS(float val)
85{
86 if (val < 0)
87 return -val;
88 else
89 return val;
90}
91
92
93#if defined(PJ_HAS_FLOATING_POINT) && PJ_HAS_FLOATING_POINT!=0
94 typedef float pj_ufloat_t;
95# define pj_ufloat_from_float(f) (f)
96# define pj_ufloat_mul_u(val1, f) ((val1) * (f))
97# define pj_ufloat_mul_i(val1, f) ((val1) * (f))
98#else
99 typedef pj_uint32_t pj_ufloat_t;
100
101 pj_ufloat_t pj_ufloat_from_float(float f)
102 {
103 return (pj_ufloat_t)(f * 65536);
104 }
105
106 unsigned pj_ufloat_mul_u(unsigned val1, pj_ufloat_t val2)
107 {
108 return (val1 * val2) >> 16;
109 }
110
111 int pj_ufloat_mul_i(int val1, pj_ufloat_t val2)
112 {
113 return (val1 * (pj_int32_t)val2) >> 16;
114 }
115#endif
116
117
/* Conversation state, as tracked by echo_supp_set_state(). */
typedef enum talk_state
{
    ST_NULL,        /* Initial state after echo_supp_reset().               */
    ST_LOCAL_TALK,  /* Mic level is at least the playback level, or the mic
                       is active while the speaker is idle.                 */
    ST_REM_SILENT,  /* Playback level is below MIN_SIGNAL_ULAW.             */
    ST_DOUBLETALK,  /* Both are active and the mic is louder than the
                       speaker.                                             */
    ST_REM_TALK     /* Speaker is active; the mic signal is treated as echo
                       (or is idle).                                        */
} talk_state_t;
127
/* Printable names of the conversation states, indexed by talk_state_t.
 * The entries must stay in the same order as the enum. In this file the
 * table is only referenced by the TRACE_() call in echo_supp_set_state().
 */
const char *state_names[] =
{
    "Null",
    "local talking",
    "remote silent",
    "doubletalk",
    "remote talking"
};
136
137
138/* Description:
139
140 The echo suppressor tries to find the position of echoed signal by looking
141 at the correlation between signal played to the speaker (played signal)
142 and the signal captured from the microphone (recorded signal).
143
144 To do this, it first divides the frames (from mic and speaker) into
145 segments, calculate the audio level of the segment, and save the level
146 information in the playback and record history (play_hist and rec_hist
147 respectively).
148
 In the history, the newest element (depicted as "t0" in the diagram below)
 is put in the last position of the array.
151
 The record history size is as large as the template size (templ_cnt), since
 we will use the record history as the template to find the best matching
 position in the playback history.
155
156 Here is the record history buffer:
157
158 <--templ_cnt-->
159 +-------------+
160 | rec_hist |
161 +-------------+
162 t-templ_cnt......t0
163
164 As you can see, the newest frame ("t0") is put as the last element in the
165 array.
166
167 The playback history size is larger than record history, since we need to
168 find the matching pattern in the past. The playback history size is
169 "templ_cnt + tail_cnt", where "tail_cnt" is the number of segments equal
170 to the maximum tail length. The maximum tail length is set when the ES
171 is created.
172
173 Here is the playback history buffer:
174
175 <-----tail_cnt-----> <--templ_cnt-->
176 +-------------------+--------------+
177 | play_hist |
178 +-------------------+--------------+
179 t-play_hist_cnt...t-templ_cnt.......t0
180
181
182
183 Learning:
184
185 During the processing, the ES calculates the following values:
186 - the correlation value, that is how similar the playback signal compared
187 to the mic signal. The lower the correlation value the better (i.e. more
188 similar) the signal is. The correlation value is done over the template
189 duration.
190 - the gain scaling factor, that is the ratio between mic signal and
191 speaker signal. The ES calculates both the minimum and average ratios.
192
193 The ES calculates both the values above for every tail position in the
194 playback history. The values are saved in arrays below:
195
196 <-----tail_cnt----->
197 +-------------------+
198 | corr_sum |
199 +-------------------+
200 | min_factor |
201 +-------------------+
202 | avg_factor |
203 +-------------------+
204
205 At the end of processing, the ES iterates through the correlation array and
206 picks the tail index with the lowest corr_sum value. This is the position
207 where echo is most likely to be found.
208
209
210 Processing:
211
212 Once learning is done, the ES will change the level of the mic signal
213 depending on the state of the conversation and according to the ratio that
214 has been found in the learning phase above.
215
216 */
217
/*
 * The simple echo suppressor state
 */
typedef struct echo_supp
{
    unsigned clock_rate; /* Clock rate. */
    pj_uint16_t samples_per_frame; /* Frame length in samples */
    pj_uint16_t samples_per_segment;/* Segment length in samples */
    pj_uint16_t tail_ms; /* Tail length in milliseconds */
    pj_uint16_t tail_samples; /* Tail length in samples. */

    pj_bool_t learning; /* Are we still learning? */
    talk_state_t talk_state; /* Current talking state */
    int tail_index; /* Echo location (index into the per-tail
                       arrays), -1 if not found */

    unsigned max_calc; /* # of calc before learning complete.
                          (see MAX_CALC_DURATION_SEC) */
    unsigned calc_cnt; /* Number of calculations so far */

    unsigned update_cnt; /* # of updates (segments processed) */
    unsigned templ_cnt; /* Template length, in # of segments */
    unsigned tail_cnt; /* Tail length, in # of segments */
    unsigned play_hist_cnt; /* # of segments in play_hist
                               (tail_cnt + templ_cnt) */
    pj_uint16_t *play_hist; /* Array of playback levels */
    pj_uint16_t *rec_hist; /* Array of rec levels (templ_cnt items) */

    float *corr_sum; /* Array of corr for each tail pos. */
    float *tmp_corr; /* Temporary corr array calculation */
    float best_corr; /* Best (lowest) correlation so far. */

    unsigned sum_rec_level; /* Running sum of level in rec_hist;
                               0 means "not calculated yet" */
    float rec_corr; /* Running corr in rec_hist. */

    unsigned sum_play_level0; /* Running sum of level for first pos;
                                 0 means "not calculated yet" */
    float play_corr0; /* Running corr for first pos. */

    float *min_factor; /* Array of minimum scaling factor */
    float *avg_factor; /* Array of average scaling factor */
    float *tmp_factor; /* Array to store provisional result */

    unsigned running_cnt; /* Running duration in # of frames */
    float residue; /* Accumulated echo residue. */
    float last_factor; /* Last factor applied to mic signal */
} echo_supp;
262
263
264
265/*
266 * Create.
267 */
268PJ_DEF(pj_status_t) echo_supp_create( pj_pool_t *pool,
269 unsigned clock_rate,
270 unsigned channel_count,
271 unsigned samples_per_frame,
272 unsigned tail_ms,
273 unsigned options,
274 void **p_state )
275{
276 echo_supp *ec;
277
278 PJ_UNUSED_ARG(channel_count);
279 PJ_UNUSED_ARG(options);
280
281 PJ_ASSERT_RETURN(samples_per_frame >= SEGMENT_PTIME * clock_rate / 1000,
282 PJ_ENOTSUP);
283
284 ec = PJ_POOL_ZALLOC_T(pool, struct echo_supp);
285 ec->clock_rate = clock_rate;
286 ec->samples_per_frame = (pj_uint16_t)samples_per_frame;
287 ec->samples_per_segment = (pj_uint16_t)(SEGMENT_PTIME * clock_rate / 1000);
288 ec->tail_ms = (pj_uint16_t)tail_ms;
289 ec->tail_samples = (pj_uint16_t)(tail_ms * clock_rate / 1000);
290
291 ec->templ_cnt = TEMPLATE_PTIME / SEGMENT_PTIME;
292 ec->tail_cnt = (pj_uint16_t)(tail_ms / SEGMENT_PTIME);
293 ec->play_hist_cnt = (pj_uint16_t)(ec->tail_cnt+ec->templ_cnt);
294
295 ec->max_calc = (pj_uint16_t)(MAX_CALC_DURATION_SEC * clock_rate /
296 ec->samples_per_segment);
297
298 ec->rec_hist = (pj_uint16_t*)
299 pj_pool_alloc(pool, ec->templ_cnt *
300 sizeof(ec->rec_hist[0]));
301
302 /* Note: play history has twice number of elements */
303 ec->play_hist = (pj_uint16_t*)
304 pj_pool_alloc(pool, ec->play_hist_cnt *
305 sizeof(ec->play_hist[0]));
306
307 ec->corr_sum = (float*)
308 pj_pool_alloc(pool, ec->tail_cnt *
309 sizeof(ec->corr_sum[0]));
310 ec->tmp_corr = (float*)
311 pj_pool_alloc(pool, ec->tail_cnt *
312 sizeof(ec->tmp_corr[0]));
313 ec->min_factor = (float*)
314 pj_pool_alloc(pool, ec->tail_cnt *
315 sizeof(ec->min_factor[0]));
316 ec->avg_factor = (float*)
317 pj_pool_alloc(pool, ec->tail_cnt *
318 sizeof(ec->avg_factor[0]));
319 ec->tmp_factor = (float*)
320 pj_pool_alloc(pool, ec->tail_cnt *
321 sizeof(ec->tmp_factor[0]));
322 echo_supp_reset(ec);
323
324 *p_state = ec;
325 return PJ_SUCCESS;
326}
327
328
/*
 * Destroy.
 *
 * Nothing is released here: everything in the state was allocated from the
 * pool given to echo_supp_create(), so it is presumably reclaimed when the
 * caller releases that pool. Always returns PJ_SUCCESS.
 */
PJ_DEF(pj_status_t) echo_supp_destroy(void *state)
{
    PJ_UNUSED_ARG(state);
    return PJ_SUCCESS;
}
337
338
339/*
340 * Hard reset
341 */
342PJ_DEF(void) echo_supp_reset(void *state)
343{
344 unsigned i;
345 echo_supp *ec = (echo_supp*) state;
346
347 pj_bzero(ec->rec_hist, ec->templ_cnt * sizeof(ec->rec_hist[0]));
348 pj_bzero(ec->play_hist, ec->play_hist_cnt * sizeof(ec->play_hist[0]));
349
350 for (i=0; i<ec->tail_cnt; ++i) {
351 ec->corr_sum[i] = ec->avg_factor[i] = 0;
352 ec->min_factor[i] = MAX_FLOAT;
353 }
354
355 ec->update_cnt = 0;
356 ec->calc_cnt = 0;
357 ec->learning = PJ_TRUE;
358 ec->tail_index = -1;
359 ec->best_corr = MAX_FLOAT;
360 ec->talk_state = ST_NULL;
361 ec->last_factor = 1.0;
362 ec->residue = 0;
363 ec->running_cnt = 0;
364 ec->sum_rec_level = ec->sum_play_level0 = 0;
365 ec->rec_corr = ec->play_corr0 = 0;
366}
367
368/*
369 * Soft reset to force the EC to re-learn without having to discard all
370 * rec and playback history.
371 */
372PJ_DEF(void) echo_supp_soft_reset(void *state)
373{
374 unsigned i;
375
376 echo_supp *ec = (echo_supp*) state;
377
378 for (i=0; i<ec->tail_cnt; ++i) {
379 ec->corr_sum[i] = 0;
380 }
381
382 ec->update_cnt = 0;
383 ec->calc_cnt = 0;
384 ec->learning = PJ_TRUE;
385 ec->best_corr = MAX_FLOAT;
386 ec->residue = 0;
387 ec->running_cnt = 0;
388 ec->sum_rec_level = ec->sum_play_level0 = 0;
389 ec->rec_corr = ec->play_corr0 = 0;
390
391 PJ_LOG(4,(THIS_FILE, "Echo suppressor soft reset. Re-learning.."));
392}
393
394
395/* Set state */
396static void echo_supp_set_state(echo_supp *ec, talk_state_t state,
397 unsigned level)
398{
399 PJ_UNUSED_ARG(level);
400
401 if (state != ec->talk_state) {
402 TRACE_((THIS_FILE, "[%03d.%03d] %s --> %s, level=%u",
403 (ec->update_cnt * SEGMENT_PTIME / 1000),
404 ((ec->update_cnt * SEGMENT_PTIME) % 1000),
405 state_names[ec->talk_state],
406 state_names[state], level));
407 ec->talk_state = state;
408 }
409}
410
411/*
412 * Update EC state
413 */
414static void echo_supp_update(echo_supp *ec, pj_int16_t *rec_frm,
415 const pj_int16_t *play_frm)
416{
417 int prev_index;
418 unsigned i, j, frm_level, sum_play_level, ulaw;
419 pj_uint16_t old_rec_frm_level, old_play_frm_level;
420 float play_corr;
421
422 ++ec->update_cnt;
423 if (ec->update_cnt > 0x7FFFFFFF)
424 ec->update_cnt = 0x7FFFFFFF; /* Detect overflow */
425
426 /* Calculate current play frame level */
427 frm_level = pjmedia_calc_avg_signal(play_frm, ec->samples_per_segment);
428 ++frm_level; /* to avoid division by zero */
429
430 /* Save the oldest frame level for later */
431 old_play_frm_level = ec->play_hist[0];
432
433 /* Push current frame level to the back of the play history */
434 pj_array_erase(ec->play_hist, sizeof(pj_uint16_t), ec->play_hist_cnt, 0);
435 ec->play_hist[ec->play_hist_cnt-1] = (pj_uint16_t) frm_level;
436
437 /* Calculate level of current mic frame */
438 frm_level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_segment);
439 ++frm_level; /* to avoid division by zero */
440
441 /* Save the oldest frame level for later */
442 old_rec_frm_level = ec->rec_hist[0];
443
444 /* Push to the back of the rec history */
445 pj_array_erase(ec->rec_hist, sizeof(pj_uint16_t), ec->templ_cnt, 0);
446 ec->rec_hist[ec->templ_cnt-1] = (pj_uint16_t) frm_level;
447
448
449 /* Can't do the calc until the play history is full. */
450 if (ec->update_cnt < ec->play_hist_cnt)
451 return;
452
453 /* Skip if learning is done */
454 if (!ec->learning)
455 return;
456
457
458 /* Calculate rec signal pattern */
459 if (ec->sum_rec_level == 0) {
460 /* Buffer has just been filled up, do full calculation */
461 ec->rec_corr = 0;
462 ec->sum_rec_level = 0;
463 for (i=0; i < ec->templ_cnt-1; ++i) {
464 float corr;
465 corr = (float)ec->rec_hist[i+1] / ec->rec_hist[i];
466 ec->rec_corr += corr;
467 ec->sum_rec_level += ec->rec_hist[i];
468 }
469 ec->sum_rec_level += ec->rec_hist[i];
470 } else {
471 /* Update from previous calculation */
472 ec->sum_rec_level = ec->sum_rec_level - old_rec_frm_level +
473 ec->rec_hist[ec->templ_cnt-1];
474 ec->rec_corr = ec->rec_corr - ((float)ec->rec_hist[0] /
475 old_rec_frm_level) +
476 ((float)ec->rec_hist[ec->templ_cnt-1] /
477 ec->rec_hist[ec->templ_cnt-2]);
478 }
479
480 /* Iterate through the play history and calculate the signal correlation
481 * for every tail position in the play_hist. Save the result in temporary
482 * array since we may bail out early if the conversation state is not good
483 * to detect echo.
484 */
485 /*
486 * First phase: do full calculation for the first position
487 */
488 if (ec->sum_play_level0 == 0) {
489 /* Buffer has just been filled up, do full calculation */
490 sum_play_level = 0;
491 play_corr = 0;
492 for (j=0; j<ec->templ_cnt-1; ++j) {
493 float corr;
494 corr = (float)ec->play_hist[j+1] / ec->play_hist[j];
495 play_corr += corr;
496 sum_play_level += ec->play_hist[j];
497 }
498 sum_play_level += ec->play_hist[j];
499 ec->sum_play_level0 = sum_play_level;
500 ec->play_corr0 = play_corr;
501 } else {
502 /* Update from previous calculation */
503 ec->sum_play_level0 = ec->sum_play_level0 - old_play_frm_level +
504 ec->play_hist[ec->templ_cnt-1];
505 ec->play_corr0 = ec->play_corr0 - ((float)ec->play_hist[0] /
506 old_play_frm_level) +
507 ((float)ec->play_hist[ec->templ_cnt-1] /
508 ec->play_hist[ec->templ_cnt-2]);
509 sum_play_level = ec->sum_play_level0;
510 play_corr = ec->play_corr0;
511 }
512 ec->tmp_corr[0] = FABS(play_corr - ec->rec_corr);
513 ec->tmp_factor[0] = (float)ec->sum_rec_level / sum_play_level;
514
515 /* Bail out if remote isn't talking */
516 ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF;
517 if (ulaw < MIN_SIGNAL_ULAW) {
518 echo_supp_set_state(ec, ST_REM_SILENT, ulaw);
519 return;
520 }
521 /* Bail out if local user is talking */
522 if (ec->sum_rec_level >= sum_play_level) {
523 echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw);
524 return;
525 }
526
527 /*
528 * Second phase: do incremental calculation for the rest of positions
529 */
530 for (i=1; i < ec->tail_cnt; ++i) {
531 unsigned end;
532
533 end = i + ec->templ_cnt;
534
535 sum_play_level = sum_play_level - ec->play_hist[i-1] +
536 ec->play_hist[end-1];
537 play_corr = play_corr - ((float)ec->play_hist[i]/ec->play_hist[i-1]) +
538 ((float)ec->play_hist[end-1]/ec->play_hist[end-2]);
539
540 /* Bail out if remote isn't talking */
541 ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF;
542 if (ulaw < MIN_SIGNAL_ULAW) {
543 echo_supp_set_state(ec, ST_REM_SILENT, ulaw);
544 return;
545 }
546
547 /* Bail out if local user is talking */
548 if (ec->sum_rec_level >= sum_play_level) {
549 echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw);
550 return;
551 }
552
553#if 0
554 // disabled: not a good idea if mic throws out loud echo
555 /* Also bail out if we suspect there's a doubletalk */
556 ulaw = pjmedia_linear2ulaw(ec->sum_rec_level/ec->templ_cnt) ^ 0xFF;
557 if (ulaw > MIN_SIGNAL_ULAW) {
558 echo_supp_set_state(ec, ST_DOUBLETALK, ulaw);
559 return;
560 }
561#endif
562
563 /* Calculate correlation and save to temporary array */
564 ec->tmp_corr[i] = FABS(play_corr - ec->rec_corr);
565
566 /* Also calculate the gain factor between mic and speaker level */
567 ec->tmp_factor[i] = (float)ec->sum_rec_level / sum_play_level;
568 pj_assert(ec->tmp_factor[i] < 1);
569 }
570
571 /* We seem to have good signal, we can update the EC state */
572 echo_supp_set_state(ec, ST_REM_TALK, MIN_SIGNAL_ULAW);
573
574 /* Accummulate the correlation value to the history and at the same
575 * time find the tail index of the best correlation.
576 */
577 prev_index = ec->tail_index;
578 for (i=1; i<ec->tail_cnt-1; ++i) {
579 float *p = &ec->corr_sum[i], sum;
580
581 /* Accummulate correlation value for this tail position */
582 ec->corr_sum[i] += ec->tmp_corr[i];
583
584 /* Update the min and avg gain factor for this tail position */
585 if (ec->tmp_factor[i] < ec->min_factor[i])
586 ec->min_factor[i] = ec->tmp_factor[i];
587 ec->avg_factor[i] = ((ec->avg_factor[i] * ec->tail_cnt) +
588 ec->tmp_factor[i]) /
589 (ec->tail_cnt + 1);
590
591 /* To get the best correlation, also include the correlation
592 * value of the neighbouring tail locations.
593 */
594 sum = *(p-1) + (*p)*2 + *(p+1);
595 //sum = *p;
596
597 /* See if we have better correlation value */
598 if (sum < ec->best_corr) {
599 ec->tail_index = i;
600 ec->best_corr = sum;
601 }
602 }
603
604 if (ec->tail_index != prev_index) {
605 unsigned duration;
606 int imin, iavg;
607
608 duration = ec->update_cnt * SEGMENT_PTIME;
609 imin = (int)(ec->min_factor[ec->tail_index] * 1000);
610 iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);
611
612 PJ_LOG(4,(THIS_FILE,
613 "Echo suppressor updated at t=%03d.%03ds, echo tail=%d msec"
614 ", factor min/avg=%d.%03d/%d.%03d",
615 (duration/1000), (duration%1000),
616 (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
617 imin/1000, imin%1000,
618 iavg/1000, iavg%1000));
619
620 }
621
622 ++ec->calc_cnt;
623
624 if (ec->calc_cnt > ec->max_calc) {
625 unsigned duration;
626 int imin, iavg;
627
628
629 ec->learning = PJ_FALSE;
630 ec->running_cnt = 0;
631
632 duration = ec->update_cnt * SEGMENT_PTIME;
633 imin = (int)(ec->min_factor[ec->tail_index] * 1000);
634 iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);
635
636 PJ_LOG(4,(THIS_FILE,
637 "Echo suppressor learning done at t=%03d.%03ds, tail=%d ms"
638 ", factor min/avg=%d.%03d/%d.%03d",
639 (duration/1000), (duration%1000),
640 (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
641 imin/1000, imin%1000,
642 iavg/1000, iavg%1000));
643 }
644
645}
646
647
648/* Amplify frame */
649static void amplify_frame(pj_int16_t *frm, unsigned length,
650 pj_ufloat_t factor)
651{
652 unsigned i;
653
654 for (i=0; i<length; ++i) {
655 frm[i] = (pj_int16_t)pj_ufloat_mul_i(frm[i], factor);
656 }
657}
658
659/*
660 * Perform echo cancellation.
661 */
662PJ_DEF(pj_status_t) echo_supp_cancel_echo( void *state,
663 pj_int16_t *rec_frm,
664 const pj_int16_t *play_frm,
665 unsigned options,
666 void *reserved )
667{
668 unsigned i, N;
669 echo_supp *ec = (echo_supp*) state;
670
671 PJ_UNUSED_ARG(options);
672 PJ_UNUSED_ARG(reserved);
673
674 /* Calculate number of segments. This should be okay even if
675 * samples_per_frame is not a multiply of samples_per_segment, since
676 * we only calculate level.
677 */
678 N = ec->samples_per_frame / ec->samples_per_segment;
679 pj_assert(N>0);
680 for (i=0; i<N; ++i) {
681 unsigned pos = i * ec->samples_per_segment;
682 echo_supp_update(ec, rec_frm+pos, play_frm+pos);
683 }
684
685 if (ec->tail_index < 0) {
686 /* Not ready */
687 } else {
688 unsigned lookup_cnt, rec_level=0, play_level=0;
689 unsigned tail_cnt;
690 float factor;
691
692 /* How many previous segments to lookup */
693 lookup_cnt = SIGNAL_LOOKUP_MSEC / SEGMENT_PTIME;
694 if (lookup_cnt > ec->templ_cnt)
695 lookup_cnt = ec->templ_cnt;
696
697 /* Lookup in recording history to get maximum mic level, to see
698 * if local user is currently talking
699 */
700 for (i=ec->templ_cnt - lookup_cnt; i < ec->templ_cnt; ++i) {
701 if (ec->rec_hist[i] > rec_level)
702 rec_level = ec->rec_hist[i];
703 }
704 rec_level = pjmedia_linear2ulaw(rec_level) ^ 0xFF;
705
706 /* Calculate the detected tail length, in # of segments */
707 tail_cnt = (ec->tail_cnt - ec->tail_index);
708
709 /* Lookup in playback history to get max speaker level, to see
710 * if remote user is currently talking
711 */
712 for (i=ec->play_hist_cnt -lookup_cnt -tail_cnt;
713 i<ec->play_hist_cnt-tail_cnt; ++i)
714 {
715 if (ec->play_hist[i] > play_level)
716 play_level = ec->play_hist[i];
717 }
718 play_level = pjmedia_linear2ulaw(play_level) ^ 0xFF;
719
720 if (rec_level >= MIN_SIGNAL_ULAW) {
721 if (play_level < MIN_SIGNAL_ULAW) {
722 /* Mic is talking, speaker is idle. Let mic signal pass as is.
723 */
724 factor = 1.0;
725 echo_supp_set_state(ec, ST_LOCAL_TALK, rec_level);
726 } else if (rec_level > play_level) {
727 /* Seems that both are talking. Scale the mic signal
728 * down a little bit to reduce echo, while allowing both
729 * parties to talk at the same time.
730 */
731 factor = (float)(ec->avg_factor[ec->tail_index] * 2);
732 echo_supp_set_state(ec, ST_DOUBLETALK, rec_level);
733 } else {
734 /* Speaker is active, but we've picked up large signal in
735 * the microphone. Assume that this is an echo, so bring
736 * the level down to minimum too.
737 */
738 factor = ec->min_factor[ec->tail_index] / 2;
739 echo_supp_set_state(ec, ST_REM_TALK, play_level);
740 }
741 } else {
742 if (play_level < MIN_SIGNAL_ULAW) {
743 /* Both mic and speaker seems to be idle. Also scale the
744 * mic signal down with average factor to reduce low power
745 * echo.
746 */
747 factor = ec->avg_factor[ec->tail_index] * 3 / 2;
748 echo_supp_set_state(ec, ST_REM_SILENT, rec_level);
749 } else {
750 /* Mic is idle, but there's something playing in speaker.
751 * Scale the mic down to minimum
752 */
753 factor = ec->min_factor[ec->tail_index] / 2;
754 echo_supp_set_state(ec, ST_REM_TALK, play_level);
755 }
756 }
757
758 /* Smoothen the transition */
759 if (factor >= ec->last_factor)
760 factor = (factor + ec->last_factor) / 2;
761 else
762 factor = (factor + ec->last_factor*19) / 20;
763
764 /* Amplify frame */
765 amplify_frame(rec_frm, ec->samples_per_frame,
766 pj_ufloat_from_float(factor));
767 ec->last_factor = factor;
768
769 if (ec->talk_state == ST_REM_TALK) {
770 unsigned level, recalc_cnt;
771
772 /* Get the adjusted frame signal level */
773 level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_frame);
774 level = pjmedia_linear2ulaw(level) ^ 0xFF;
775
776 /* Accumulate average echo residue to see the ES effectiveness */
777 ec->residue = ((ec->residue * ec->running_cnt) + level) /
778 (ec->running_cnt + 1);
779
780 ++ec->running_cnt;
781
782 /* Check if we need to re-learn */
783 recalc_cnt = CHECK_PERIOD * ec->clock_rate / ec->samples_per_frame;
784 if (ec->running_cnt > recalc_cnt) {
785 int iresidue;
786
787 iresidue = (int)(ec->residue*1000);
788
789 PJ_LOG(5,(THIS_FILE, "Echo suppressor residue = %d.%03d",
790 iresidue/1000, iresidue%1000));
791
792 if (ec->residue > MAX_RESIDUE && !ec->learning) {
793 echo_supp_soft_reset(ec);
794 ec->residue = 0;
795 } else {
796 ec->running_cnt = 0;
797 ec->residue = 0;
798 }
799 }
800 }
801 }
802
803 return PJ_SUCCESS;
804}
805