Blame - jni/pjproject-android/.svn/pristine/f7/f7fd67abb7f878b0030bd8be2f9f617fcd1d6f08.svn-base - jami-client-android

blob: 85a6efb4a4aca6f1f430a564a0cf04fd0d438c1f [file] [log] [blame]

Tristan Matthews	0a329cc	2013-07-17 13:20:14 -0400	[diff] [blame^]	1	/* $Id$ */
				2	/*
				3	* Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
				4	* Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
				5	*
				6	* This program is free software; you can redistribute it and/or modify
				7	* it under the terms of the GNU General Public License as published by
				8	* the Free Software Foundation; either version 2 of the License, or
				9	* (at your option) any later version.
				10	*
				11	* This program is distributed in the hope that it will be useful,
				12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				14	* GNU General Public License for more details.
				15	*
				16	* You should have received a copy of the GNU General Public License
				17	* along with this program; if not, write to the Free Software
				18	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				19	*/
				20	#include <pjmedia/types.h>
				21	#include <pjmedia/alaw_ulaw.h>
				22	#include <pjmedia/errno.h>
				23	#include <pjmedia/frame.h>
				24	#include <pjmedia/silencedet.h>
				25	#include <pj/array.h>
				26	#include <pj/assert.h>
				27	#include <pj/lock.h>
				28	#include <pj/log.h>
				29	#include <pj/os.h>
				30	#include <pj/pool.h>
				31
				32	#include "echo_internal.h"
				33
				34	#define THIS_FILE "echo_suppress.c"
				35
				36	/* Maximum float constant */
				37	#define MAX_FLOAT (float)1.701411e38
				38
				39	/* The effective learn duration (in seconds) before we declare that learning
				40	* is complete. The actual learning duration itself may be longer depending
				41	* on the conversation pattern (e.g. we can't detect echo if speaker is only
				42	* playing silence).
				43	*/
				44	#define MAX_CALC_DURATION_SEC 3
				45
				46	/* The internal audio segment length, in milliseconds. 10ms shold be good
				47	* and no need to change it.
				48	*/
				49	#define SEGMENT_PTIME 10
				50
				51	/* The length of the template signal in milliseconds. The longer the template,
				52	* the better correlation will be found, at the expense of more processing
				53	* and longer learning time.
				54	*/
				55	#define TEMPLATE_PTIME 200
				56
				57	/* How long to look back in the past to see if either mic or speaker is
				58	* active.
				59	*/
				60	#define SIGNAL_LOOKUP_MSEC 200
				61
				62	/* The minimum level value to be considered as talking, in uLaw complement
				63	* (0-255).
				64	*/
				65	#define MIN_SIGNAL_ULAW 35
				66
				67	/* The period (in seconds) on which the ES will analize it's effectiveness,
				68	* and it may trigger soft-reset to force recalculation.
				69	*/
				70	#define CHECK_PERIOD 30
				71
				72	/* Maximum signal level of average echo residue (in uLaw complement). When
				73	* the residue value exceeds this value, we force the ES to re-learn.
				74	*/
				75	#define MAX_RESIDUE 2.5
				76
				77
				78	#if 0
				79	# define TRACE_(expr) PJ_LOG(5,expr)
				80	#else
				81	# define TRACE_(expr)
				82	#endif
				83
				84	PJ_INLINE(float) FABS(float val)
				85	{
				86	if (val < 0)
				87	return -val;
				88	else
				89	return val;
				90	}
				91
				92
				93	#if defined(PJ_HAS_FLOATING_POINT) && PJ_HAS_FLOATING_POINT!=0
				94	typedef float pj_ufloat_t;
				95	# define pj_ufloat_from_float(f) (f)
				96	# define pj_ufloat_mul_u(val1, f) ((val1) * (f))
				97	# define pj_ufloat_mul_i(val1, f) ((val1) * (f))
				98	#else
				99	typedef pj_uint32_t pj_ufloat_t;
				100
				101	pj_ufloat_t pj_ufloat_from_float(float f)
				102	{
				103	return (pj_ufloat_t)(f * 65536);
				104	}
				105
				106	unsigned pj_ufloat_mul_u(unsigned val1, pj_ufloat_t val2)
				107	{
				108	return (val1 * val2) >> 16;
				109	}
				110
				111	int pj_ufloat_mul_i(int val1, pj_ufloat_t val2)
				112	{
				113	return (val1 * (pj_int32_t)val2) >> 16;
				114	}
				115	#endif
				116
				117
				118	/* Conversation state */
				119	typedef enum talk_state
				120	{
				121	ST_NULL,
				122	ST_LOCAL_TALK,
				123	ST_REM_SILENT,
				124	ST_DOUBLETALK,
				125	ST_REM_TALK
				126	} talk_state_t;
				127
				128	const char *state_names[] =
				129	{
				130	"Null",
				131	"local talking",
				132	"remote silent",
				133	"doubletalk",
				134	"remote talking"
				135	};
				136
				137
				138	/* Description:
				139
				140	The echo suppressor tries to find the position of echoed signal by looking
				141	at the correlation between signal played to the speaker (played signal)
				142	and the signal captured from the microphone (recorded signal).
				143
				144	To do this, it first divides the frames (from mic and speaker) into
				145	segments, calculate the audio level of the segment, and save the level
				146	information in the playback and record history (play_hist and rec_hist
				147	respectively).
				148
				149	In the history, the newest element (depicted as "t0" in the diagram below)
				150	is put in the last position of the array.
				151
				152	The record history size is as large as the template size (templ_cnt), since
				153	we will use the record history as the template to find the best matching
				154	position in the playback history.
				155
				156	Here is the record history buffer:
				157
				158	    <--templ_cnt-->
				159	    +-------------+
				160	    |  rec_hist   |
				161	    +-------------+
				162	t-templ_cnt......t0
				163
				164	As you can see, the newest frame ("t0") is put as the last element in the
				165	array.
				166
				167	The playback history size is larger than record history, since we need to
				168	find the matching pattern in the past. The playback history size is
				169	"templ_cnt + tail_cnt", where "tail_cnt" is the number of segments equal
				170	to the maximum tail length. The maximum tail length is set when the ES
				171	is created.
				172
				173	Here is the playback history buffer:
				174
				175	<-----tail_cnt-----> <--templ_cnt-->
				176	+-------------------+--------------+
				177	|            play_hist             |
				178	+-------------------+--------------+
				179	t-play_hist_cnt...t-templ_cnt.......t0
				180
				181
				182
				183	Learning:
				184
				185	During the processing, the ES calculates the following values:
				186	- the correlation value, that is how similar the playback signal compared
				187	to the mic signal. The lower the correlation value the better (i.e. more
				188	similar) the signal is. The correlation value is done over the template
				189	duration.
				190	- the gain scaling factor, that is the ratio between mic signal and
				191	speaker signal. The ES calculates both the minimum and average ratios.
				192
				193	The ES calculates both the values above for every tail position in the
				194	playback history. The values are saved in arrays below:
				195
				196	<-----tail_cnt----->
				197	+-------------------+
				198	|     corr_sum      |
				199	+-------------------+
				200	|    min_factor     |
				201	+-------------------+
				202	|    avg_factor     |
				203	+-------------------+
				204
				205	At the end of processing, the ES iterates through the correlation array and
				206	picks the tail index with the lowest corr_sum value. This is the position
				207	where echo is most likely to be found.
				208
				209
				210	Processing:
				211
				212	Once learning is done, the ES will change the level of the mic signal
				213	depending on the state of the conversation and according to the ratio that
				214	has been found in the learning phase above.
				215
				216	*/
				217
				218	/*
				219	* The simple echo suppresor state
				220	*/
				221	typedef struct echo_supp
				222	{
				223	unsigned clock_rate; /* Clock rate. */
				224	pj_uint16_t samples_per_frame; /* Frame length in samples */
				225	pj_uint16_t samples_per_segment;/* Segment length in samples */
				226	pj_uint16_t tail_ms; /* Tail length in milliseconds */
				227	pj_uint16_t tail_samples; /* Tail length in samples. */
				228
				229	pj_bool_t learning; /* Are we still learning yet? */
				230	talk_state_t talk_state; /* Current talking state */
				231	int tail_index; /* Echo location, -1 if not found */
				232
				233	unsigned max_calc; /* # of calc before learning complete.
				234	(see MAX_CALC_DURATION_SEC) */
				235	unsigned calc_cnt; /* Number of calculations so far */
				236
				237	unsigned update_cnt; /* # of updates */
				238	unsigned templ_cnt; /* Template length, in # of segments */
				239	unsigned tail_cnt; /* Tail length, in # of segments */
				240	unsigned play_hist_cnt; /* # of segments in play_hist */
				241	pj_uint16_t play_hist; / Array of playback levels */
				242	pj_uint16_t rec_hist; / Array of rec levels */
				243
				244	float corr_sum; / Array of corr for each tail pos. */
				245	float tmp_corr; / Temporary corr array calculation */
				246	float best_corr; /* Best correlation so far. */
				247
				248	unsigned sum_rec_level; /* Running sum of level in rec_hist */
				249	float rec_corr; /* Running corr in rec_hist. */
				250
				251	unsigned sum_play_level0; /* Running sum of level for first pos */
				252	float play_corr0; /* Running corr for first pos . */
				253
				254	float min_factor; / Array of minimum scaling factor */
				255	float avg_factor; / Array of average scaling factor */
				256	float tmp_factor; / Array to store provisional result */
				257
				258	unsigned running_cnt; /* Running duration in # of frames */
				259	float residue; /* Accummulated echo residue. */
				260	float last_factor; /* Last factor applied to mic signal */
				261	} echo_supp;
				262
				263
				264
				265	/*
				266	* Create.
				267	*/
				268	PJ_DEF(pj_status_t) echo_supp_create( pj_pool_t *pool,
				269	unsigned clock_rate,
				270	unsigned channel_count,
				271	unsigned samples_per_frame,
				272	unsigned tail_ms,
				273	unsigned options,
				274	void **p_state )
				275	{
				276	echo_supp *ec;
				277
				278	PJ_UNUSED_ARG(channel_count);
				279	PJ_UNUSED_ARG(options);
				280
				281	PJ_ASSERT_RETURN(samples_per_frame >= SEGMENT_PTIME * clock_rate / 1000,
				282	PJ_ENOTSUP);
				283
				284	ec = PJ_POOL_ZALLOC_T(pool, struct echo_supp);
				285	ec->clock_rate = clock_rate;
				286	ec->samples_per_frame = (pj_uint16_t)samples_per_frame;
				287	ec->samples_per_segment = (pj_uint16_t)(SEGMENT_PTIME * clock_rate / 1000);
				288	ec->tail_ms = (pj_uint16_t)tail_ms;
				289	ec->tail_samples = (pj_uint16_t)(tail_ms * clock_rate / 1000);
				290
				291	ec->templ_cnt = TEMPLATE_PTIME / SEGMENT_PTIME;
				292	ec->tail_cnt = (pj_uint16_t)(tail_ms / SEGMENT_PTIME);
				293	ec->play_hist_cnt = (pj_uint16_t)(ec->tail_cnt+ec->templ_cnt);
				294
				295	ec->max_calc = (pj_uint16_t)(MAX_CALC_DURATION_SEC * clock_rate /
				296	ec->samples_per_segment);
				297
				298	ec->rec_hist = (pj_uint16_t*)
				299	pj_pool_alloc(pool, ec->templ_cnt *
				300	sizeof(ec->rec_hist[0]));
				301
				302	/* Note: play history has twice number of elements */
				303	ec->play_hist = (pj_uint16_t*)
				304	pj_pool_alloc(pool, ec->play_hist_cnt *
				305	sizeof(ec->play_hist[0]));
				306
				307	ec->corr_sum = (float*)
				308	pj_pool_alloc(pool, ec->tail_cnt *
				309	sizeof(ec->corr_sum[0]));
				310	ec->tmp_corr = (float*)
				311	pj_pool_alloc(pool, ec->tail_cnt *
				312	sizeof(ec->tmp_corr[0]));
				313	ec->min_factor = (float*)
				314	pj_pool_alloc(pool, ec->tail_cnt *
				315	sizeof(ec->min_factor[0]));
				316	ec->avg_factor = (float*)
				317	pj_pool_alloc(pool, ec->tail_cnt *
				318	sizeof(ec->avg_factor[0]));
				319	ec->tmp_factor = (float*)
				320	pj_pool_alloc(pool, ec->tail_cnt *
				321	sizeof(ec->tmp_factor[0]));
				322	echo_supp_reset(ec);
				323
				324	*p_state = ec;
				325	return PJ_SUCCESS;
				326	}
				327
				328
				329	/*
				330	* Destroy.
				331	*/
				332	PJ_DEF(pj_status_t) echo_supp_destroy(void *state)
				333	{
				334	PJ_UNUSED_ARG(state);
				335	return PJ_SUCCESS;
				336	}
				337
				338
				339	/*
				340	* Hard reset
				341	*/
				342	PJ_DEF(void) echo_supp_reset(void *state)
				343	{
				344	unsigned i;
				345	echo_supp ec = (echo_supp) state;
				346
				347	pj_bzero(ec->rec_hist, ec->templ_cnt * sizeof(ec->rec_hist[0]));
				348	pj_bzero(ec->play_hist, ec->play_hist_cnt * sizeof(ec->play_hist[0]));
				349
				350	for (i=0; i<ec->tail_cnt; ++i) {
				351	ec->corr_sum[i] = ec->avg_factor[i] = 0;
				352	ec->min_factor[i] = MAX_FLOAT;
				353	}
				354
				355	ec->update_cnt = 0;
				356	ec->calc_cnt = 0;
				357	ec->learning = PJ_TRUE;
				358	ec->tail_index = -1;
				359	ec->best_corr = MAX_FLOAT;
				360	ec->talk_state = ST_NULL;
				361	ec->last_factor = 1.0;
				362	ec->residue = 0;
				363	ec->running_cnt = 0;
				364	ec->sum_rec_level = ec->sum_play_level0 = 0;
				365	ec->rec_corr = ec->play_corr0 = 0;
				366	}
				367
				368	/*
				369	* Soft reset to force the EC to re-learn without having to discard all
				370	* rec and playback history.
				371	*/
				372	PJ_DEF(void) echo_supp_soft_reset(void *state)
				373	{
				374	unsigned i;
				375
				376	echo_supp ec = (echo_supp) state;
				377
				378	for (i=0; i<ec->tail_cnt; ++i) {
				379	ec->corr_sum[i] = 0;
				380	}
				381
				382	ec->update_cnt = 0;
				383	ec->calc_cnt = 0;
				384	ec->learning = PJ_TRUE;
				385	ec->best_corr = MAX_FLOAT;
				386	ec->residue = 0;
				387	ec->running_cnt = 0;
				388	ec->sum_rec_level = ec->sum_play_level0 = 0;
				389	ec->rec_corr = ec->play_corr0 = 0;
				390
				391	PJ_LOG(4,(THIS_FILE, "Echo suppressor soft reset. Re-learning.."));
				392	}
				393
				394
				395	/* Set state */
				396	static void echo_supp_set_state(echo_supp *ec, talk_state_t state,
				397	unsigned level)
				398	{
				399	PJ_UNUSED_ARG(level);
				400
				401	if (state != ec->talk_state) {
				402	TRACE_((THIS_FILE, "[%03d.%03d] %s --> %s, level=%u",
				403	(ec->update_cnt * SEGMENT_PTIME / 1000),
				404	((ec->update_cnt * SEGMENT_PTIME) % 1000),
				405	state_names[ec->talk_state],
				406	state_names[state], level));
				407	ec->talk_state = state;
				408	}
				409	}
				410
				411	/*
				412	* Update EC state
				413	*/
				414	static void echo_supp_update(echo_supp ec, pj_int16_t rec_frm,
				415	const pj_int16_t *play_frm)
				416	{
				417	int prev_index;
				418	unsigned i, j, frm_level, sum_play_level, ulaw;
				419	pj_uint16_t old_rec_frm_level, old_play_frm_level;
				420	float play_corr;
				421
				422	++ec->update_cnt;
				423	if (ec->update_cnt > 0x7FFFFFFF)
				424	ec->update_cnt = 0x7FFFFFFF; /* Detect overflow */
				425
				426	/* Calculate current play frame level */
				427	frm_level = pjmedia_calc_avg_signal(play_frm, ec->samples_per_segment);
				428	++frm_level; /* to avoid division by zero */
				429
				430	/* Save the oldest frame level for later */
				431	old_play_frm_level = ec->play_hist[0];
				432
				433	/* Push current frame level to the back of the play history */
				434	pj_array_erase(ec->play_hist, sizeof(pj_uint16_t), ec->play_hist_cnt, 0);
				435	ec->play_hist[ec->play_hist_cnt-1] = (pj_uint16_t) frm_level;
				436
				437	/* Calculate level of current mic frame */
				438	frm_level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_segment);
				439	++frm_level; /* to avoid division by zero */
				440
				441	/* Save the oldest frame level for later */
				442	old_rec_frm_level = ec->rec_hist[0];
				443
				444	/* Push to the back of the rec history */
				445	pj_array_erase(ec->rec_hist, sizeof(pj_uint16_t), ec->templ_cnt, 0);
				446	ec->rec_hist[ec->templ_cnt-1] = (pj_uint16_t) frm_level;
				447
				448
				449	/* Can't do the calc until the play history is full. */
				450	if (ec->update_cnt < ec->play_hist_cnt)
				451	return;
				452
				453	/* Skip if learning is done */
				454	if (!ec->learning)
				455	return;
				456
				457
				458	/* Calculate rec signal pattern */
				459	if (ec->sum_rec_level == 0) {
				460	/* Buffer has just been filled up, do full calculation */
				461	ec->rec_corr = 0;
				462	ec->sum_rec_level = 0;
				463	for (i=0; i < ec->templ_cnt-1; ++i) {
				464	float corr;
				465	corr = (float)ec->rec_hist[i+1] / ec->rec_hist[i];
				466	ec->rec_corr += corr;
				467	ec->sum_rec_level += ec->rec_hist[i];
				468	}
				469	ec->sum_rec_level += ec->rec_hist[i];
				470	} else {
				471	/* Update from previous calculation */
				472	ec->sum_rec_level = ec->sum_rec_level - old_rec_frm_level +
				473	ec->rec_hist[ec->templ_cnt-1];
				474	ec->rec_corr = ec->rec_corr - ((float)ec->rec_hist[0] /
				475	old_rec_frm_level) +
				476	((float)ec->rec_hist[ec->templ_cnt-1] /
				477	ec->rec_hist[ec->templ_cnt-2]);
				478	}
				479
				480	/* Iterate through the play history and calculate the signal correlation
				481	* for every tail position in the play_hist. Save the result in temporary
				482	* array since we may bail out early if the conversation state is not good
				483	* to detect echo.
				484	*/
				485	/*
				486	* First phase: do full calculation for the first position
				487	*/
				488	if (ec->sum_play_level0 == 0) {
				489	/* Buffer has just been filled up, do full calculation */
				490	sum_play_level = 0;
				491	play_corr = 0;
				492	for (j=0; j<ec->templ_cnt-1; ++j) {
				493	float corr;
				494	corr = (float)ec->play_hist[j+1] / ec->play_hist[j];
				495	play_corr += corr;
				496	sum_play_level += ec->play_hist[j];
				497	}
				498	sum_play_level += ec->play_hist[j];
				499	ec->sum_play_level0 = sum_play_level;
				500	ec->play_corr0 = play_corr;
				501	} else {
				502	/* Update from previous calculation */
				503	ec->sum_play_level0 = ec->sum_play_level0 - old_play_frm_level +
				504	ec->play_hist[ec->templ_cnt-1];
				505	ec->play_corr0 = ec->play_corr0 - ((float)ec->play_hist[0] /
				506	old_play_frm_level) +
				507	((float)ec->play_hist[ec->templ_cnt-1] /
				508	ec->play_hist[ec->templ_cnt-2]);
				509	sum_play_level = ec->sum_play_level0;
				510	play_corr = ec->play_corr0;
				511	}
				512	ec->tmp_corr[0] = FABS(play_corr - ec->rec_corr);
				513	ec->tmp_factor[0] = (float)ec->sum_rec_level / sum_play_level;
				514
				515	/* Bail out if remote isn't talking */
				516	ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF;
				517	if (ulaw < MIN_SIGNAL_ULAW) {
				518	echo_supp_set_state(ec, ST_REM_SILENT, ulaw);
				519	return;
				520	}
				521	/* Bail out if local user is talking */
				522	if (ec->sum_rec_level >= sum_play_level) {
				523	echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw);
				524	return;
				525	}
				526
				527	/*
				528	* Second phase: do incremental calculation for the rest of positions
				529	*/
				530	for (i=1; i < ec->tail_cnt; ++i) {
				531	unsigned end;
				532
				533	end = i + ec->templ_cnt;
				534
				535	sum_play_level = sum_play_level - ec->play_hist[i-1] +
				536	ec->play_hist[end-1];
				537	play_corr = play_corr - ((float)ec->play_hist[i]/ec->play_hist[i-1]) +
				538	((float)ec->play_hist[end-1]/ec->play_hist[end-2]);
				539
				540	/* Bail out if remote isn't talking */
				541	ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF;
				542	if (ulaw < MIN_SIGNAL_ULAW) {
				543	echo_supp_set_state(ec, ST_REM_SILENT, ulaw);
				544	return;
				545	}
				546
				547	/* Bail out if local user is talking */
				548	if (ec->sum_rec_level >= sum_play_level) {
				549	echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw);
				550	return;
				551	}
				552
				553	#if 0
				554	// disabled: not a good idea if mic throws out loud echo
				555	/* Also bail out if we suspect there's a doubletalk */
				556	ulaw = pjmedia_linear2ulaw(ec->sum_rec_level/ec->templ_cnt) ^ 0xFF;
				557	if (ulaw > MIN_SIGNAL_ULAW) {
				558	echo_supp_set_state(ec, ST_DOUBLETALK, ulaw);
				559	return;
				560	}
				561	#endif
				562
				563	/* Calculate correlation and save to temporary array */
				564	ec->tmp_corr[i] = FABS(play_corr - ec->rec_corr);
				565
				566	/* Also calculate the gain factor between mic and speaker level */
				567	ec->tmp_factor[i] = (float)ec->sum_rec_level / sum_play_level;
				568	pj_assert(ec->tmp_factor[i] < 1);
				569	}
				570
				571	/* We seem to have good signal, we can update the EC state */
				572	echo_supp_set_state(ec, ST_REM_TALK, MIN_SIGNAL_ULAW);
				573
				574	/* Accummulate the correlation value to the history and at the same
				575	* time find the tail index of the best correlation.
				576	*/
				577	prev_index = ec->tail_index;
				578	for (i=1; i<ec->tail_cnt-1; ++i) {
				579	float *p = &ec->corr_sum[i], sum;
				580
				581	/* Accummulate correlation value for this tail position */
				582	ec->corr_sum[i] += ec->tmp_corr[i];
				583
				584	/* Update the min and avg gain factor for this tail position */
				585	if (ec->tmp_factor[i] < ec->min_factor[i])
				586	ec->min_factor[i] = ec->tmp_factor[i];
				587	ec->avg_factor[i] = ((ec->avg_factor[i] * ec->tail_cnt) +
				588	ec->tmp_factor[i]) /
				589	(ec->tail_cnt + 1);
				590
				591	/* To get the best correlation, also include the correlation
				592	* value of the neighbouring tail locations.
				593	*/
				594	sum = (p-1) + (p)2 + (p+1);
				595	//sum = *p;
				596
				597	/* See if we have better correlation value */
				598	if (sum < ec->best_corr) {
				599	ec->tail_index = i;
				600	ec->best_corr = sum;
				601	}
				602	}
				603
				604	if (ec->tail_index != prev_index) {
				605	unsigned duration;
				606	int imin, iavg;
				607
				608	duration = ec->update_cnt * SEGMENT_PTIME;
				609	imin = (int)(ec->min_factor[ec->tail_index] * 1000);
				610	iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);
				611
				612	PJ_LOG(4,(THIS_FILE,
				613	"Echo suppressor updated at t=%03d.%03ds, echo tail=%d msec"
				614	", factor min/avg=%d.%03d/%d.%03d",
				615	(duration/1000), (duration%1000),
				616	(ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
				617	imin/1000, imin%1000,
				618	iavg/1000, iavg%1000));
				619
				620	}
				621
				622	++ec->calc_cnt;
				623
				624	if (ec->calc_cnt > ec->max_calc) {
				625	unsigned duration;
				626	int imin, iavg;
				627
				628
				629	ec->learning = PJ_FALSE;
				630	ec->running_cnt = 0;
				631
				632	duration = ec->update_cnt * SEGMENT_PTIME;
				633	imin = (int)(ec->min_factor[ec->tail_index] * 1000);
				634	iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);
				635
				636	PJ_LOG(4,(THIS_FILE,
				637	"Echo suppressor learning done at t=%03d.%03ds, tail=%d ms"
				638	", factor min/avg=%d.%03d/%d.%03d",
				639	(duration/1000), (duration%1000),
				640	(ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
				641	imin/1000, imin%1000,
				642	iavg/1000, iavg%1000));
				643	}
				644
				645	}
				646
				647
				648	/* Amplify frame */
				649	static void amplify_frame(pj_int16_t *frm, unsigned length,
				650	pj_ufloat_t factor)
				651	{
				652	unsigned i;
				653
				654	for (i=0; i<length; ++i) {
				655	frm[i] = (pj_int16_t)pj_ufloat_mul_i(frm[i], factor);
				656	}
				657	}
				658
				659	/*
				660	* Perform echo cancellation.
				661	*/
				662	PJ_DEF(pj_status_t) echo_supp_cancel_echo( void *state,
				663	pj_int16_t *rec_frm,
				664	const pj_int16_t *play_frm,
				665	unsigned options,
				666	void *reserved )
				667	{
				668	unsigned i, N;
				669	echo_supp ec = (echo_supp) state;
				670
				671	PJ_UNUSED_ARG(options);
				672	PJ_UNUSED_ARG(reserved);
				673
				674	/* Calculate number of segments. This should be okay even if
				675	* samples_per_frame is not a multiply of samples_per_segment, since
				676	* we only calculate level.
				677	*/
				678	N = ec->samples_per_frame / ec->samples_per_segment;
				679	pj_assert(N>0);
				680	for (i=0; i<N; ++i) {
				681	unsigned pos = i * ec->samples_per_segment;
				682	echo_supp_update(ec, rec_frm+pos, play_frm+pos);
				683	}
				684
				685	if (ec->tail_index < 0) {
				686	/* Not ready */
				687	} else {
				688	unsigned lookup_cnt, rec_level=0, play_level=0;
				689	unsigned tail_cnt;
				690	float factor;
				691
				692	/* How many previous segments to lookup */
				693	lookup_cnt = SIGNAL_LOOKUP_MSEC / SEGMENT_PTIME;
				694	if (lookup_cnt > ec->templ_cnt)
				695	lookup_cnt = ec->templ_cnt;
				696
				697	/* Lookup in recording history to get maximum mic level, to see
				698	* if local user is currently talking
				699	*/
				700	for (i=ec->templ_cnt - lookup_cnt; i < ec->templ_cnt; ++i) {
				701	if (ec->rec_hist[i] > rec_level)
				702	rec_level = ec->rec_hist[i];
				703	}
				704	rec_level = pjmedia_linear2ulaw(rec_level) ^ 0xFF;
				705
				706	/* Calculate the detected tail length, in # of segments */
				707	tail_cnt = (ec->tail_cnt - ec->tail_index);
				708
				709	/* Lookup in playback history to get max speaker level, to see
				710	* if remote user is currently talking
				711	*/
				712	for (i=ec->play_hist_cnt -lookup_cnt -tail_cnt;
				713	i<ec->play_hist_cnt-tail_cnt; ++i)
				714	{
				715	if (ec->play_hist[i] > play_level)
				716	play_level = ec->play_hist[i];
				717	}
				718	play_level = pjmedia_linear2ulaw(play_level) ^ 0xFF;
				719
				720	if (rec_level >= MIN_SIGNAL_ULAW) {
				721	if (play_level < MIN_SIGNAL_ULAW) {
				722	/* Mic is talking, speaker is idle. Let mic signal pass as is.
				723	*/
				724	factor = 1.0;
				725	echo_supp_set_state(ec, ST_LOCAL_TALK, rec_level);
				726	} else if (rec_level > play_level) {
				727	/* Seems that both are talking. Scale the mic signal
				728	* down a little bit to reduce echo, while allowing both
				729	* parties to talk at the same time.
				730	*/
				731	factor = (float)(ec->avg_factor[ec->tail_index] * 2);
				732	echo_supp_set_state(ec, ST_DOUBLETALK, rec_level);
				733	} else {
				734	/* Speaker is active, but we've picked up large signal in
				735	* the microphone. Assume that this is an echo, so bring
				736	* the level down to minimum too.
				737	*/
				738	factor = ec->min_factor[ec->tail_index] / 2;
				739	echo_supp_set_state(ec, ST_REM_TALK, play_level);
				740	}
				741	} else {
				742	if (play_level < MIN_SIGNAL_ULAW) {
				743	/* Both mic and speaker seems to be idle. Also scale the
				744	* mic signal down with average factor to reduce low power
				745	* echo.
				746	*/
				747	factor = ec->avg_factor[ec->tail_index] * 3 / 2;
				748	echo_supp_set_state(ec, ST_REM_SILENT, rec_level);
				749	} else {
				750	/* Mic is idle, but there's something playing in speaker.
				751	* Scale the mic down to minimum
				752	*/
				753	factor = ec->min_factor[ec->tail_index] / 2;
				754	echo_supp_set_state(ec, ST_REM_TALK, play_level);
				755	}
				756	}
				757
				758	/* Smoothen the transition */
				759	if (factor >= ec->last_factor)
				760	factor = (factor + ec->last_factor) / 2;
				761	else
				762	factor = (factor + ec->last_factor*19) / 20;
				763
				764	/* Amplify frame */
				765	amplify_frame(rec_frm, ec->samples_per_frame,
				766	pj_ufloat_from_float(factor));
				767	ec->last_factor = factor;
				768
				769	if (ec->talk_state == ST_REM_TALK) {
				770	unsigned level, recalc_cnt;
				771
				772	/* Get the adjusted frame signal level */
				773	level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_frame);
				774	level = pjmedia_linear2ulaw(level) ^ 0xFF;
				775
				776	/* Accumulate average echo residue to see the ES effectiveness */
				777	ec->residue = ((ec->residue * ec->running_cnt) + level) /
				778	(ec->running_cnt + 1);
				779
				780	++ec->running_cnt;
				781
				782	/* Check if we need to re-learn */
				783	recalc_cnt = CHECK_PERIOD * ec->clock_rate / ec->samples_per_frame;
				784	if (ec->running_cnt > recalc_cnt) {
				785	int iresidue;
				786
				787	iresidue = (int)(ec->residue*1000);
				788
				789	PJ_LOG(5,(THIS_FILE, "Echo suppressor residue = %d.%03d",
				790	iresidue/1000, iresidue%1000));
				791
				792	if (ec->residue > MAX_RESIDUE && !ec->learning) {
				793	echo_supp_soft_reset(ec);
				794	ec->residue = 0;
				795	} else {
				796	ec->running_cnt = 0;
				797	ec->residue = 0;
				798	}
				799	}
				800	}
				801	}
				802
				803	return PJ_SUCCESS;
				804	}
				805