blob: 8be617082105e9544940226f9c019f1b7537192b [file] [log] [blame]
/* $Id$ */
/*
* Copyright (C) 2003-2007 Benny Prijono <benny@prijono.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <pjmedia/silencedet.h>
#include <pjmedia/alaw_ulaw.h>
#include <pjmedia/errno.h>
#include <pj/assert.h>
#include <pj/log.h>
#include <pj/pool.h>
/* File name passed as the first argument of the TRACE_() log calls below. */
#define THIS_FILE "silencedet.c"
/*
 * TRACE_(x) forwards its (parenthesised) argument list to PJ_LOG at level 3.
 * Switch the "#if 1" below to "#if 0" to make TRACE_() expand to nothing.
 */
#if 1
# define TRACE_(x) PJ_LOG(3,x)
#else
# define TRACE_(x)
#endif
/**
* This enumeration specifies operation mode of silence detector
*/
typedef enum pjmedia_silence_det_mode {
VAD_MODE_NONE, /* Detection disabled; the detector never reports silence. */
VAD_MODE_FIXED, /* Level is compared against a threshold that is not adjusted. */
VAD_MODE_ADAPTIVE /* Threshold is re-estimated from the recent signal levels. */
} pjmedia_silence_det_mode;
/**
* Default settings. Each value is substituted when the caller passes a
* negative number for the corresponding parameter.
*/
#define DEF_RECALC_ON_VOICED 4000 /* Time to recalculate threshold
in voiced condition, in ms. */
#define DEF_RECALC_ON_SILENCE 2000 /* Time to recalculate threshold
in silence condition, in ms. */
#define DEF_BEFORE_SILENCE 400 /* Silence time before really changing
state into SILENCE, in ms. */
#define DEF_THRESHOLD 1000 /* Default threshold, compared directly
against the signal level. */
/**
* This enumeration specifies the states of the silence detector.
*
* STATE_SILENCE is the first enumerator (value 0), so a zero-initialized
* detector starts out in the silence state.
*/
enum pjmedia_silence_det_state {
STATE_SILENCE, /* Silence confirmed; the only state reported as silence. */
STATE_START_SILENCE, /* Level dropped while voiced; waiting for the
before_silence time to elapse. */
STATE_VOICED /* Level is above the threshold. */
};
/**
* This structure holds the silence detector state.
*
* The timers are advanced by ptime for every level applied in adaptive
* mode, so they are expressed in milliseconds like the settings they are
* compared against.
*/
struct pjmedia_silence_det
{
char objname[PJ_MAX_OBJ_NAME]; /**< VAD name. */
int mode; /**< VAD mode (a pjmedia_silence_det_mode
value). */
unsigned ptime; /**< Frame time, in msec. */
unsigned threshold; /**< Current threshold level. */
unsigned sum_level; /**< Total sum of recent level. */
unsigned sum_cnt; /**< Number of level summed. */
unsigned silence_timer; /**< Silence condition timer; cleared
whenever a voiced level is seen. */
unsigned voiced_timer; /**< Voiced condition timer; cleared
whenever a non-voiced level is seen. */
enum pjmedia_silence_det_state state;/**< Silence detector state. */
unsigned recalc_on_voiced; /**< Setting of time to recalc
threshold in voiced condition. */
unsigned recalc_on_silence; /**< Setting of time to recalc
threshold in silence condition.*/
unsigned before_silence; /**< Setting of silence time before
really changing state into SILENCE,
in ms. */
};
/**
 * Create a silence detector with the default parameters, operating in
 * adaptive mode.
 *
 * The detector is zero-allocated from the pool, so it starts in
 * STATE_SILENCE with all timers and level sums cleared.
 *
 * @param pool              Pool from which the detector is allocated.
 * @param clock_rate        Number of samples per second. Must be non-zero,
 *                          because it is the divisor used to derive the
 *                          frame time.
 * @param samples_per_frame Number of samples in each frame.
 * @param p_sd              Receives the pointer to the new detector.
 *
 * @return PJ_SUCCESS on success, or PJ_EINVAL when pool or p_sd is NULL
 *         or clock_rate is zero.
 */
PJ_DEF(pj_status_t) pjmedia_silence_det_create( pj_pool_t *pool,
                                                unsigned clock_rate,
                                                unsigned samples_per_frame,
                                                pjmedia_silence_det **p_sd)
{
    pjmedia_silence_det *sd;

    /* clock_rate is used as a divisor below; reject zero up front instead
     * of faulting on the division.
     */
    PJ_ASSERT_RETURN(pool && p_sd && clock_rate, PJ_EINVAL);

    sd = PJ_POOL_ZALLOC_T(pool, pjmedia_silence_det);

    /* Default object name is derived from the detector address. */
    pj_ansi_snprintf(sd->objname, PJ_MAX_OBJ_NAME, "sd%p", sd);
    sd->objname[PJ_MAX_OBJ_NAME-1] = '\0';

    /* Frame duration in milliseconds. */
    sd->ptime = samples_per_frame * 1000 / clock_rate;

    /* Default settings */
    pjmedia_silence_det_set_params(sd, -1, -1, -1);

    /* Restart in adaptive, silent mode */
    pjmedia_silence_det_set_adaptive( sd, -1 );

    *p_sd = sd;
    return PJ_SUCCESS;
}
/**
 * Set the object name of the silence detector.
 *
 * NOTE: name is passed to pj_ansi_snprintf() as the FORMAT string, with the
 * detector pointer as its single argument, so a name such as "sd%p" embeds
 * the detector address. Callers must not pass a string containing any other
 * conversion specifier. The result is truncated to fit the name buffer.
 *
 * @param sd    The silence detector.
 * @param name  Name, or name format, to assign.
 *
 * @return PJ_SUCCESS, or PJ_EINVAL when sd or name is NULL.
 */
PJ_DEF(pj_status_t) pjmedia_silence_det_set_name( pjmedia_silence_det *sd,
                                                  const char *name)
{
    PJ_ASSERT_RETURN(sd && name, PJ_EINVAL);

    /* objname is an array of PJ_MAX_OBJ_NAME chars, so sizeof yields the
     * same bound; terminate explicitly in case the formatter truncated.
     */
    pj_ansi_snprintf(sd->objname, sizeof(sd->objname), name, sd);
    sd->objname[sizeof(sd->objname) - 1] = '\0';

    return PJ_SUCCESS;
}
/**
 * Switch the detector to adaptive mode and set its starting threshold.
 *
 * Only the mode and the threshold are written; the current state, timers
 * and level sums are left as they are.
 *
 * @param sd        The silence detector.
 * @param threshold Initial threshold; a negative value selects
 *                  DEF_THRESHOLD.
 *
 * @return PJ_SUCCESS, or PJ_EINVAL when sd is NULL.
 */
PJ_DEF(pj_status_t) pjmedia_silence_det_set_adaptive(pjmedia_silence_det *sd,
                                                     int threshold)
{
    PJ_ASSERT_RETURN(sd, PJ_EINVAL);

    sd->threshold = (threshold < 0) ? DEF_THRESHOLD : threshold;
    sd->mode = VAD_MODE_ADAPTIVE;

    return PJ_SUCCESS;
}
/**
 * Switch the detector to fixed mode, where a level is judged against a
 * threshold that is never re-estimated.
 *
 * @param sd        The silence detector.
 * @param threshold Threshold to use; a negative value selects
 *                  DEF_THRESHOLD.
 *
 * @return PJ_SUCCESS, or PJ_EINVAL when sd is NULL.
 */
PJ_DEF(pj_status_t) pjmedia_silence_det_set_fixed( pjmedia_silence_det *sd,
                                                   int threshold )
{
    PJ_ASSERT_RETURN(sd, PJ_EINVAL);

    sd->threshold = (threshold < 0) ? DEF_THRESHOLD : threshold;
    sd->mode = VAD_MODE_FIXED;

    return PJ_SUCCESS;
}
/**
 * Set the timing parameters of the detector. All values are in
 * milliseconds, and a negative value selects the built-in default.
 *
 * @param sd             The silence detector.
 * @param before_silence Time the signal must stay non-voiced before the
 *                       state really changes to silence
 *                       (default DEF_BEFORE_SILENCE).
 * @param recalc_time1   Time in voiced condition after which the threshold
 *                       is recalculated (default DEF_RECALC_ON_VOICED).
 * @param recalc_time2   Time in silence condition after which the threshold
 *                       is recalculated (default DEF_RECALC_ON_SILENCE).
 *
 * @return PJ_SUCCESS, or PJ_EINVAL when sd is NULL.
 */
PJ_DEF(pj_status_t) pjmedia_silence_det_set_params( pjmedia_silence_det *sd,
                                                    int before_silence,
                                                    int recalc_time1,
                                                    int recalc_time2)
{
    PJ_ASSERT_RETURN(sd, PJ_EINVAL);

    sd->before_silence = (before_silence < 0) ? DEF_BEFORE_SILENCE
                                              : before_silence;
    sd->recalc_on_voiced = (recalc_time1 < 0) ? DEF_RECALC_ON_VOICED
                                              : recalc_time1;
    sd->recalc_on_silence = (recalc_time2 < 0) ? DEF_RECALC_ON_SILENCE
                                               : recalc_time2;

    return PJ_SUCCESS;
}
/**
 * Disable the silence detector by setting its mode to VAD_MODE_NONE.
 * Threshold, timers and state are left untouched.
 *
 * @param sd    The silence detector.
 *
 * @return PJ_SUCCESS, or PJ_EINVAL when sd is NULL.
 */
PJ_DEF(pj_status_t) pjmedia_silence_det_disable( pjmedia_silence_det *sd )
{
PJ_ASSERT_RETURN(sd, PJ_EINVAL);
sd->mode = VAD_MODE_NONE;
return PJ_SUCCESS;
}
/**
 * Calculate the average signal level of a block of 16-bit samples, i.e.
 * the mean of the absolute sample values (integer division).
 *
 * @param samples   The samples. Not dereferenced when count is zero.
 * @param count     Number of samples.
 *
 * @return The average absolute level, or 0 when count is zero.
 */
PJ_DEF(pj_int32_t) pjmedia_calc_avg_signal( const pj_int16_t samples[],
                                            pj_size_t count)
{
    pj_uint32_t total = 0;
    pj_size_t i;

    if (count == 0)
        return 0;

    for (i = 0; i < count; ++i) {
        /* Widen to int first so that negating -32768 cannot overflow. */
        int sample = samples[i];

        total += (pj_uint32_t)(sample < 0 ? -sample : sample);
    }

    return (pj_int32_t)(total / count);
}
/**
 * Feed one signal level into the silence detector and report whether the
 * signal is considered silent.
 *
 * - VAD_MODE_NONE: always returns PJ_FALSE.
 * - VAD_MODE_FIXED: returns non-zero when level is below the threshold;
 *   no state is updated.
 * - VAD_MODE_ADAPTIVE: the level is added to the running average, the
 *   SILENCE / START_SILENCE / VOICED state machine is advanced by one frame
 *   time, and the threshold is re-estimated from the running average when
 *   the configured recalculation times elapse.
 *
 * @param sd    The silence detector. Must not be NULL (it is not checked).
 * @param level Signal level of the current frame.
 *
 * @return Non-zero if the signal is considered silent.
 */
PJ_DEF(pj_bool_t) pjmedia_silence_det_apply( pjmedia_silence_det *sd,
                                             pj_uint32_t level)
{
    /* Unsigned, like the sums it is derived from, so that doubling it
     * below can never be a signed-overflow shift.
     */
    unsigned avg_recent_level;

    if (sd->mode == VAD_MODE_NONE)
        return PJ_FALSE;

    if (sd->mode == VAD_MODE_FIXED)
        return (level < sd->threshold);

    /* Calculating recent level */
    sd->sum_level += level;
    ++sd->sum_cnt;
    avg_recent_level = (sd->sum_level / sd->sum_cnt);

    /* A level at or above the maximum threshold is always voiced, even if
     * the adaptive threshold has drifted higher than that.
     */
    if (level > sd->threshold ||
        level >= PJMEDIA_SILENCE_DET_MAX_THRESHOLD)
    {
        sd->silence_timer = 0;
        sd->voiced_timer += sd->ptime;

        switch(sd->state) {
        case STATE_VOICED:
            if (sd->voiced_timer > sd->recalc_on_voiced) {
                /* Voiced for long time (>recalc_on_voiced), current
                 * threshold seems to be too low. Move it halfway towards
                 * the recent average level.
                 */
                sd->threshold = (avg_recent_level + sd->threshold) >> 1;
                TRACE_((THIS_FILE,"Re-adjust threshold (in talk burst) "
                        "to %u", sd->threshold));

                sd->voiced_timer = 0;

                /* Reset sig_level */
                sd->sum_level = avg_recent_level;
                sd->sum_cnt = 1;
            }
            break;

        case STATE_SILENCE:
            TRACE_((THIS_FILE,"Starting talk burst (level=%u threshold=%u)",
                    (unsigned)level, sd->threshold));
            /* fallthrough */

        case STATE_START_SILENCE:
            sd->state = STATE_VOICED;

            /* Reset sig_level */
            sd->sum_level = level;
            sd->sum_cnt = 1;
            break;

        default:
            pj_assert(0);
            break;
        }
    } else {
        sd->voiced_timer = 0;
        sd->silence_timer += sd->ptime;

        switch(sd->state) {
        case STATE_SILENCE:
            if (sd->silence_timer >= sd->recalc_on_silence) {
                /* Silent for a long time: track the noise floor by setting
                 * the threshold to twice the recent average level.
                 */
                sd->threshold = avg_recent_level << 1;
                TRACE_((THIS_FILE,"Re-adjust threshold (in silence) "
                        "to %u", sd->threshold));

                sd->silence_timer = 0;

                /* Reset sig_level */
                sd->sum_level = avg_recent_level;
                sd->sum_cnt = 1;
            }
            break;

        case STATE_VOICED:
            sd->state = STATE_START_SILENCE;

            /* Reset sig_level */
            sd->sum_level = level;
            sd->sum_cnt = 1;
            /* fallthrough: the before_silence check also applies to the
             * very first non-voiced frame.
             */

        case STATE_START_SILENCE:
            if (sd->silence_timer >= sd->before_silence) {
                sd->state = STATE_SILENCE;
                sd->threshold = avg_recent_level << 1;
                TRACE_((THIS_FILE,"Starting silence (level=%u "
                        "threshold=%u)", (unsigned)level, sd->threshold));

                /* Reset sig_level */
                sd->sum_level = avg_recent_level;
                sd->sum_cnt = 1;
            }
            break;

        default:
            pj_assert(0);
            break;
        }
    }

    return (sd->state == STATE_SILENCE);
}
/**
 * Run silence detection on one frame of samples: compute the frame's
 * average signal level with pjmedia_calc_avg_signal() and feed it to
 * pjmedia_silence_det_apply().
 *
 * @param sd        The silence detector.
 * @param samples   The 16-bit samples of the frame.
 * @param count     Number of samples.
 * @param p_level   Optional; when not NULL, receives the calculated
 *                  average signal level.
 *
 * @return Non-zero if the signal is considered silent.
 */
PJ_DEF(pj_bool_t) pjmedia_silence_det_detect( pjmedia_silence_det *sd,
                                              const pj_int16_t samples[],
                                              pj_size_t count,
                                              pj_int32_t *p_level)
{
    const pj_uint32_t frame_level = pjmedia_calc_avg_signal(samples, count);

    /* Hand the level back only when the caller asked for it. */
    if (p_level)
        *p_level = frame_level;

    return pjmedia_silence_det_apply(sd, frame_level);
}