blob: e0342241d6eafe818845b9395e68a1367ccb69a5 [file] [log] [blame]
Tristan Matthews0a329cc2013-07-17 13:20:14 -04001/* $Id$ */
2/*
3 * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
4 * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20#include <pjmedia/silencedet.h>
21#include <pjmedia/alaw_ulaw.h>
22#include <pjmedia/errno.h>
23#include <pj/assert.h>
24#include <pj/log.h>
25#include <pj/pool.h>
26#include <pj/string.h>
27
/* Sender name passed as the first element of every log call in this file. */
#define THIS_FILE   "silencedet.c"

/*
 * Trace switch. While the condition below is 1, TRACE_() hands its
 * parenthesised argument list to PJ_LOG at level 5; set it to 0 to make
 * TRACE_() expand to nothing.
 */
#if 1
#   define TRACE_(x)    PJ_LOG(5,x)
#else
#   define TRACE_(x)
#endif
35
/**
 * Operation modes of the silence detector. The selected value is kept in
 * the `mode` member of the detector.
 */
typedef enum pjmedia_silence_det_mode {
    VAD_MODE_NONE     = 0,  /* Detection disabled.                        */
    VAD_MODE_FIXED    = 1,  /* Level compared against a fixed threshold.  */
    VAD_MODE_ADAPTIVE = 2   /* Threshold re-estimated while running.      */
} pjmedia_silence_det_mode;
44
/**
 * Default settings, applied when a caller passes a negative value for the
 * corresponding parameter.
 */

/* Time to recalculate the threshold in voiced condition, in ms. */
#define DEF_RECALC_ON_VOICED    4000

/* Time to recalculate the threshold in silence condition, in ms. */
#define DEF_RECALC_ON_SILENCE   2000

/* Silence time before really changing state into SILENCE, in ms. */
#define DEF_BEFORE_SILENCE      400

/* Default threshold. */
#define DEF_THRESHOLD           1000
55
/**
 * States of the adaptive silence detector.
 */
enum pjmedia_silence_det_state {
    /* Silence confirmed; this is the state reported to callers as silent. */
    STATE_SILENCE       = 0,

    /* Level dropped below the threshold, silence not yet confirmed. */
    STATE_START_SILENCE = 1,

    /* Level is above the threshold. */
    STATE_VOICED        = 2
};
64
65/**
66 * This structure holds the silence detector state.
67 */
68struct pjmedia_silence_det
69{
70 char objname[PJ_MAX_OBJ_NAME]; /**< VAD name. */
71
72 int mode; /**< VAD mode. */
73 unsigned ptime; /**< Frame time, in msec. */
74
75 unsigned threshold; /**< Current threshold level. */
76 unsigned sum_level; /**< Total sum of recent level. */
77 unsigned sum_cnt; /**< Number of level summed. */
78 unsigned silence_timer; /**< Silence condition timer. */
79 unsigned voiced_timer; /**< Voiced condition timer. */
80
81 enum pjmedia_silence_det_state state;/**< Silence detector state. */
82 unsigned recalc_on_voiced; /**< Setting of time to recalc
83 threshold in voiced condition. */
84 unsigned recalc_on_silence; /**< Setting of time to recalc
85 threshold in silence condition.*/
86 unsigned before_silence; /**< Setting of silence time before
87 really changing state into SILENCE,
88 in ms. */
89};
90
91
92
93PJ_DEF(pj_status_t) pjmedia_silence_det_create( pj_pool_t *pool,
94 unsigned clock_rate,
95 unsigned samples_per_frame,
96 pjmedia_silence_det **p_sd)
97{
98 pjmedia_silence_det *sd;
99
100 PJ_ASSERT_RETURN(pool && p_sd, PJ_EINVAL);
101
102 sd = PJ_POOL_ZALLOC_T(pool, pjmedia_silence_det);
103
104 pj_ansi_snprintf(sd->objname, PJ_MAX_OBJ_NAME, "sd%p", sd);
105 sd->objname[PJ_MAX_OBJ_NAME-1] = '\0';
106
107 sd->ptime = samples_per_frame * 1000 / clock_rate;
108
109 /* Default settings */
110 pjmedia_silence_det_set_params(sd, -1, -1, -1);
111
112 /* Restart in adaptive, silent mode */
113 pjmedia_silence_det_set_adaptive( sd, -1 );
114
115 *p_sd = sd;
116 return PJ_SUCCESS;
117}
118
119
120PJ_DEF(pj_status_t) pjmedia_silence_det_set_name( pjmedia_silence_det *sd,
121 const char *name)
122{
123 PJ_ASSERT_RETURN(sd && name, PJ_EINVAL);
124
125 pj_ansi_snprintf(sd->objname, PJ_MAX_OBJ_NAME, name, sd);
126 sd->objname[PJ_MAX_OBJ_NAME-1] = '\0';
127 return PJ_SUCCESS;
128}
129
130PJ_DEF(pj_status_t) pjmedia_silence_det_set_adaptive(pjmedia_silence_det *sd,
131 int threshold)
132{
133 PJ_ASSERT_RETURN(sd, PJ_EINVAL);
134
135 if (threshold < 0)
136 threshold = DEF_THRESHOLD;
137
138 sd->mode = VAD_MODE_ADAPTIVE;
139 sd->threshold = threshold;
140
141 return PJ_SUCCESS;
142}
143
144PJ_DEF(pj_status_t) pjmedia_silence_det_set_fixed( pjmedia_silence_det *sd,
145 int threshold )
146{
147 PJ_ASSERT_RETURN(sd, PJ_EINVAL);
148
149 if (threshold < 0)
150 threshold = DEF_THRESHOLD;
151
152 sd->mode = VAD_MODE_FIXED;
153 sd->threshold = threshold;
154
155 return PJ_SUCCESS;
156}
157
158PJ_DEF(pj_status_t) pjmedia_silence_det_set_params( pjmedia_silence_det *sd,
159 int before_silence,
160 int recalc_time1,
161 int recalc_time2)
162{
163 PJ_ASSERT_RETURN(sd, PJ_EINVAL);
164
165 if (recalc_time1 < 0)
166 recalc_time1 = DEF_RECALC_ON_VOICED;
167 if (recalc_time2 < 0)
168 recalc_time2 = DEF_RECALC_ON_SILENCE;
169 if (before_silence < 0)
170 before_silence = DEF_BEFORE_SILENCE;
171
172 sd->recalc_on_voiced = recalc_time1;
173 sd->recalc_on_silence = recalc_time2;
174 sd->before_silence = before_silence;
175
176 return PJ_SUCCESS;
177}
178
179
180PJ_DEF(pj_status_t) pjmedia_silence_det_disable( pjmedia_silence_det *sd )
181{
182 PJ_ASSERT_RETURN(sd, PJ_EINVAL);
183
184 sd->mode = VAD_MODE_NONE;
185
186 return PJ_SUCCESS;
187}
188
189
190PJ_DEF(pj_int32_t) pjmedia_calc_avg_signal( const pj_int16_t samples[],
191 pj_size_t count)
192{
193 pj_uint32_t sum = 0;
194
195 const pj_int16_t * pcm = samples;
196 const pj_int16_t * end = samples + count;
197
198 if (count==0)
199 return 0;
200
201 while (pcm != end) {
202 if (*pcm < 0)
203 sum -= *pcm++;
204 else
205 sum += *pcm++;
206 }
207
208 return (pj_int32_t)(sum / count);
209}
210
211PJ_DEF(pj_bool_t) pjmedia_silence_det_apply( pjmedia_silence_det *sd,
212 pj_uint32_t level)
213{
214 int avg_recent_level;
215
216 if (sd->mode == VAD_MODE_NONE)
217 return PJ_FALSE;
218
219 if (sd->mode == VAD_MODE_FIXED)
220 return (level < sd->threshold);
221
222 /* Calculating recent level */
223 sd->sum_level += level;
224 ++sd->sum_cnt;
225 avg_recent_level = (sd->sum_level / sd->sum_cnt);
226
227 if (level > sd->threshold ||
228 level >= PJMEDIA_SILENCE_DET_MAX_THRESHOLD)
229 {
230 sd->silence_timer = 0;
231 sd->voiced_timer += sd->ptime;
232
233 switch(sd->state) {
234 case STATE_VOICED:
235 if (sd->voiced_timer > sd->recalc_on_voiced) {
236 /* Voiced for long time (>recalc_on_voiced), current
237 * threshold seems to be too low.
238 */
239 sd->threshold = (avg_recent_level + sd->threshold) >> 1;
240 TRACE_((THIS_FILE,"Re-adjust threshold (in talk burst)"
241 "to %d", sd->threshold));
242
243 sd->voiced_timer = 0;
244
245 /* Reset sig_level */
246 sd->sum_level = avg_recent_level;
247 sd->sum_cnt = 1;
248 }
249 break;
250
251 case STATE_SILENCE:
252 TRACE_((THIS_FILE,"Starting talk burst (level=%d threshold=%d)",
253 level, sd->threshold));
254
255 case STATE_START_SILENCE:
256 sd->state = STATE_VOICED;
257
258 /* Reset sig_level */
259 sd->sum_level = level;
260 sd->sum_cnt = 1;
261
262 break;
263
264 default:
265 pj_assert(0);
266 break;
267 }
268 } else {
269 sd->voiced_timer = 0;
270 sd->silence_timer += sd->ptime;
271
272 switch(sd->state) {
273 case STATE_SILENCE:
274 if (sd->silence_timer >= sd->recalc_on_silence) {
275 sd->threshold = avg_recent_level << 1;
276 TRACE_((THIS_FILE,"Re-adjust threshold (in silence)"
277 "to %d", sd->threshold));
278
279 sd->silence_timer = 0;
280
281 /* Reset sig_level */
282 sd->sum_level = avg_recent_level;
283 sd->sum_cnt = 1;
284 }
285 break;
286
287 case STATE_VOICED:
288 sd->state = STATE_START_SILENCE;
289
290 /* Reset sig_level */
291 sd->sum_level = level;
292 sd->sum_cnt = 1;
293
294 case STATE_START_SILENCE:
295 if (sd->silence_timer >= sd->before_silence) {
296 sd->state = STATE_SILENCE;
297 sd->threshold = avg_recent_level << 1;
298 TRACE_((THIS_FILE,"Starting silence (level=%d "
299 "threshold=%d)", level, sd->threshold));
300
301 /* Reset sig_level */
302 sd->sum_level = avg_recent_level;
303 sd->sum_cnt = 1;
304 }
305 break;
306
307 default:
308 pj_assert(0);
309 break;
310 }
311 }
312
313 return (sd->state == STATE_SILENCE);
314}
315
316
317PJ_DEF(pj_bool_t) pjmedia_silence_det_detect( pjmedia_silence_det *sd,
318 const pj_int16_t samples[],
319 pj_size_t count,
320 pj_int32_t *p_level)
321{
322 pj_uint32_t level;
323
324 /* Calculate average signal level. */
325 level = pjmedia_calc_avg_signal(samples, count);
326
327 /* Report to caller, if required. */
328 if (p_level)
329 *p_level = level;
330
331 return pjmedia_silence_det_apply(sd, level);
332}
333