Implement ticket #56: Periodically transmit RTP packet on silence, to maintain NAT binding etc.

git-svn-id: https://svn.pjsip.org/repos/pjproject/trunk@888 74dad513-b988-da41-8d7b-12977e46ad98
diff --git a/pjmedia/include/pjmedia/config.h b/pjmedia/include/pjmedia/config.h
index ea84baa..5ecc04b 100644
--- a/pjmedia/include/pjmedia/config.h
+++ b/pjmedia/include/pjmedia/config.h
@@ -174,11 +174,16 @@
 /**
  * Specify how long (in miliseconds) the stream should suspend the
  * silence detector/voice activity detector (VAD) during the initial
- * period of the session.
+ * period of the session. This feature is useful to open bindings in
+ * all NAT routers between the local and remote endpoints, since most
+ * NATs do not allow incoming packets in before the local endpoint has
+ * sent outgoing packets.
  *
  * Specify zero to disable this feature.
  *
- * Default: 600 msec
+ * Default: 600 msec (which gives a good probability that some RTP
+ *                    packets will reach the destination, but without
+ *                    filling up the jitter buffer on the remote end).
  */
 #ifndef PJMEDIA_STREAM_VAD_SUSPEND_MSEC
 #   define PJMEDIA_STREAM_VAD_SUSPEND_MSEC	600
@@ -186,6 +191,26 @@
 
 
 /**
+ * Specify the maximum duration of a silence period in the codec, in
+ * samples; once it is reached the encoder emits a frame anyway. This
+ * is useful, for example, to keep NAT/firewall bindings open and to
+ * stop servers disconnecting the call because no RTP packet arrives.
+ *
+ * This only applies to codecs that use PJMEDIA's VAD (pretty much
+ * everything including iLBC, except Speex, which has its own DTX 
+ * mechanism).
+ *
+ * Use (-1) to disable this feature.
+ *
+ * Default: 8000 samples (one second at 8 kHz).
+ *
+ */
+#ifndef PJMEDIA_CODEC_MAX_SILENCE_PERIOD
+#   define PJMEDIA_CODEC_MAX_SILENCE_PERIOD	8000
+#endif
+
+
+/**
  * Suggested or default threshold to be set for fixed silence detection
  * or as starting threshold for adaptive silence detection. The threshold
  * has the range from zero to 255.
diff --git a/pjmedia/src/pjmedia-codec/gsm.c b/pjmedia/src/pjmedia-codec/gsm.c
index f774a5e..8118bcb 100644
--- a/pjmedia/src/pjmedia-codec/gsm.c
+++ b/pjmedia/src/pjmedia-codec/gsm.c
@@ -119,6 +119,7 @@
     pjmedia_plc		*plc;
     pj_bool_t		 vad_enabled;
     pjmedia_silence_det	*vad;
+    pj_timestamp	 last_tx;
 };
 
 
@@ -365,6 +366,9 @@
 	pjmedia_plc_save(gsm_data->plc, frame);
     }
 
+    /* Reset last_tx (timestamp of the last transmitted frame) */
+    pj_set_timestamp32(&gsm_data->last_tx, 0, 0);
+
     /* Put in the free list. */
     pj_mutex_lock(gsm_codec_factory.mutex);
     pj_list_push_front(&gsm_codec_factory.codec_list, codec);
@@ -497,23 +501,31 @@
     if (output_buf_len < 33)
 	return PJMEDIA_CODEC_EFRMTOOSHORT;
 
-    if (input->size < 320)
-	return PJMEDIA_CODEC_EPCMTOOSHORT;
+    PJ_ASSERT_RETURN(input->size==320, PJMEDIA_CODEC_EPCMFRMINLEN);
 
     /* Detect silence */
     if (gsm_data->vad_enabled) {
 	pj_bool_t is_silence;
+	pj_int32_t silence_duration;
+
+	silence_duration = pj_timestamp_diff32(&gsm_data->last_tx, 
+					       &input->timestamp);
 
 	is_silence = pjmedia_silence_det_detect(gsm_data->vad, 
 					        input->buf,
-						input->size / 2,
+						(input->size >> 1),
 						NULL);
-	if (is_silence) {
+	if (is_silence &&
+	    PJMEDIA_CODEC_MAX_SILENCE_PERIOD != -1 &&
+	    silence_duration < PJMEDIA_CODEC_MAX_SILENCE_PERIOD) 
+	{
 	    output->type = PJMEDIA_FRAME_TYPE_NONE;
 	    output->buf = NULL;
 	    output->size = 0;
-	    output->timestamp.u64 = input->timestamp.u64;
+	    output->timestamp = input->timestamp;
 	    return PJ_SUCCESS;
+	} else {
+	    gsm_data->last_tx = input->timestamp;
 	}
     }
 
diff --git a/pjmedia/src/pjmedia-codec/ilbc.c b/pjmedia/src/pjmedia-codec/ilbc.c
index a21b688..52cf940 100644
--- a/pjmedia/src/pjmedia-codec/ilbc.c
+++ b/pjmedia/src/pjmedia-codec/ilbc.c
@@ -128,6 +128,7 @@
     pjmedia_silence_det	*vad;
     pj_bool_t		 vad_enabled;
     pj_bool_t		 plc_enabled;
+    pj_timestamp	 last_tx;
 
     pj_bool_t		 enc_ready;
     iLBC_Enc_Inst_t	 enc;
@@ -426,6 +427,11 @@
     if (status != PJ_SUCCESS)
 	return status;
 
+    /* Init last_tx (not strictly necessary because of zalloc, but
+     * better be safe in case someone removes zalloc later).
+     */
+    pj_set_timestamp32(&ilbc_codec->last_tx, 0, 0);
+
     PJ_LOG(5,(ilbc_codec->obj_name, 
 	      "iLBC codec opened, encoder mode=%d, decoder mode=%d",
 	      attr->setting.enc_fmtp_mode, attr->setting.dec_fmtp_mode));
@@ -512,23 +518,32 @@
     if (output_buf_len < ilbc_codec->enc_frame_size)
 	return PJMEDIA_CODEC_EFRMTOOSHORT;
 
-    if (input->size != ilbc_codec->enc_samples_per_frame * 2)
+    if (input->size != (ilbc_codec->enc_samples_per_frame << 1))
 	return PJMEDIA_CODEC_EPCMFRMINLEN;
 
     /* Detect silence */
     if (ilbc_codec->vad_enabled) {
 	pj_bool_t is_silence;
+	pj_int32_t silence_period;
+
+	silence_period = pj_timestamp_diff32(&ilbc_codec->last_tx,
+					      &input->timestamp);
 
 	is_silence = pjmedia_silence_det_detect(ilbc_codec->vad, 
 					        input->buf,
-						input->size / 2,
+						(input->size >> 1),
 						NULL);
-	if (is_silence) {
+	if (is_silence &&
+	    PJMEDIA_CODEC_MAX_SILENCE_PERIOD != -1 &&
+	    silence_period < PJMEDIA_CODEC_MAX_SILENCE_PERIOD)
+	{
 	    output->type = PJMEDIA_FRAME_TYPE_NONE;
 	    output->buf = NULL;
 	    output->size = 0;
-	    output->timestamp.u64 = input->timestamp.u64;
+	    output->timestamp = input->timestamp;
 	    return PJ_SUCCESS;
+	} else {
+	    ilbc_codec->last_tx = input->timestamp;
 	}
     }
 
@@ -544,7 +559,7 @@
 
     output->type = PJMEDIA_FRAME_TYPE_AUDIO;
     output->size = ilbc_codec->enc.no_of_bytes;
-    output->timestamp.u64 = input->timestamp.u64;
+    output->timestamp = input->timestamp;
 
     return PJ_SUCCESS;
 }
@@ -563,7 +578,7 @@
     pj_assert(ilbc_codec != NULL);
     PJ_ASSERT_RETURN(input && output, PJ_EINVAL);
 
-    if (output_buf_len < ilbc_codec->dec_samples_per_frame*2)
+    if (output_buf_len < (ilbc_codec->dec_samples_per_frame << 1))
 	return PJMEDIA_CODEC_EPCMTOOSHORT;
 
     if (input->size != ilbc_codec->dec_frame_size)
@@ -577,9 +592,9 @@
     for (i=0; i<ilbc_codec->dec_samples_per_frame; ++i) {
 	((short*)output->buf)[i] = (short)ilbc_codec->dec_block[i];
     }
-    output->size = ilbc_codec->dec_samples_per_frame * 2;
+    output->size = (ilbc_codec->dec_samples_per_frame << 1);
     output->type = PJMEDIA_FRAME_TYPE_AUDIO;
-    output->timestamp.u64 = input->timestamp.u64;
+    output->timestamp = input->timestamp;
 
     return PJ_SUCCESS;
 }
@@ -598,7 +613,7 @@
     pj_assert(ilbc_codec != NULL);
     PJ_ASSERT_RETURN(output, PJ_EINVAL);
 
-    if (output_buf_len < ilbc_codec->dec_samples_per_frame*2)
+    if (output_buf_len < (ilbc_codec->dec_samples_per_frame << 1))
 	return PJMEDIA_CODEC_EPCMTOOSHORT;
 
     /* Decode to temporary buffer */
@@ -608,7 +623,7 @@
     for (i=0; i<ilbc_codec->dec_samples_per_frame; ++i) {
 	((short*)output->buf)[i] = (short)ilbc_codec->dec_block[i];
     }
-    output->size = ilbc_codec->dec_samples_per_frame * 2;
+    output->size = (ilbc_codec->dec_samples_per_frame << 1);
     output->type = PJMEDIA_FRAME_TYPE_AUDIO;
 
     return PJ_SUCCESS;
diff --git a/pjmedia/src/pjmedia/g711.c b/pjmedia/src/pjmedia/g711.c
index 99b4ec3..afa469c 100644
--- a/pjmedia/src/pjmedia/g711.c
+++ b/pjmedia/src/pjmedia/g711.c
@@ -125,6 +125,7 @@
     pjmedia_plc		*plc;
     pj_bool_t		 vad_enabled;
     pjmedia_silence_det *vad;
+    pj_timestamp	 last_tx;
 };
 
 
@@ -465,37 +466,48 @@
     struct g711_private *priv = codec->codec_data;
 
     /* Check output buffer length */
-    if (output_buf_len < input->size / 2)
+    if (output_buf_len < (input->size >> 1))
 	return PJMEDIA_CODEC_EFRMTOOSHORT;
 
     /* Detect silence if VAD is enabled */
     if (priv->vad_enabled) {
 	pj_bool_t is_silence;
+	pj_int32_t silence_period;
+
+	silence_period = pj_timestamp_diff32(&priv->last_tx,
+					     &input->timestamp);
 
 	is_silence = pjmedia_silence_det_detect(priv->vad, input->buf, 
-						input->size / 2, NULL);
-	if (is_silence) {
+						(input->size >> 1), NULL);
+	if (is_silence && 
+	    PJMEDIA_CODEC_MAX_SILENCE_PERIOD != -1 &&
+	    silence_period < PJMEDIA_CODEC_MAX_SILENCE_PERIOD) 
+	{
 	    output->type = PJMEDIA_FRAME_TYPE_NONE;
 	    output->buf = NULL;
 	    output->size = 0;
-	    output->timestamp.u64 = input->timestamp.u64;
+	    output->timestamp = input->timestamp;
 	    return PJ_SUCCESS;
+	} else {
+	    priv->last_tx = input->timestamp;
 	}
     }
 
     /* Encode */
     if (priv->pt == PJMEDIA_RTP_PT_PCMA) {
-	unsigned i;
+	unsigned i, n;
 	pj_uint8_t *dst = output->buf;
 
-	for (i=0; i!=input->size/2; ++i, ++dst) {
+	n = (input->size >> 1);
+	for (i=0; i!=n; ++i, ++dst) {
 	    *dst = pjmedia_linear2alaw(samples[i]);
 	}
     } else if (priv->pt == PJMEDIA_RTP_PT_PCMU) {
-	unsigned i;
+	unsigned i, n;
 	pj_uint8_t *dst = output->buf;
 
-	for (i=0; i!=input->size/2; ++i, ++dst) {
+	n = (input->size >> 1);
+	for (i=0; i!=n; ++i, ++dst) {
 	    *dst = pjmedia_linear2ulaw(samples[i]);
 	}
 
@@ -504,7 +516,7 @@
     }
 
     output->type = PJMEDIA_FRAME_TYPE_AUDIO;
-    output->size = input->size / 2;
+    output->size = (input->size >> 1);
 
     return PJ_SUCCESS;
 }
@@ -517,7 +529,7 @@
     struct g711_private *priv = codec->codec_data;
 
     /* Check output buffer length */
-    PJ_ASSERT_RETURN(output_buf_len >= input->size * 2,
+    PJ_ASSERT_RETURN(output_buf_len >= (input->size << 1),
 		     PJMEDIA_CODEC_EPCMTOOSHORT);
 
     /* Input buffer MUST have exactly 80 bytes long */
@@ -547,7 +559,7 @@
     }
 
     output->type = PJMEDIA_FRAME_TYPE_AUDIO;
-    output->size = input->size * 2;
+    output->size = (input->size << 1);
 
     if (priv->plc_enabled)
 	pjmedia_plc_save( priv->plc, output->buf);
diff --git a/pjmedia/src/pjmedia/silencedet.c b/pjmedia/src/pjmedia/silencedet.c
index 6d6db1b..e72dfd0 100644
--- a/pjmedia/src/pjmedia/silencedet.c
+++ b/pjmedia/src/pjmedia/silencedet.c
@@ -142,7 +142,7 @@
     if (min_signal < 0)
 	min_signal = sd->ptime;
     if (recalc_time < 0)
-	recalc_time = 5000;
+	recalc_time = 2000;
 
     sd->min_signal_cnt = min_signal / sd->ptime;
     sd->min_silence_cnt = min_silence / sd->ptime;
@@ -256,10 +256,10 @@
 
 	    /* Adjust according to signal/silence proportions. */
 	    if (pct_signal > 95) {
-		new_threshold += (sd->weakest_signal - sd->cur_threshold)/4;
+		new_threshold += (sd->weakest_signal+1 - sd->cur_threshold)/2;
 	    } else if (pct_signal < 5) {
 		new_threshold = (sd->cur_threshold+sd->loudest_silence)/2+1;
-	    } else if (pct_signal > 90) {
+	    } else if (pct_signal > 80) {
 		new_threshold++;
 	    } else if (pct_signal < 10) {
 		new_threshold--;
@@ -268,9 +268,12 @@
 	    }
 
 	    if (updated && sd->cur_threshold != new_threshold) {
+		PJ_LOG(5,(sd->objname, 
+			  "Vad cur_threshold updated %d-->%d. "
+			  "Signal lo=%d",
+			  sd->cur_threshold, new_threshold,
+			  sd->weakest_signal));
 		sd->cur_threshold = new_threshold;
-		PJ_LOG(5,(sd->objname, "Vad cur_threshold updated to %d",
-			  sd->cur_threshold));
 	    }
 	}
 
diff --git a/pjmedia/src/pjmedia/stream.c b/pjmedia/src/pjmedia/stream.c
index 748c222..de6ca8e 100644
--- a/pjmedia/src/pjmedia/stream.c
+++ b/pjmedia/src/pjmedia/stream.c
@@ -561,13 +561,21 @@
 					 &rtphdrlen);
 
     } else if (frame->type != PJMEDIA_FRAME_TYPE_NONE) {
-	unsigned ts;
+	unsigned ts, codec_samples_per_frame;
 
 	/* Repeatedly call encode if there are multiple frames to be
 	 * sent.
 	 */
+	codec_samples_per_frame = stream->codec_param.info.enc_ptime *
+				  stream->codec_param.info.clock_rate /
+				  1000;
+	if (codec_samples_per_frame == 0) {
+	    codec_samples_per_frame = stream->codec_param.info.frm_ptime *
+				      stream->codec_param.info.clock_rate /
+				      1000;
+	}
 
-	for (ts=0; ts<ts_len; ts += samples_per_frame) {
+	for (ts=0; ts<ts_len; ts += codec_samples_per_frame) {
 	    pjmedia_frame tmp_out_frame, tmp_in_frame;
 	    unsigned bytes_per_sample, max_size;
 
@@ -575,8 +583,9 @@
 	    bytes_per_sample = stream->codec_param.info.pcm_bits_per_sample/8;
 
 	    /* Split original PCM input frame into base frame size */
+	    tmp_in_frame.timestamp.u64 = frame->timestamp.u64 + ts;
 	    tmp_in_frame.buf = ((char*)frame->buf) + ts * bytes_per_sample;
-	    tmp_in_frame.size = samples_per_frame * bytes_per_sample;
+	    tmp_in_frame.size = codec_samples_per_frame * bytes_per_sample;
 	    tmp_in_frame.type = PJMEDIA_FRAME_TYPE_AUDIO;
 
 	    /* Set output frame position */
@@ -689,11 +698,31 @@
 			      const pjmedia_frame *frame )
 {
     pjmedia_stream *stream = port->port_data.pdata;
-    pjmedia_frame tmp_in_frame;
+    pjmedia_frame tmp_zero_frame;
     unsigned samples_per_frame;
 
     samples_per_frame = stream->enc_samples_per_frame;
 
+    /* http://www.pjsip.org/trac/ticket/56:
+     *  when input is PJMEDIA_FRAME_TYPE_NONE, feed a zero PCM frame
+     *  instead so that the encoder can decide whether or not to
+     *  transmit a silence frame.
+     */
+    if (frame->type == PJMEDIA_FRAME_TYPE_NONE &&
+	samples_per_frame <= ZERO_PCM_MAX_SIZE) 
+    {
+	pj_memcpy(&tmp_zero_frame, frame, sizeof(pjmedia_frame));
+	frame = &tmp_zero_frame;
+
+	tmp_zero_frame.buf = zero_frame;
+	tmp_zero_frame.size = samples_per_frame * 2;
+	tmp_zero_frame.type = PJMEDIA_FRAME_TYPE_AUDIO;
+    }
+
+#if 0
+    // This is no longer needed because each TYPE_NONE frame will
+    // be converted into zero frame above
+
     /* If VAD is temporarily disabled during creation, feed zero PCM frame
      * to the codec.
      */
@@ -709,6 +738,7 @@
 	tmp_in_frame.size = samples_per_frame * 2;
 	tmp_in_frame.type = PJMEDIA_FRAME_TYPE_AUDIO;
     }
+#endif
 
     /* If VAD is temporarily disabled during creation, enable it
      * after transmitting for VAD_SUSPEND_SEC seconds.