Optimizations, mostly in the conference bridge (now more than 3x faster)

git-svn-id: https://svn.pjsip.org/repos/pjproject/trunk@513 74dad513-b988-da41-8d7b-12977e46ad98
diff --git a/pjlib/include/pj/doxygen.h b/pjlib/include/pj/doxygen.h
index f4bf8db..4f14dba 100644
--- a/pjlib/include/pj/doxygen.h
+++ b/pjlib/include/pj/doxygen.h
@@ -34,12 +34,12 @@
  *
  * @section intro_sec What is PJLIB
  *
- * PJLIB is a small foundation library written in C for making scalable 
- * applications. Because of its small footprint, it can be used in embedded 
- * applications (we hope so!), but yet the library is also aimed for 
- * facilitating high performance protocol stacks.
+ * PJLIB is an Open Source, small footprint framework library written in C for 
+ * making scalable applications. Because of its small footprint, it can be used
+ * in embedded applications (we hope so!), yet the library is also aimed at
+ * facilitating the creation of high performance protocol stacks.
  *
- * PJLIB is released under LGPL terms.
+ * PJLIB is released under GPL terms.
  *
  * @section download_sec Download
  *
@@ -55,7 +55,7 @@
  *
  * @subsection doc_ver_subsec Version
  *
- * This document corresponds to PJLIB version 0.3-pre2.
+ * This document corresponds to PJLIB version 0.5.6.
  *
  *
  * @subsection find_samples_subsec How to Read This Document
@@ -114,10 +114,8 @@
  *
  * @subsection open_source_feat It's Open Source!
  *
- * PJLIB is currently released on LGPL license. We may release PJLIB under
- * additional schemes in the future (such as GPL or MPL) to incorporate
- * linking with specific application, however, one thing for sure is
- * we will NEVER be able to make PJLIB a proprietary software.
+ * PJLIB is currently released under the GPL license, but other arrangements
+ * can be made with the author.
  *
  * @subsection extreme_portable_feat Extreme Portability
  *
@@ -127,18 +125,18 @@
  * floating point. Multi-threading or not.
  * It can even run in environment where no ANSI LIBC is available. 
  *
- * Currently PJLIB is being ported to:
- *  - x86, Win32 (Win95/98/ME, NT/2000/XP/2003, mingw).
- *  - x86, Linux (user mode and as <b>kernel module</b>(!)).
- *  - alpha, Linux
- * And coming up:
- *  - x86, eCos
- *  - ultra-II, Solaris.
- *  - powerpc, MacOS
- *  - m68k, PalmOS.
- *  - arm, PocketPC
+ * Currently PJLIB is known to run on these platforms:
+ *  - Win32/x86 (Win95/98/ME, NT/2000/XP/2003, mingw).
+ *  - arm, WinCE and Windows Mobile.
+ *  - Linux/x86, (user mode and as <b>kernel module</b>(!)).
+ *  - Linux/alpha
+ *  - Solaris/ultra.
+ *  - MacOS X/powerpc
+ *  - RTEMS (x86 and powerpc).
  *
- * No other library is known to have this extreme portability!
+ * Efforts are also under way to port PJLIB to:
+ *  - Symbian OS
+ *
  *
  * @subsection small_size_feat Small in Size
  *
@@ -151,6 +149,13 @@
  *
  * For more info, please see @ref pj_config.
  *
+ *
+ * @subsection big_perform_feat Big in Performance
+ *
+ * Almost everything in PJLIB is designed to achieve the highest possible
+ * performance out of the target platform. 
+ *
+ *
  * @subsection no_dyn_mem No Dynamic Memory Allocations
  *
  * The central idea of PJLIB is that for applications to run as fast as it can,
@@ -164,8 +169,9 @@
  *  - no \a free() is required. All chunks will be deleted when the pool is 
  *    destroyed.
  *
- * The performance gained on some systems can be as high as 10x speed up
- * against \a malloc() and \a free().
+ * The performance gain can be as high as a 30x speed up
+ * against \a malloc() and \a free() on certain configurations, but of
+ * course your mileage may vary. 
  *
  * For more information, see \ref PJ_POOL_GROUP
  *
@@ -383,11 +389,11 @@
  * @subsubsection mem_alloc_subsubsec Use Pool for Memory Allocations
  *
  * You MUST NOT use \a malloc() or any other memory allocation functions.
- * Use PJLIB pool instead! It's faster and most portable.
+ * Use PJLIB @ref PJ_POOL_GROUP instead! It's faster and more portable.
  *
  * @subsection logging_subsubsec Use Logging for Text Display
  *
- * DO NOT use <stdio.h> for text output. Use PJLIB logging instead.
+ * DO NOT use <stdio.h> for text output. Use PJLIB @ref PJ_LOG instead.
  *
  *
  * @section porting_pjlib_sec0 Porting PJLIB
diff --git a/pjmedia/include/pjmedia/conference.h b/pjmedia/include/pjmedia/conference.h
index 208a874..7346f7c 100644
--- a/pjmedia/include/pjmedia/conference.h
+++ b/pjmedia/include/pjmedia/conference.h
@@ -44,7 +44,8 @@
     pj_str_t		name;		    /**< Port name.		    */
     pjmedia_port_op	tx_setting;	    /**< Transmit settings.	    */
     pjmedia_port_op	rx_setting;	    /**< Receive settings.	    */
-    pj_bool_t	       *listener;	    /**< Array of listeners.	    */
+    unsigned		listener_cnt;	    /**< Number of listeners.	    */
+    unsigned	       *listener_slots;	    /**< Array of listeners.	    */
     unsigned		clock_rate;	    /**< Clock rate of the port.    */
     unsigned		channel_count;	    /**< Number of channels.	    */
     unsigned		samples_per_frame;  /**< Samples per frame	    */
diff --git a/pjmedia/include/pjmedia/types.h b/pjmedia/include/pjmedia/types.h
index 90f90b5..02edd9f 100644
--- a/pjmedia/include/pjmedia/types.h
+++ b/pjmedia/include/pjmedia/types.h
@@ -20,7 +20,9 @@
 #define __PJMEDIA_TYPES_H__
 
 #include <pjmedia/config.h>
-#include <pj/sock.h>
+#include <pj/sock.h>	    /* pjmedia_sock_info	*/
+#include <pj/string.h>	    /* pj_memcpy(), pj_memset() */
+
 
 /** 
  * Top most media type. 
@@ -124,9 +126,16 @@
  */
 PJ_INLINE(void) pjmedia_zero_samples(pj_int16_t *samples, unsigned count)
 {
+#if 1
+    pj_memset(samples, 0, count*sizeof(pj_int16_t));
+#elif 0
     unsigned i;
-    for (i=0; i<count; ++i)
-	samples[i] = 0;
+    for (i=0; i<count; ++i) samples[i] = 0;
+#else
+    unsigned i;
+    count >>= 1;
+    for (i=0; i<count; ++i) ((pj_int32_t*)samples)[i] = (pj_int32_t)0;
+#endif
 }
 
 
@@ -139,9 +148,17 @@
 PJ_INLINE(void) pjmedia_copy_samples(pj_int16_t *dst, const pj_int16_t *src,
 				     unsigned count)
 {
+#if 1
+    pj_memcpy(dst, src, count*sizeof(pj_int16_t));
+#elif 0
     unsigned i;
-    for (i=0; i<count; ++i)
-	dst[i] = src[i];
+    for (i=0; i<count; ++i) dst[i] = src[i];
+#else
+    unsigned i;
+    count >>= 1;
+    for (i=0; i<count; ++i) 
+	((pj_int32_t*)dst)[i] = ((pj_int32_t*)src)[i];
+#endif
 }
 
 
diff --git a/pjmedia/src/pjmedia/conference.c b/pjmedia/src/pjmedia/conference.c
index 2a55efd..abf522b 100644
--- a/pjmedia/src/pjmedia/conference.c
+++ b/pjmedia/src/pjmedia/conference.c
@@ -23,6 +23,7 @@
 #include <pjmedia/silencedet.h>
 #include <pjmedia/sound_port.h>
 #include <pjmedia/stream.h>
+#include <pj/array.h>
 #include <pj/assert.h>
 #include <pj/log.h>
 #include <pj/pool.h>
@@ -57,6 +58,8 @@
 #define BYTES_PER_SAMPLE    2
 
 #define NORMAL_LEVEL	    128
+#define SLOT_TYPE	    unsigned
+#define INVALID_SLOT	    ((SLOT_TYPE)-1)
 
 
 /*
@@ -78,8 +81,9 @@
     pjmedia_port	*port;		/**< get_frame() and put_frame()    */
     pjmedia_port_op	 rx_setting;	/**< Can we receive from this port  */
     pjmedia_port_op	 tx_setting;	/**< Can we transmit to this port   */
-    int			 listener_cnt;	/**< Number of listeners.	    */
-    pj_bool_t		*listeners;	/**< Array of listeners.	    */
+    unsigned		 listener_cnt;	/**< Number of listeners.	    */
+    SLOT_TYPE		*listener_slots;/**< Array of listeners.	    */
+    unsigned		 transmitter_cnt;/**<Number of transmitters.	    */
     pjmedia_silence_det	*vad;		/**< VAD for this port.		    */
 
     /* Shortcut for port info. */
@@ -87,7 +91,6 @@
     unsigned		 samples_per_frame; /**< Port's samples per frame.  */
 
     /* Calculated signal levels: */
-    pj_bool_t		 need_tx_level;	/**< Need to calculate tx level?    */
     unsigned		 tx_level;	/**< Last tx level to this port.    */
     unsigned		 rx_level;	/**< Last rx level from this port.  */
 
@@ -121,7 +124,7 @@
     /* Mix buf is a temporary buffer used to calculate the average signal
      * received by this port from all other ports. Samples from all ports
      * that are transmitting to this port will be accumulated here, then
-     * they will be divided by the sources count before the samples are put
+     * they will be divided by the source level before the samples are put
      * to the TX buffer of this port.
      *
      * This buffer contains samples at bridge's clock rate.
@@ -129,7 +132,8 @@
      *
      * Note that the samples here are unsigned 32bit.
      */
-    unsigned		 sources;	/**< Number of sources.		    */
+    unsigned		 src_level;	/**< Sum of input levels	    */
+    unsigned		 src_cnt;	/**< Number of sources.		    */
     pj_uint32_t		*mix_buf;	/**< Total sum of signal.	    */
 
     /* Tx buffer is a temporary buffer to be used when there's mismatch 
@@ -223,10 +227,9 @@
     conf_port->rx_adj_level = NORMAL_LEVEL;
 
     /* Create transmit flag array */
-    conf_port->listeners = pj_pool_zalloc(pool, 
-					  conf->max_ports*sizeof(pj_bool_t));
-    PJ_ASSERT_RETURN(conf_port->listeners, PJ_ENOMEM);
-
+    conf_port->listener_slots = pj_pool_zalloc(pool, 
+					  conf->max_ports * sizeof(SLOT_TYPE));
+    PJ_ASSERT_RETURN(conf_port->listener_slots, PJ_ENOMEM);
 
     /* Save some port's infos, for convenience. */
     if (port) {
@@ -666,6 +669,7 @@
 {
     struct conf_port *src_port, *dst_port;
     pj_bool_t start_sound = PJ_FALSE;
+    unsigned i;
 
     /* Check arguments */
     PJ_ASSERT_RETURN(conf && src_slot<conf->max_ports && 
@@ -683,17 +687,26 @@
     src_port = conf->ports[src_slot];
     dst_port = conf->ports[sink_slot];
 
-    if (src_port->listeners[sink_slot] == 0) {
-	src_port->listeners[sink_slot] = 1;
+    /* Check if connection has been made */
+    for (i=0; i<src_port->listener_cnt; ++i) {
+	if (src_port->listener_slots[i] == sink_slot)
+	    break;
+    }
+
+    if (i == src_port->listener_cnt) {
+	src_port->listener_slots[src_port->listener_cnt] = sink_slot;
 	++conf->connect_cnt;
 	++src_port->listener_cnt;
+	++dst_port->transmitter_cnt;
 
 	if (conf->connect_cnt == 1)
 	    start_sound = 1;
 
-	PJ_LOG(4,(THIS_FILE,"Port %.*s transmitting to port %.*s",
+	PJ_LOG(4,(THIS_FILE,"Port %d (%.*s) transmitting to port %d (%.*s)",
+		  src_slot,
 		  (int)src_port->name.slen,
 		  src_port->name.ptr,
+		  sink_slot,
 		  (int)dst_port->name.slen,
 		  dst_port->name.ptr));
     }
@@ -718,6 +731,7 @@
 						  unsigned sink_slot )
 {
     struct conf_port *src_port, *dst_port;
+    unsigned i;
 
     /* Check arguments */
     PJ_ASSERT_RETURN(conf && src_slot<conf->max_ports && 
@@ -732,14 +746,29 @@
     src_port = conf->ports[src_slot];
     dst_port = conf->ports[sink_slot];
 
-    if (src_port->listeners[sink_slot] != 0) {
-	src_port->listeners[sink_slot] = 0;
+    /* Check if connection has been made */
+    for (i=0; i<src_port->listener_cnt; ++i) {
+	if (src_port->listener_slots[i] == sink_slot)
+	    break;
+    }
+
+    if (i != src_port->listener_cnt) {
+	pj_assert(src_port->listener_cnt > 0 && 
+		  src_port->listener_cnt < conf->max_ports);
+	pj_assert(dst_port->transmitter_cnt > 0 && 
+		  dst_port->transmitter_cnt < conf->max_ports);
+	pj_array_erase(src_port->listener_slots, sizeof(SLOT_TYPE), 
+		       src_port->listener_cnt, i);
 	--conf->connect_cnt;
 	--src_port->listener_cnt;
+	--dst_port->transmitter_cnt;
 
-	PJ_LOG(4,(THIS_FILE,"Port %.*s stop transmitting to port %.*s",
+	PJ_LOG(4,(THIS_FILE,
+		  "Port %d (%.*s) stop transmitting to port %d (%.*s)",
+		  src_slot,
 		  (int)src_port->name.slen,
 		  src_port->name.ptr,
+		  sink_slot,
 		  (int)dst_port->name.slen,
 		  dst_port->name.ptr));
 
@@ -784,26 +813,30 @@
 
     /* Remove this port from transmit array of other ports. */
     for (i=0; i<conf->max_ports; ++i) {
+	unsigned j;
+
 	conf_port = conf->ports[i];
 
 	if (!conf_port)
 	    continue;
 
-	if (conf_port->listeners[port] != 0) {
-	    --conf->connect_cnt;
-	    --conf_port->listener_cnt;
-	    conf_port->listeners[port] = 0;
+	if (conf_port->listener_cnt == 0)
+	    continue;
+
+	for (j=0; j<conf_port->listener_cnt; ++j) {
+	    if (conf_port->listener_slots[j] == port) {
+		pj_array_erase(conf_port->listener_slots, sizeof(SLOT_TYPE),
+			       conf_port->listener_cnt, j);
+		--conf->connect_cnt;
+		--conf_port->listener_cnt;
+		break;
+	    }
 	}
     }
 
-    /* Remove all ports listening from this port. */
+    /* Update conf's connection count. */
     conf_port = conf->ports[port];
-    for (i=0; i<conf->max_ports; ++i) {
-	if (conf_port->listeners[i]) {
-	    --conf->connect_cnt;
-	    --conf_port->listener_cnt;
-	}
-    }
+    conf->connect_cnt -= conf_port->listener_cnt;
 
     /* Remove the port. */
     conf->ports[port] = NULL;
@@ -864,7 +897,8 @@
     info->name = conf_port->name;
     info->tx_setting = conf_port->tx_setting;
     info->rx_setting = conf_port->rx_setting;
-    info->listener = conf_port->listeners;
+    info->listener_cnt = conf_port->listener_cnt;
+    info->listener_slots = conf_port->listener_slots;
     info->clock_rate = conf_port->clock_rate;
     info->channel_count = conf->channel_count;
     info->samples_per_frame = conf_port->samples_per_frame;
@@ -916,7 +950,6 @@
     conf_port = conf->ports[slot];
 
     if (tx_level != NULL) {
-	conf_port->need_tx_level = 1;
 	*tx_level = conf_port->tx_level;
     }
 
@@ -1120,10 +1153,10 @@
      * transmit NULL frame. 
      */
     /* note:
-     *  the "cport->sources==0" checking will cause discontinuous
+     *  the "cport->src_level==0" checking will cause discontinuous
      *  transmission for RTP stream.
      */
-    if (cport->tx_setting == PJMEDIA_PORT_MUTE || cport->sources==0) {
+    if (cport->tx_setting == PJMEDIA_PORT_MUTE || cport->src_level==0) {
 
 	pjmedia_frame frame;
 
@@ -1143,6 +1176,39 @@
 	return PJ_SUCCESS;
     }
 
+    buf = (pj_int16_t*)cport->mix_buf;
+
+    /* This is the convention set in get_frame(). For optimization purpose,
+     * if we only have one transmitter transmitting to this port, then
+     * the transmitter will directly copy the original 16bit frame to
+     * mix_buf.
+     */
+    if (cport->transmitter_cnt==1 && cport->src_cnt == 1) {
+
+	/* But still see if we need to adjust the level */
+	if (cport->tx_adj_level != NORMAL_LEVEL) {
+	    pj_int16_t *input = buf;
+	    pj_int32_t adj = cport->tx_adj_level;
+
+	    for (j=0; j<conf->samples_per_frame; ++j) {
+		pj_int32_t itemp;
+
+		/* For the level adjustment, we need to store the sample to
+		 * a temporary 32bit integer value to avoid overflowing the
+		 * 16bit sample storage.
+		 */
+		itemp = input[j];
+		itemp = itemp * adj / NORMAL_LEVEL;
+
+		/* Clip the signal if it's too loud */
+		if (itemp > 32767) itemp = 32767;
+		else if (itemp < -32768) itemp = -32768;
+
+		input[j] = (pj_int16_t) itemp;
+	    }
+	}
+
+    } 
     /* If there are sources in the mix buffer, convert the mixed samples
      * to the mixed samples itself. This is possible because mixed sample
      * is 32bit.
@@ -1150,9 +1216,7 @@
      * In addition to this process, if we need to change the level of
      * TX signal, we adjust is here too.
      */
-    buf = (pj_int16_t*)cport->mix_buf;
-
-    if (cport->tx_adj_level != NORMAL_LEVEL && cport->sources) {
+    else if (cport->tx_adj_level != NORMAL_LEVEL && cport->src_level) {
 
 	unsigned adj_level = cport->tx_adj_level;
 
@@ -1163,7 +1227,7 @@
 	    /* Calculate average level, and convert the sample to
 	     * 16bit signed integer.
 	     */
-	    itemp = unsigned2pcm(cport->mix_buf[j] / cport->sources);
+	    itemp = unsigned2pcm(cport->mix_buf[j] / cport->src_level);
 
 	    /* Adjust the level */
 	    itemp = itemp * adj_level / NORMAL_LEVEL;
@@ -1176,15 +1240,19 @@
 	    buf[j] = (pj_int16_t) itemp;
 	}
 
-    } else if (cport->sources) {
+    } else if (cport->src_level) {
 	/* No need to adjust signal level. */
 	for (j=0; j<conf->samples_per_frame; ++j) {
-	    buf[j] = unsigned2pcm(cport->mix_buf[j] / cport->sources);
+	    buf[j] = unsigned2pcm(cport->mix_buf[j] / cport->src_level);
 	}
     } else {
 	// Not necessarry. Buffer has been zeroed before.
 	// pjmedia_zero_samples(buf, conf->samples_per_frame);
-	pj_assert(buf[0] == 0);
+	//pj_assert(buf[0] == 0);
+
+	// This shouldn't happen. Function should've already bailed out when
+	// cport->src_level == 0.
+	pj_assert(0);
     }
 
     /* Calculate TX level if we need to do so. 
@@ -1196,15 +1264,8 @@
      * for VU meter display. By doing it here, it should give the acceptable
      * indication of the signal level of the port.
      */
-    if (cport->need_tx_level && cport->sources) {
-	pj_uint32_t level;
-
-	/* Get the signal level. */
-	level = pjmedia_calc_avg_signal(buf, conf->samples_per_frame);
-
-	/* Convert level to 8bit complement ulaw */
-	cport->tx_level = linear2ulaw(level) ^ 0xff;
-
+    if (cport->src_cnt) {
+	cport->tx_level = cport->src_level / cport->src_cnt;
     } else {
 	cport->tx_level = 0;
     }
@@ -1310,13 +1371,14 @@
     pj_assert(frame->size == conf->samples_per_frame *
 			     conf->bits_per_sample / 8);
 
-    /* Must lock mutex (must we??) */
+    /* Must lock mutex */
     pj_mutex_lock(conf->mutex);
 
-    /* Zero all port's temporary buffers. */
+    /* Reset port source count. We will only reset port's mix
+     * buffer when we have someone transmitting to it.
+     */
     for (i=0, ci=0; i<conf->max_ports && ci < conf->port_cnt; ++i) {
 	struct conf_port *conf_port = conf->ports[i];
-	pj_uint32_t *mix_buf;
 
 	/* Skip empty slot. */
 	if (!conf_port)
@@ -1324,10 +1386,9 @@
 
 	++ci;
 
-	conf_port->sources = 0;
-	mix_buf = conf_port->mix_buf;
-
-	pj_memset(mix_buf, 0, conf->samples_per_frame*sizeof(mix_buf[0]));
+	/* Reset sources */
+	conf_port->src_level = 0;
+	conf_port->src_cnt = 0;
     }
 
     /* Get frames from all ports, and "mix" the signal 
@@ -1441,39 +1502,53 @@
 	/* Put this level to port's last RX level. */
 	conf_port->rx_level = level;
 
+	/* Skip processing frame if level is zero */
+	if (level == 0)
+	    continue;
+
 	/* Convert the buffer to unsigned 16bit value */
 	for (j=0; j<conf->samples_per_frame; ++j)
 	    conf->uns_buf[j] = pcm2unsigned(((pj_int16_t*)frame->buf)[j]);
 
 	/* Add the signal to all listeners. */
-	for (j=0, cj=0; 
-	     j<conf->max_ports && cj<(unsigned)conf_port->listener_cnt;
-	     ++j) 
+	for (cj=0; cj < conf_port->listener_cnt; ++cj) 
 	{
-	    struct conf_port *listener = conf->ports[j];
+	    struct conf_port *listener;
 	    pj_uint32_t *mix_buf;
 	    unsigned k;
 
-	    if (listener == 0)
-		continue;
-
-	    /* Skip if this is not the listener. */
-	    if (!conf_port->listeners[j])
-		continue;
-
-	    /* Var "cj" is the number of listeners we have visited so far */
-	    ++cj;
+	    listener = conf->ports[conf_port->listener_slots[cj]];
 
 	    /* Skip if this listener doesn't want to receive audio */
 	    if (listener->tx_setting != PJMEDIA_PORT_ENABLE)
 		continue;
 
-	    /* Mix the buffer */
+	    /* Mix the buffer. If this is the first source for target port,
+	     * zero the mix buffer of target port first.
+	     */
 	    mix_buf = listener->mix_buf;
-	    for (k=0; k<conf->samples_per_frame; ++k)
-		mix_buf[k] += (conf->uns_buf[k] * level);
+	    if (listener->src_level == 0) {
+		pj_memset(mix_buf, 0, 
+			  conf->samples_per_frame*sizeof(mix_buf[0]));
+	    }
 
-	    listener->sources += level;
+	    /* A little bit of optimization:
+	     *  When "conf_port" is the only transmitter to "listener",
+	     *  just copy the frame directly from the original
+	     *  16bit frame (avoiding unsigned2pcm() conversion).
+	     *  But write_port() needs to be aware of this trick!
+	     */
+	    if (listener->transmitter_cnt == 1) {
+		pjmedia_copy_samples((pj_int16_t*)mix_buf, 
+				     frame->buf, conf->samples_per_frame);
+		listener->src_level = level;
+	    } else {
+		for (k=0; k<conf->samples_per_frame; ++k)
+		    mix_buf[k] += (conf->uns_buf[k] * level);
+
+		listener->src_level += level;
+	    }
+	    listener->src_cnt++;
 	}
     }
 
@@ -1510,7 +1585,7 @@
     }
 
     /* Return sound playback frame. */
-    if (conf->ports[0]->sources) {
+    if (conf->ports[0]->src_level) {
 	TRACE_((THIS_FILE, "write to audio, count=%d", 
 			   conf->samples_per_frame));
 
diff --git a/pjmedia/src/pjmedia/dsound.c b/pjmedia/src/pjmedia/dsound.c
index 9e1d7a5..81ea2f2 100644
--- a/pjmedia/src/pjmedia/dsound.c
+++ b/pjmedia/src/pjmedia/dsound.c
@@ -368,9 +368,9 @@
     
     if SUCCEEDED(hr) { 
 	// Read from pointers. 
-	CopyMemory(lpbSoundData, lpvPtr1, dwBytes1); 
+	pj_memcpy(lpbSoundData, lpvPtr1, dwBytes1); 
 	if (lpvPtr2 != NULL)
-	    CopyMemory(lpbSoundData+dwBytes1, lpvPtr2, dwBytes2); 
+	    pj_memcpy(lpbSoundData+dwBytes1, lpvPtr2, dwBytes2); 
 	
 	// Release the data back to DirectSound. 
 	hr = IDirectSoundCaptureBuffer_Unlock(lpDsb, lpvPtr1, dwBytes1, lpvPtr2, dwBytes2); 
@@ -407,9 +407,9 @@
 				      &lpvPtr1, &dwBytes1, &lpvPtr2, &dwBytes2, 0); 
     } 
     if SUCCEEDED(hr) { 
-	CopyMemory(lpvPtr1, lpbSoundData, dwBytes1); 
+	pj_memcpy(lpvPtr1, lpbSoundData, dwBytes1); 
 	if (NULL != lpvPtr2) 
-	    CopyMemory(lpvPtr2, lpbSoundData+dwBytes1, dwBytes2); 
+	    pj_memcpy(lpvPtr2, lpbSoundData+dwBytes1, dwBytes2); 
 	
 	hr = IDirectSoundBuffer_Unlock(lpDsb, lpvPtr1, dwBytes1, lpvPtr2, dwBytes2); 
 	if SUCCEEDED(hr)
diff --git a/pjmedia/src/pjmedia/resample.c b/pjmedia/src/pjmedia/resample.c
index 6ea621c..a30215f 100644
--- a/pjmedia/src/pjmedia/resample.c
+++ b/pjmedia/src/pjmedia/resample.c
@@ -58,9 +58,6 @@
  *  - move FilterUp() and FilterUD() from filterkit.c
  *  - move stddefs.h and resample.h to this file.
  *  - const correctness.
- *  - fixed SrcLinear() may write pass output buffer.
- *  - assume the same for SrcUp() and SrcUD(), so put the same
- *    protection.
  */
 #include <pjmedia/resample.h>
 #include <pjmedia/errno.h>
@@ -260,7 +257,7 @@
     Ystart = Y;
     Yend = Ystart + (unsigned)(nx * pFactor);
     endTime = time + (1<<Np)*(WORD)nx;
-    while (time < endTime && Y < Yend)	/* bennylp fix: added Y < Yend */
+    while (time < endTime)
     {
 	iconst = (time) & Pmask;
 	xp = &X[(time)>>Np];      /* Ptr to current input sample */
@@ -399,7 +396,7 @@
     Ystart = Y;
     Yend = Ystart + (unsigned)(nx * pFactor);
     endTime = time + (1<<Np)*(WORD)nx;
-    while (time < endTime && Y < Yend)	/* bennylp fix: protect Y */
+    while (time < endTime)
     {
 	xp = &X[time>>Np];      /* Ptr to current input sample */
 	/* Perform left-wing inner product */
@@ -443,7 +440,7 @@
     Ystart = Y;
     Yend = Ystart + (unsigned)(nx * pFactor);
     endTime = time + (1<<Np)*(WORD)nx;
-    while (time < endTime && Y < Yend) /* bennylp fix: protect Y */
+    while (time < endTime)
     {
 	xp = &X[time>>Np];	/* Ptr to current input sample */
 	v = FilterUD(pImp, pImpD, pNwing, Interp, xp, (HWORD)(time&Pmask),
@@ -495,9 +492,11 @@
 
     /*
      * If we're downsampling, always use the fast algorithm since it seems
-     * to yield the same performance.
+     * to yield the same quality.
      */
     if (rate_out < rate_in) {
+	//no this is not a good idea. It sounds pretty good with speech,
+	//but very poor with background noise etc.
 	//high_quality = 0;
     }
 
@@ -533,7 +532,6 @@
 
     if (high_quality) {
 	unsigned size;
-	unsigned i;
 
 	/* This is a bug in xoff calculation, thanks Stephane Lussier
 	 * of Macadamian dot com.
@@ -551,9 +549,7 @@
 	resample->buffer = pj_pool_alloc(pool, size);
 	PJ_ASSERT_RETURN(resample->buffer, PJ_ENOMEM);
 
-	for (i=0; i<resample->xoff*2; ++i) {
-	    resample->buffer[i] = 0;
-	}
+	pjmedia_zero_samples(resample->buffer, resample->xoff*2);
 
 
     } else {
@@ -561,6 +557,12 @@
     }
 
     *p_resample = resample;
+
+    PJ_LOG(5,(THIS_FILE, "resample created: %s qualiy, %s filter, in/out "
+			  "rate=%d/%d", 
+			  (high_quality?"high":"low"),
+			  (large_filter?"large":"small"),
+			  rate_in, rate_out));
     return PJ_SUCCESS;
 }
 
@@ -573,7 +575,6 @@
     PJ_ASSERT_ON_FAIL(resample, return);
 
     if (resample->high_quality) {
-	unsigned i;
 	pj_int16_t *dst_buf;
 	const pj_int16_t *src_buf;
 
@@ -644,7 +645,7 @@
 	 *
 	 */
 	dst_buf = resample->buffer + resample->xoff*2;
-	for (i=0; i<resample->frame_size; ++i) dst_buf[i] = input[i];
+	pjmedia_copy_samples(dst_buf, input, resample->frame_size);
 	    
 	if (resample->factor >= 1) {
 
@@ -688,9 +689,7 @@
 
 	dst_buf = resample->buffer;
 	src_buf = input + resample->frame_size - resample->xoff*2;
-	for (i=0; i<resample->xoff * 2; ++i) {
-	    dst_buf[i] = src_buf[i];
-	}
+	pjmedia_copy_samples(dst_buf, src_buf, resample->xoff * 2);
 
     } else {
 	SrcLinear( input, output, resample->factor, resample->frame_size);
diff --git a/pjsip-apps/build/Samples-vc.mak b/pjsip-apps/build/Samples-vc.mak
index c8ae70a..e750b73 100644
--- a/pjsip-apps/build/Samples-vc.mak
+++ b/pjsip-apps/build/Samples-vc.mak
@@ -36,6 +36,7 @@
 
 
 SAMPLES = $(BINDIR)\confsample.exe \
+	  $(BINDIR)\confbench.exe \
 	  $(BINDIR)\level.exe \
 	  $(BINDIR)\playfile.exe \
 	  $(BINDIR)\playsine.exe\
diff --git a/pjsip-apps/build/samples.dsp b/pjsip-apps/build/samples.dsp
index 19641c4..d6df85c 100644
--- a/pjsip-apps/build/samples.dsp
+++ b/pjsip-apps/build/samples.dsp
@@ -86,6 +86,10 @@
 # PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
 # Begin Source File
 
+SOURCE=..\src\samples\confbench.c
+# End Source File
+# Begin Source File
+
 SOURCE=..\src\samples\confsample.c
 # End Source File
 # Begin Source File
diff --git a/pjsip-apps/src/samples/confbench.c b/pjsip-apps/src/samples/confbench.c
new file mode 100644
index 0000000..2dae32b
--- /dev/null
+++ b/pjsip-apps/src/samples/confbench.c
@@ -0,0 +1,339 @@
+/* $Id$ */
+/* 
+ * Copyright (C) 2003-2006 Benny Prijono <benny@prijono.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
+ */
+
+/*
+ * Benchmarking pjmedia (conference bridge+resample). For my use only,
+ * and it only works in Win32.
+ */
+
+#include <pjmedia.h>
+#include <pjlib-util.h>	/* pj_getopt */
+#include <pjlib.h>
+#include <stdlib.h>	/* atoi() */
+#include <stdio.h>
+#include <windows.h>
+
+/* Sender name passed to PJ_LOG()/app_perror() for messages from this file. */
+#define THIS_FILE   "confbench.c"
+
+
+/* Configurable:
+ *   LARGE_SET will create a total of about 232 ports.
+ *   HAS_RESAMPLE will activate resampling on about half
+ *     of the ports.
+ */
+#define TEST_SET	    SMALL_SET
+#define HAS_RESAMPLE	    1
+
+
+#define SMALL_SET	    16
+#define LARGE_SET	    100
+
+
+#define PORT_COUNT	    254
+#define CLOCK_RATE	    16000
+#define SAMPLES_PER_FRAME   (CLOCK_RATE/100)
+#if HAS_RESAMPLE
+#  define SINE_CLOCK	    32000
+#else
+#  define SINE_CLOCK	    CLOCK_RATE
+#endif
+#define SINE_PTIME	    20
+#define DURATION	    10
+
+#define SINE_COUNT	    TEST_SET
+#define NULL_COUNT	    TEST_SET
+#define IDLE_COUNT	    32
+
+
+static void app_perror(const char *sender, const char *title, pj_status_t status)
+{
+    char errmsg[PJ_ERR_MSG_SIZE];
+
+    pj_strerror(status, errmsg, sizeof(errmsg));
+    PJ_LOG(1,(sender, "%s: %s", title, errmsg));
+}
+
+
+struct Times
+{
+    FILETIME	    kernel_time;
+    ULARGE_INTEGER  u_kernel_time;
+    FILETIME	    user_time;
+    ULARGE_INTEGER  u_user_time;
+    ULARGE_INTEGER  u_total;
+};
+
+static void process(struct Times *t)
+{
+    pj_memcpy(&t->u_kernel_time, &t->kernel_time, sizeof(FILETIME));
+    pj_memcpy(&t->u_user_time, &t->user_time, sizeof(FILETIME));
+    t->u_total.QuadPart = t->u_kernel_time.QuadPart + t->u_user_time.QuadPart;
+}
+
+static void benchmark(void)
+{
+    FILETIME creation_time, exit_time;
+    struct Times start, end;
+    DWORD ts, te;
+    LARGE_INTEGER elapsed;
+    BOOL rc;
+    int i;
+    double pct;
+
+    puts("Test started!"); fflush(stdout);
+
+    ts = GetTickCount();
+    rc = GetProcessTimes(GetCurrentProcess(), &creation_time, &exit_time,
+			 &start.kernel_time, &start.user_time);
+    for (i=DURATION; i>0; --i) {
+	printf("\r%d ", i); fflush(stdout);
+	pj_thread_sleep(1000);
+    }
+    rc = GetProcessTimes(GetCurrentProcess(), &creation_time, &exit_time,
+			 &end.kernel_time, &end.user_time);
+    te = GetTickCount();
+
+    process(&start);
+    process(&end);
+
+    elapsed.QuadPart = end.u_total.QuadPart - start.u_total.QuadPart;
+
+    pct = elapsed.QuadPart * 100.0 / ((te-ts)*10000.0);
+
+    printf("CPU usage=%6.4f%%\n", pct); fflush(stdout);
+}
+
+
+
+/* Struct attached to sine generator */
+typedef struct
+{
+    pj_int16_t	*samples;	/* Sine samples.    */
+} port_data;
+
+
+/* This callback is called to feed more samples */
+static pj_status_t sine_get_frame( pjmedia_port *port, 
+				   pjmedia_frame *frame)
+{
+    port_data *sine = port->user_data;
+    pj_int16_t *samples = frame->buf;
+    unsigned i, count, left, right;
+
+    /* Get number of samples */
+    count = frame->size / 2 / port->info.channel_count;
+
+    left = 0;
+    right = 0;
+
+    for (i=0; i<count; ++i) {
+	*samples++ = sine->samples[left];
+	++left;
+
+	if (port->info.channel_count == 2) {
+	    *samples++ = sine->samples[right];
+	    right += 2; /* higher pitch so we can distinguish left and right. */
+	    if (right >= count)
+		right = 0;
+	}
+    }
+
+    /* Must set frame->type correctly, otherwise the sound device
+     * will refuse to play.
+     */
+    frame->type = PJMEDIA_FRAME_TYPE_AUDIO;
+
+    return PJ_SUCCESS;
+}
+
+#ifndef M_PI
+#define M_PI  (3.14159265)
+#endif
+
+/*
+ * Create a media port to generate sine wave samples.
+ */
+static pj_status_t create_sine_port(pj_pool_t *pool,
+				    unsigned sampling_rate,
+				    unsigned channel_count,
+				    pjmedia_port **p_port)
+{
+    pjmedia_port *port;
+    unsigned i;
+    unsigned count;
+    port_data *sine;
+
+    PJ_ASSERT_RETURN(pool && channel_count > 0 && channel_count <= 2, 
+		     PJ_EINVAL);
+
+    port = pj_pool_zalloc(pool, sizeof(pjmedia_port));
+    PJ_ASSERT_RETURN(port != NULL, PJ_ENOMEM);
+
+    /* Fill in port info. */
+    port->info.bits_per_sample = 16;
+    port->info.channel_count = channel_count;
+    port->info.encoding_name = pj_str("pcm");
+    port->info.has_info = 1;
+    port->info.name = pj_str("sine generator");
+    port->info.need_info = 0;
+    port->info.pt = 0xFF;
+    port->info.clock_rate = sampling_rate;
+    port->info.samples_per_frame = sampling_rate * SINE_PTIME / 1000 * channel_count;
+    port->info.bytes_per_frame = port->info.samples_per_frame * 2;
+    port->info.type = PJMEDIA_TYPE_AUDIO;
+    
+    /* Set the function to feed frame */
+    port->get_frame = &sine_get_frame;
+
+    /* Create sine port data */
+    port->user_data = sine = pj_pool_zalloc(pool, sizeof(port_data));
+
+    /* Create samples */
+    count = port->info.samples_per_frame / channel_count;
+    sine->samples = pj_pool_alloc(pool, count * sizeof(pj_int16_t));
+    PJ_ASSERT_RETURN(sine->samples != NULL, PJ_ENOMEM);
+
+    /* initialise sinusoidal wavetable */
+    for( i=0; i<count; i++ )
+    {
+        sine->samples[i] = (pj_int16_t) (10000.0 * 
+		sin(((double)i/(double)count) * M_PI * 8.) );
+    }
+
+    *p_port = port;
+
+    return PJ_SUCCESS;
+}
+
+int main()
+{
+    pj_caching_pool cp;
+    pjmedia_endpt *med_endpt;
+    pj_pool_t *pool;
+    pjmedia_conf *conf;
+    int i;
+    pjmedia_port *sine_port[SINE_COUNT], *null_port, *conf_port;
+    pjmedia_port *nulls[NULL_COUNT];
+    unsigned null_slots[NULL_COUNT];
+    pjmedia_master_port *master_port;
+    pj_status_t status;
+
+
+    pj_log_set_level(3);
+
+    status = pj_init();
+    PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+
+    pj_caching_pool_init(&cp, &pj_pool_factory_default_policy, 0);
+    pool = pj_pool_create( &cp.factory,	    /* pool factory	    */
+			   "wav",	    /* pool name.	    */
+			   4000,	    /* init size	    */
+			   4000,	    /* increment size	    */
+			   NULL		    /* callback on error    */
+			   );
+
+    status = pjmedia_endpt_create(&cp.factory, NULL, 1, &med_endpt);
+    PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+
+
+
+    status = pjmedia_conf_create( pool,
+				  PORT_COUNT,
+				  CLOCK_RATE,
+				  1, SAMPLES_PER_FRAME, 16,
+				  PJMEDIA_CONF_NO_DEVICE,
+				  &conf);
+    if (status != PJ_SUCCESS) {
+	app_perror(THIS_FILE, "Unable to create conference bridge", status);
+	return 1;
+    }
+
+    /* Create Null ports */
+    for (i=0; i<NULL_COUNT; ++i) {
+	status = pjmedia_null_port_create(pool, CLOCK_RATE, 1, SAMPLES_PER_FRAME*2, 16, &nulls[i]);
+	PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+
+	status = pjmedia_conf_add_port(conf, pool, nulls[i], NULL, &null_slots[i]);
+	PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+    }
+
+    /* Create sine ports. */
+    for (i=0; i<SINE_COUNT; ++i) {
+	unsigned j, slot;
+
+	/* Create the sine wave generator port. */
+	status = create_sine_port(pool, SINE_CLOCK, 1, &sine_port[i]);
+	PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+
+	/* Add the sine port to the conference bridge */
+	status = pjmedia_conf_add_port( conf,		/* The bridge	    */
+					pool,		/* pool		    */
+					sine_port[i],	/* port to connect  */
+					NULL,		/* Use port's name  */
+					&slot		/* ptr for slot #   */
+					);
+	if (status != PJ_SUCCESS) {
+	    app_perror(THIS_FILE, "Unable to add conference port", status);
+	    return 1;
+	}
+
+	status = pjmedia_conf_connect_port(conf, slot, 0, 0);
+	PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+
+	for (j=0; j<NULL_COUNT; ++j) {
+	    status = pjmedia_conf_connect_port(conf, slot, null_slots[j], 0);
+	    PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+	}
+    }
+
+    /* Create idle ports */
+    for (i=0; i<IDLE_COUNT; ++i) {
+	pjmedia_port *dummy;
+	status = pjmedia_null_port_create(pool, CLOCK_RATE, 1, SAMPLES_PER_FRAME, 16, &dummy);
+	PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+	status = pjmedia_conf_add_port(conf, pool, dummy, NULL, NULL);
+	PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+    }
+
+    /* Create null port */
+    status = pjmedia_null_port_create(pool, CLOCK_RATE, 1, SAMPLES_PER_FRAME, 16,
+				      &null_port);
+    PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+
+    conf_port = pjmedia_conf_get_master_port(conf);
+
+    /* Create master port */
+    status = pjmedia_master_port_create(pool, null_port, conf_port, 0, &master_port);
+
+
+    pjmedia_master_port_start(master_port);
+
+    puts("Waiting to settle.."); fflush(stdout);
+    pj_thread_sleep(5000);
+
+
+    benchmark();
+
+
+    /* Done. */
+    return 0;
+}
+
+
diff --git a/pjsip-apps/src/samples/confsample.c b/pjsip-apps/src/samples/confsample.c
index 539a2c0..7ae80cd 100644
--- a/pjsip-apps/src/samples/confsample.c
+++ b/pjsip-apps/src/samples/confsample.c
@@ -475,12 +475,11 @@
 	pjmedia_conf_port_info *port_info = &info[i];	
 	
 	txlist[0] = '\0';
-	for (j=0; j<count; ++j) {
+	for (j=0; j<port_info->listener_cnt; ++j) {
 	    char s[10];
-	    if (port_info->listener[j]) {
-		pj_ansi_sprintf(s, "#%d ", j);
-		pj_ansi_strcat(txlist, s);
-	    }
+	    pj_ansi_sprintf(s, "#%d ", port_info->listener_slots[j]);
+	    pj_ansi_strcat(txlist, s);
+
 	}
 
 	if (txlist[0] == '\0') {
diff --git a/pjsip/src/pjsua-lib/pjsua_media.c b/pjsip/src/pjsua-lib/pjsua_media.c
index f5c3c9b..2dddd63 100644
--- a/pjsip/src/pjsua-lib/pjsua_media.c
+++ b/pjsip/src/pjsua-lib/pjsua_media.c
@@ -548,11 +548,9 @@
     info->bits_per_sample = cinfo.bits_per_sample;
 
     /* Build array of listeners */
-    count = pjsua_var.media_cfg.max_media_ports;
-    for (i=0; i<count; ++i) {
-	if (cinfo.listener[i]) {
-	    info->listeners[info->listener_cnt++] = i;
-	}
+    info->listener_cnt = cinfo.listener_cnt;
+    for (i=0; i<cinfo.listener_cnt; ++i) {
+	info->listeners[i] = cinfo.listener_slots[i];
     }
 
     return PJ_SUCCESS;