Ticket #588: Improvements to echo cancellation framework

git-svn-id: https://svn.pjsip.org/repos/pjproject/trunk@2198 74dad513-b988-da41-8d7b-12977e46ad98
diff --git a/pjmedia/build/pjmedia.dsp b/pjmedia/build/pjmedia.dsp
index f0ce7c3..5c8a96b 100644
--- a/pjmedia/build/pjmedia.dsp
+++ b/pjmedia/build/pjmedia.dsp
@@ -125,6 +125,10 @@
 # End Source File

 # Begin Source File

 

+SOURCE=..\src\pjmedia\echo_internal.h

+# End Source File

+# Begin Source File

+

 SOURCE=..\src\pjmedia\echo_port.c

 # End Source File

 # Begin Source File

diff --git a/pjmedia/include/pjmedia/echo.h b/pjmedia/include/pjmedia/echo.h
index 53e262f..4a6c28f 100644
--- a/pjmedia/include/pjmedia/echo.h
+++ b/pjmedia/include/pjmedia/echo.h
@@ -57,18 +57,37 @@
 typedef enum pjmedia_echo_flag
 {
     /**
+     * Use any available backend echo canceller algorithm. This is
+     * the default setting. This setting is mutually exclusive with
+     * PJMEDIA_ECHO_SIMPLE and PJMEDIA_ECHO_SPEEX.
+     */
+    PJMEDIA_ECHO_DEFAULT= 0,
+
+    /**
+     * Force to use Speex AEC as the backend echo canceller algorithm.
+     * This setting is mutually exclusive with PJMEDIA_ECHO_SIMPLE.
+     */
+    PJMEDIA_ECHO_SPEEX	= 1,
+
+    /**
      * If PJMEDIA_ECHO_SIMPLE flag is specified during echo canceller
      * creation, then a simple echo suppressor will be used instead of
-     * an accoustic echo cancellation.
+     * an acoustic echo cancellation. This setting is mutually exclusive
+     * with PJMEDIA_ECHO_SPEEX.
      */
-    PJMEDIA_ECHO_SIMPLE	= 1,
+    PJMEDIA_ECHO_SIMPLE	= 2,
+
+    /**
+     * For internal use.
+     */
+    PJMEDIA_ECHO_ALGO_MASK = 15,
 
     /**
      * If PJMEDIA_ECHO_NO_LOCK flag is specified, no mutex will be created
      * for the echo canceller, but application will guarantee that echo
      * canceller will not be called by different threads at the same time.
      */
-    PJMEDIA_ECHO_NO_LOCK = 2
+    PJMEDIA_ECHO_NO_LOCK = 16
 
 } pjmedia_echo_flag;
 
@@ -102,6 +121,34 @@
 					 unsigned options,
 					 pjmedia_echo_state **p_echo );
 
+/**
+ * Create a multi-channel echo canceller.
+ *
+ * @param pool		    Pool to allocate memory.
+ * @param clock_rate	    Media clock rate/sampling rate.
+ * @param channel_count	    Number of channels.
+ * @param samples_per_frame Number of samples per frame.
+ * @param tail_ms	    Tail length, milliseconds.
+ * @param latency_ms	    Total latency introduced by playback and
+ *			    recording device. Set to zero if the latency
+ *			    is not known.
+ * @param options	    Options. If PJMEDIA_ECHO_SIMPLE is specified,
+ *			    then a simple echo suppressor implementation 
+ *			    will be used instead of an acoustic echo
+ *			    cancellation.
+ *			    See #pjmedia_echo_flag for other options.
+ * @param p_echo	    Pointer to receive the Echo Canceller state.
+ *
+ * @return		    PJ_SUCCESS on success, or the appropriate status.
+ */
+PJ_DECL(pj_status_t) pjmedia_echo_create2(pj_pool_t *pool,
+					  unsigned clock_rate,
+					  unsigned channel_count,
+					  unsigned samples_per_frame,
+					  unsigned tail_ms,
+					  unsigned latency_ms,
+					  unsigned options,
+					  pjmedia_echo_state **p_echo );
 
 /**
  * Destroy the Echo Canceller. 
@@ -114,7 +161,17 @@
 
 
 /**
- * Let the Echo Canceller knows that a frame has been played to the speaker.
+ * Reset the echo canceller.
+ *
+ * @param echo		The Echo Canceller.
+ *
+ * @return		PJ_SUCCESS on success.
+ */
+PJ_DECL(pj_status_t) pjmedia_echo_reset(pjmedia_echo_state *echo );
+
+
+/**
+ * Let the Echo Canceller know that a frame has been played to the speaker.
  * The Echo Canceller will keep the frame in its internal buffer, to be used
  * when cancelling the echo with #pjmedia_echo_capture().
  *
@@ -131,10 +188,9 @@
 
 
 /**
- * Let the Echo Canceller knows that a frame has been captured from 
- * the microphone.
- * The Echo Canceller will cancel the echo from the captured signal, 
- * using the internal buffer (supplied by #pjmedia_echo_playback()) 
+ * Let the Echo Canceller know that a frame has been captured from the 
+ * microphone. The Echo Canceller will cancel the echo from the captured
+ * signal, using the internal buffer (supplied by #pjmedia_echo_playback())
  * as the FES (Far End Speech) reference.
  *
  * @param echo		The Echo Canceller.
diff --git a/pjmedia/src/pjmedia/echo_common.c b/pjmedia/src/pjmedia/echo_common.c
index a7e6b1b..633e78a 100644
--- a/pjmedia/src/pjmedia/echo_common.c
+++ b/pjmedia/src/pjmedia/echo_common.c
@@ -17,36 +17,56 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  */
 
-#include <pjmedia/config.h>
 #include <pjmedia/echo.h>
+#include <pjmedia/delaybuf.h>
+#include <pjmedia/errno.h>
 #include <pj/assert.h>
+#include <pj/list.h>
+#include <pj/log.h>
 #include <pj/pool.h>
 #include "echo_internal.h"
 
+#define THIS_FILE   "echo_common.c"
+
 typedef struct ec_operations ec_operations;
 
+struct frame
+{
+    PJ_DECL_LIST_MEMBER(struct frame);
+    short   buf[1];
+};
+
 struct pjmedia_echo_state
 {
+    pj_pool_t	    *pool;
+    char	    *obj_name;
+    unsigned	     samples_per_frame;
     void	    *state;
     ec_operations   *op;
+
+    pj_bool_t	     lat_ready;	    /* lat_buf has been filled in.	    */
+    unsigned	     lat_target_cnt;/* Target number of frames in lat_buf   */
+    unsigned	     lat_buf_cnt;   /* Actual number of frames in lat_buf   */
+    struct frame     lat_buf;	    /* Frame queue for delayed playback	    */
+    struct frame     lat_free;	    /* Free frame list.			    */
+
+    pjmedia_delay_buf	*delay_buf;
 };
 
 
 struct ec_operations
 {
+    const char *name;
+
     pj_status_t (*ec_create)(pj_pool_t *pool,
-			    unsigned clock_rate,
-			    unsigned samples_per_frame,
-			    unsigned tail_ms,
-			    unsigned latency_ms,
-			    unsigned options,
-			    void **p_state );
+			     unsigned clock_rate,
+			     unsigned channel_count,
+			     unsigned samples_per_frame,
+			     unsigned tail_ms,
+			     unsigned options,
+			     void **p_state );
     pj_status_t (*ec_destroy)(void *state );
-    pj_status_t (*ec_playback)(void *state,
-			      pj_int16_t *play_frm );
-    pj_status_t (*ec_capture)(void *state,
-			      pj_int16_t *rec_frm,
-			      unsigned options );
+    void        (*ec_reset)(void *state );
     pj_status_t (*ec_cancel)(void *state,
 			     pj_int16_t *rec_frm,
 			     const pj_int16_t *play_frm,
@@ -57,10 +77,10 @@
 
 static struct ec_operations echo_supp_op = 
 {
+    "Echo suppressor",
     &echo_supp_create,
     &echo_supp_destroy,
-    &echo_supp_playback,
-    &echo_supp_capture,
+    &echo_supp_reset,
     &echo_supp_cancel_echo
 };
 
@@ -70,20 +90,30 @@
  * Speex AEC prototypes
  */
 #if defined(PJMEDIA_HAS_SPEEX_AEC) && PJMEDIA_HAS_SPEEX_AEC!=0
-static struct ec_operations aec_op = 
+static struct ec_operations speex_aec_op = 
 {
+    "AEC",
     &speex_aec_create,
     &speex_aec_destroy,
-    &speex_aec_playback,
-    &speex_aec_capture,
+    &speex_aec_reset,
     &speex_aec_cancel_echo
 };
-
-#else
-#define aec_op echo_supp_op
 #endif
 
 
+/*
+ * IPP AEC prototypes
+ */
+#if defined(PJMEDIA_HAS_INTEL_IPP_AEC) && PJMEDIA_HAS_INTEL_IPP_AEC!=0
+static struct ec_operations ipp_aec_op = 
+{
+    "IPP AEC",
+    &ipp_aec_create,
+    &ipp_aec_destroy,
+    &ipp_aec_reset,
+    &ipp_aec_cancel_echo
+};
+#endif
 
 /*
  * Create the echo canceller. 
@@ -96,34 +126,106 @@
 					 unsigned options,
 					 pjmedia_echo_state **p_echo )
 {
+    return pjmedia_echo_create2(pool, clock_rate, 1, samples_per_frame,
+				tail_ms, latency_ms, options, p_echo);
+}
+
+/*
+ * Create the echo canceller (multi-channel variant).
+ */
+PJ_DEF(pj_status_t) pjmedia_echo_create2(pj_pool_t *pool,
+					 unsigned clock_rate,
+					 unsigned channel_count,
+					 unsigned samples_per_frame,
+					 unsigned tail_ms,
+					 unsigned latency_ms,
+					 unsigned options,
+					 pjmedia_echo_state **p_echo )
+{
+    unsigned ptime;
     pjmedia_echo_state *ec;
     pj_status_t status;
 
-    /* Force to use simple echo suppressor if AEC is not available */
-#if !defined(PJMEDIA_HAS_SPEEX_AEC) || PJMEDIA_HAS_SPEEX_AEC==0
-    options |= PJMEDIA_ECHO_SIMPLE;
+    /* Create new pool and instantiate and init the EC */
+    pool = pj_pool_create(pool->factory, "ec%p", 256, 256, NULL);
+    ec = PJ_POOL_ZALLOC_T(pool, struct pjmedia_echo_state);
+    ec->pool = pool;
+    ec->obj_name = pool->obj_name;
+    pj_list_init(&ec->lat_buf);
+    pj_list_init(&ec->lat_free);
+
+    /* Select the backend algorithm */
+    if (0) {
+	/* Dummy */
+	;
+#if defined(PJMEDIA_HAS_SPEEX_AEC) && PJMEDIA_HAS_SPEEX_AEC!=0
+    } else if ((options & PJMEDIA_ECHO_ALGO_MASK) == PJMEDIA_ECHO_SPEEX ||
+	       (options & PJMEDIA_ECHO_ALGO_MASK) == PJMEDIA_ECHO_DEFAULT) 
+    {
+	ec->op = &speex_aec_op;
 #endif
 
-    ec = PJ_POOL_ZALLOC_T(pool, struct pjmedia_echo_state);
+#if defined(PJMEDIA_HAS_INTEL_IPP_AEC) && PJMEDIA_HAS_INTEL_IPP_AEC!=0
+    } else if ((options & PJMEDIA_ECHO_ALGO_MASK) == PJMEDIA_ECHO_IPP ||
+	       (options & PJMEDIA_ECHO_ALGO_MASK) == PJMEDIA_ECHO_DEFAULT)
+    {
+	ec->op = &ipp_aec_op;
 
-    if (options & PJMEDIA_ECHO_SIMPLE) {
-	ec->op = &echo_supp_op;
-	status = (*echo_supp_op.ec_create)(pool, clock_rate, samples_per_frame,
-					   tail_ms, latency_ms, options,
-					   &ec->state);
+#endif
+
     } else {
-	ec->op = &aec_op;
-	status = (*aec_op.ec_create)(pool, clock_rate, 
-				     samples_per_frame,
-				     tail_ms, latency_ms, options,
-				     &ec->state);
+	ec->op = &echo_supp_op;
     }
 
-    if (status != PJ_SUCCESS)
+    PJ_LOG(5,(ec->obj_name, "Creating %s", ec->op->name));
+
+    /* Instantiate EC object */
+    status = (*ec->op->ec_create)(pool, clock_rate, channel_count, 
+				  samples_per_frame, tail_ms, 
+				  options, &ec->state);
+    if (status != PJ_SUCCESS) {
+	pj_pool_release(pool);
 	return status;
+    }
 
-    pj_assert(ec->state != NULL);
+    /* Create latency buffers */
+    ptime = samples_per_frame * 1000 / clock_rate;
+    if (latency_ms == 0) {
+	/* Give at least one frame delay to simplify programming */
+	latency_ms = ptime;
+    }
+    ec->lat_target_cnt = latency_ms / ptime;
+    if (ec->lat_target_cnt != 0) {
+	unsigned i;
+	for (i=0; i < ec->lat_target_cnt; ++i)  {
+	    struct frame *frm;
 
+	    frm = (struct frame*) pj_pool_alloc(pool, (samples_per_frame<<1) +
+						      sizeof(struct frame));
+	    pj_list_push_back(&ec->lat_free, frm);
+	}
+    } else {
+	ec->lat_ready = PJ_TRUE;
+    }
+
+    /* Create delay buffer to compensate drifts */
+    status = pjmedia_delay_buf_create(ec->pool, ec->obj_name, clock_rate, 
+				      samples_per_frame, channel_count,
+				      (PJMEDIA_SOUND_BUFFER_COUNT+1) * ptime,
+				      0, &ec->delay_buf);
+    if (status != PJ_SUCCESS) {
+	pj_pool_release(pool);
+	return status;
+    }
+
+    PJ_LOG(4,(ec->obj_name, 
+	      "%s created, clock_rate=%d, channel=%d, "
+	      "samples per frame=%d, tail length=%d ms, "
+	      "latency=%d ms", 
+	      ec->op->name, clock_rate, channel_count, samples_per_frame,
+	      tail_ms, latency_ms));
+
+    /* Done */
     *p_echo = ec;
 
     return PJ_SUCCESS;
@@ -135,18 +237,63 @@
  */
 PJ_DEF(pj_status_t) pjmedia_echo_destroy(pjmedia_echo_state *echo )
 {
-    return (*echo->op->ec_destroy)(echo->state);
+    (*echo->op->ec_destroy)(echo->state);
+    pj_pool_release(echo->pool);
+    return PJ_SUCCESS;
 }
 
 
+/*
+ * Reset the echo canceller.
+ */
+PJ_DEF(pj_status_t) pjmedia_echo_reset(pjmedia_echo_state *echo )
+{
+    while (!pj_list_empty(&echo->lat_buf)) {
+	struct frame *frm;
+	frm = echo->lat_buf.next;
+	pj_list_erase(frm);
+	pj_list_push_back(&echo->lat_free, frm);
+    }
+    echo->lat_ready = PJ_FALSE;
+    pjmedia_delay_buf_reset(echo->delay_buf);
+    echo->op->ec_reset(echo->state);
+    return PJ_SUCCESS;
+}
+
 
 /*
- * Let the Echo Canceller knows that a frame has been played to the speaker.
+ * Let the Echo Canceller know that a frame has been played to the speaker.
  */
 PJ_DEF(pj_status_t) pjmedia_echo_playback( pjmedia_echo_state *echo,
 					   pj_int16_t *play_frm )
 {
-    return (*echo->op->ec_playback)(echo->state, play_frm);
+    if (!echo->lat_ready) {
+	/* We've not built enough latency in the buffer, so put this frame
+	 * in the latency buffer list.
+	 */
+	struct frame *frm;
+
+	if (pj_list_empty(&echo->lat_free)) {
+	    echo->lat_ready = PJ_TRUE;
+	    PJ_LOG(5,(echo->obj_name, "Latency bufferring complete"));
+	    pjmedia_delay_buf_put(echo->delay_buf, play_frm);
+	    return PJ_SUCCESS;
+	}
+	    
+	frm = echo->lat_free.prev;
+	pj_list_erase(frm);
+
+	pjmedia_copy_samples(frm->buf, play_frm, echo->samples_per_frame);
+	pj_list_push_back(&echo->lat_buf, frm);
+
+    } else {
+	/* Latency buffer is ready (full), so we put this frame in the
+	 * delay buffer.
+	 */
+	pjmedia_delay_buf_put(echo->delay_buf, play_frm);
+    }
+
+    return PJ_SUCCESS;
 }
 
 
@@ -158,7 +305,34 @@
 					  pj_int16_t *rec_frm,
 					  unsigned options )
 {
-    return (*echo->op->ec_capture)(echo->state, rec_frm, options);
+    struct frame *oldest_frm;
+    pj_status_t status, rc;
+
+    if (!echo->lat_ready) {
+	/* Prefetching to fill in the desired latency */
+	PJ_LOG(5,(echo->obj_name, "Prefetching.."));
+	return PJ_SUCCESS;
+    }
+
+    /* Retrieve oldest frame from the latency buffer */
+    oldest_frm = echo->lat_buf.next;
+    pj_list_erase(oldest_frm);
+
+    /* Cancel echo using this reference frame */
+    status = pjmedia_echo_cancel(echo, rec_frm, oldest_frm->buf, 
+				 options, NULL);
+
+    /* Move one frame from delay buffer to the latency buffer. */
+    rc = pjmedia_delay_buf_get(echo->delay_buf, oldest_frm->buf);
+    if (rc != PJ_SUCCESS) {
+	/* Ooops.. no frame! */
+	PJ_LOG(5,(echo->obj_name, 
+		  "No frame from delay buffer. This will upset EC later"));
+	pjmedia_zero_samples(oldest_frm->buf, echo->samples_per_frame);
+    }
+    pj_list_push_back(&echo->lat_buf, oldest_frm);
+    
+    return status;
 }
 
 
diff --git a/pjmedia/src/pjmedia/echo_internal.h b/pjmedia/src/pjmedia/echo_internal.h
index c382abb..6b6a4b6 100644
--- a/pjmedia/src/pjmedia/echo_internal.h
+++ b/pjmedia/src/pjmedia/echo_internal.h
@@ -28,17 +28,13 @@
  */
 PJ_DECL(pj_status_t) echo_supp_create(pj_pool_t *pool,
 				      unsigned clock_rate,
+				      unsigned channel_count,
 				      unsigned samples_per_frame,
 				      unsigned tail_ms,
-				      unsigned latency_ms,
 				      unsigned options,
 				      void **p_state );
 PJ_DECL(pj_status_t) echo_supp_destroy(void *state);
-PJ_DECL(pj_status_t) echo_supp_playback(void *state,
-					pj_int16_t *play_frm );
-PJ_DECL(pj_status_t) echo_supp_capture(void *state,
-				       pj_int16_t *rec_frm,
-				       unsigned options );
+PJ_DECL(void) echo_supp_reset(void *state);
 PJ_DECL(pj_status_t) echo_supp_cancel_echo(void *state,
 					   pj_int16_t *rec_frm,
 					   const pj_int16_t *play_frm,
@@ -47,23 +43,34 @@
 
 PJ_DECL(pj_status_t) speex_aec_create(pj_pool_t *pool,
 				      unsigned clock_rate,
+				      unsigned channel_count,
 				      unsigned samples_per_frame,
 				      unsigned tail_ms,
-				      unsigned latency_ms,
 				      unsigned options,
 				      void **p_state );
 PJ_DECL(pj_status_t) speex_aec_destroy(void *state );
-PJ_DECL(pj_status_t) speex_aec_playback(void *state,
-				        pj_int16_t *play_frm );
-PJ_DECL(pj_status_t) speex_aec_capture(void *state,
-				       pj_int16_t *rec_frm,
-				       unsigned options );
+PJ_DECL(void) speex_aec_reset(void *state );
 PJ_DECL(pj_status_t) speex_aec_cancel_echo(void *state,
 					   pj_int16_t *rec_frm,
 					   const pj_int16_t *play_frm,
 					   unsigned options,
 					   void *reserved );
 
+PJ_DECL(pj_status_t) ipp_aec_create(pj_pool_t *pool,
+				    unsigned clock_rate,
+				    unsigned channel_count,
+				    unsigned samples_per_frame,
+				    unsigned tail_ms,
+				    unsigned options,
+				    void **p_echo );
+PJ_DECL(pj_status_t) ipp_aec_destroy(void *state );
+PJ_DECL(void) ipp_aec_reset(void *state );
+PJ_DECL(pj_status_t) ipp_aec_cancel_echo(void *state,
+					 pj_int16_t *rec_frm,
+					 const pj_int16_t *play_frm,
+					 unsigned options,
+					 void *reserved );
+
 
 PJ_END_DECL
 
diff --git a/pjmedia/src/pjmedia/echo_port.c b/pjmedia/src/pjmedia/echo_port.c
index 5d36e13..1b1c89c 100644
--- a/pjmedia/src/pjmedia/echo_port.c
+++ b/pjmedia/src/pjmedia/echo_port.c
@@ -67,9 +67,10 @@
 			   dn_port->info.bits_per_sample,
 			   dn_port->info.samples_per_frame);
 
-    status = pjmedia_echo_create(pool, dn_port->info.clock_rate, 
-				 dn_port->info.samples_per_frame,
-				 tail_ms, latency_ms, options, &ec->ec);
+    status = pjmedia_echo_create2(pool, dn_port->info.clock_rate, 
+				  dn_port->info.channel_count,
+				  dn_port->info.samples_per_frame,
+				  tail_ms, latency_ms, options, &ec->ec);
     if (status != PJ_SUCCESS)
 	return status;
 
diff --git a/pjmedia/src/pjmedia/echo_speex.c b/pjmedia/src/pjmedia/echo_speex.c
index d7f2750..033597e 100644
--- a/pjmedia/src/pjmedia/echo_speex.c
+++ b/pjmedia/src/pjmedia/echo_speex.c
@@ -19,221 +19,13 @@
 
 #include <pjmedia/echo.h>
 #include <pjmedia/errno.h>
-#include <pjmedia/silencedet.h>
 #include <pj/assert.h>
-#include <pj/lock.h>
-#include <pj/log.h>
-#include <pj/os.h>
 #include <pj/pool.h>
 #include <speex/speex_echo.h>
 #include <speex/speex_preprocess.h>
 
 #include "echo_internal.h"
 
-#define THIS_FILE	"echo_speex.c"
-#define BUF_COUNT	PJMEDIA_SOUND_BUFFER_COUNT
-#define MIN_PREFETCH	2
-#define MAX_PREFETCH	(BUF_COUNT*2/3)
-
-
-
-#if 0
-# define TRACE_(expr)  PJ_LOG(5,expr)
-#else
-# define TRACE_(expr)
-#endif
-
-
-typedef struct pjmedia_frame_queue pjmedia_frame_queue;
-
-struct fq_frame
-{
-    PJ_DECL_LIST_MEMBER(struct fq_frame);
-    void	*buf;
-    unsigned	 size;
-    pj_uint32_t	 seq;
-};
-
-struct pjmedia_frame_queue
-{
-    char	     obj_name[PJ_MAX_OBJ_NAME];
-    unsigned	     frame_size;
-    int		     samples_per_frame;
-    unsigned	     count;
-    unsigned	     max_count;
-    struct fq_frame  frame_list;
-    struct fq_frame  free_list;
-
-    int		     seq_delay;
-    int		     prefetch_count;
-};
-
-PJ_DEF(pj_status_t) pjmedia_frame_queue_create( pj_pool_t *pool,
-					        const char *name,
-					        unsigned frame_size,
-					        unsigned samples_per_frame,
-					        unsigned max_count,
-					        pjmedia_frame_queue **p_fq)
-{
-    pjmedia_frame_queue *fq;
-    unsigned i;
-
-    fq = PJ_POOL_ZALLOC_T(pool, pjmedia_frame_queue);
-
-    pj_ansi_snprintf(fq->obj_name, sizeof(fq->obj_name), name, fq);
-    fq->obj_name[sizeof(fq->obj_name)-1] = '\0';
-
-    fq->max_count = max_count;
-    fq->frame_size = frame_size;
-    fq->samples_per_frame = samples_per_frame;
-    fq->count = 0;
-
-    pj_list_init(&fq->frame_list);
-    pj_list_init(&fq->free_list);
-
-    for (i=0; i<max_count; ++i) {
-	struct fq_frame *f;
-
-	f = PJ_POOL_ZALLOC_T(pool, struct fq_frame);
-	f->buf = pj_pool_alloc(pool, frame_size);
-
-	pj_list_push_back(&fq->free_list, f);
-	
-    }
-
-    *p_fq = fq;
-    return PJ_SUCCESS;
-}
-
-PJ_DEF(pj_status_t) pjmedia_frame_queue_init( pjmedia_frame_queue *fq,
-					      int seq_delay,
-					      int prefetch_count)
-{
-    if (prefetch_count > MAX_PREFETCH)
-	prefetch_count = MAX_PREFETCH;
-
-    fq->seq_delay = seq_delay;
-    fq->prefetch_count = prefetch_count;
-    fq->count = 0;
-    pj_list_merge_first(&fq->free_list, &fq->frame_list);
-
-    PJ_LOG(5,(fq->obj_name, "AEC reset, delay=%d, prefetch=%d", 
-	      fq->seq_delay, fq->prefetch_count));
-
-    return PJ_SUCCESS;
-}
-
-PJ_DEF(pj_bool_t) pjmedia_frame_queue_empty( pjmedia_frame_queue *fq )
-{
-    return pj_list_empty(&fq->frame_list);
-}
-
-PJ_DEF(int) pjmedia_frame_queue_get_prefetch( pjmedia_frame_queue *fq )
-{
-    return fq->prefetch_count;
-}
-
-PJ_DEF(pj_status_t) pjmedia_frame_queue_put( pjmedia_frame_queue *fq,
-					     const void *framebuf,
-					     unsigned size,
-					     pj_uint32_t timestamp )
-{
-    struct fq_frame *f;
-
-    TRACE_((fq->obj_name, "PUT seq=%d, count=%d", 
-	    timestamp / fq->samples_per_frame, fq->count));
-
-    if (pj_list_empty(&fq->free_list)) {
-	PJ_LOG(5,(fq->obj_name, 
-		  " AEC info: queue is full, frame discarded "
-		  "[count=%d, seq=%d]",
-		  fq->max_count, timestamp / fq->samples_per_frame));
-	//pjmedia_frame_queue_init(fq, fq->seq_delay, fq->prefetch_count);
-	return PJ_ETOOMANY;
-    }
-
-    PJ_ASSERT_RETURN(size <= fq->frame_size, PJ_ETOOBIG);
-
-    f = fq->free_list.next;
-    pj_list_erase(f);
-
-    pj_memcpy(f->buf, framebuf, size);
-    f->size = size;
-    f->seq = timestamp / fq->samples_per_frame;
-
-    pj_list_push_back(&fq->frame_list, f);
-    ++fq->count;
-
-    return PJ_SUCCESS;
-}
-
-PJ_DEF(pj_status_t) pjmedia_frame_queue_get( pjmedia_frame_queue *fq,
-					     pj_uint32_t get_timestamp,
-					     void **framebuf,
-					     unsigned *size )
-{
-    pj_uint32_t frame_seq;
-    struct fq_frame *f;
-
-    frame_seq = get_timestamp/fq->samples_per_frame + fq->seq_delay -
-		fq->prefetch_count;
-
-    TRACE_((fq->obj_name, "GET seq=%d for seq=%d delay=%d, prefetch=%d", 
-	    get_timestamp/fq->samples_per_frame, frame_seq, fq->seq_delay, 
-	    fq->prefetch_count));
-
-    *size = 0;
-
-    /* Remove old frames */
-    for (;!pj_list_empty(&fq->frame_list);) {
-	f = fq->frame_list.next;
-	if (f->seq >= frame_seq)
-	    break;
-
-	PJ_LOG(5,(fq->obj_name, 
-		  " AEC Info: old frame removed (seq=%d, want=%d, count=%d)",
-		  f->seq, frame_seq, fq->count));
-	pj_list_erase(f);
-	--fq->count;
-	pj_list_push_back(&fq->free_list, f);
-    }
-
-    if (pj_list_empty(&fq->frame_list)) {
-	PJ_LOG(5,(fq->obj_name, 
-		  " AEC Info: empty queue for seq=%d!",
-		  frame_seq));
-	return PJ_ENOTFOUND;
-    }
-
-    f = fq->frame_list.next;
-
-    if (f->seq > frame_seq) {
-	PJ_LOG(5,(fq->obj_name, 
-		  " AEC Info: prefetching (first seq=%d)",
-		  f->seq));
-	return -1;
-    }
-
-    pj_list_erase(f);
-    --fq->count;
-
-    *framebuf = (void*)f->buf;
-    *size = f->size;
-
-    TRACE_((fq->obj_name, " returning frame with seq=%d, count=%d", 
-	    f->seq, fq->count));
-
-    pj_list_push_front(&fq->free_list, f);
-    return PJ_SUCCESS;
-}
-
-enum
-{
-    TS_FLAG_PLAY = 1,
-    TS_FLAG_REC	 = 2,
-    TS_FLAG_OK	 = 3,
-};
-
 typedef struct speex_ec
 {
     SpeexEchoState	 *state;
@@ -243,14 +35,6 @@
     unsigned		  prefetch;
     unsigned		  options;
     pj_int16_t		 *tmp_frame;
-    spx_int32_t		 *residue;
-
-    pj_uint32_t		  play_ts,
-			  rec_ts,
-			  ts_flag;
-
-    pjmedia_frame_queue	 *frame_queue;
-    pj_lock_t		 *lock;		/* To protect buffers, if required  */
 } speex_ec;
 
 
@@ -260,43 +44,33 @@
  */
 PJ_DEF(pj_status_t) speex_aec_create(pj_pool_t *pool,
 				     unsigned clock_rate,
+				     unsigned channel_count,
 				     unsigned samples_per_frame,
 				     unsigned tail_ms,
-				     unsigned latency_ms,
 				     unsigned options,
 				     void **p_echo )
 {
     speex_ec *echo;
     int sampling_rate;
-    pj_status_t status;
 
     *p_echo = NULL;
 
     echo = PJ_POOL_ZALLOC_T(pool, speex_ec);
     PJ_ASSERT_RETURN(echo != NULL, PJ_ENOMEM);
 
-    if (options & PJMEDIA_ECHO_NO_LOCK) {
-	status = pj_lock_create_null_mutex(pool, "aec%p", &echo->lock);
-	if (status != PJ_SUCCESS)
-	    return status;
-    } else {
-	status = pj_lock_create_simple_mutex(pool, "aec%p", &echo->lock);
-	if (status != PJ_SUCCESS)
-	    return status;
-    }
-
     echo->samples_per_frame = samples_per_frame;
-    echo->prefetch = (latency_ms * clock_rate / 1000) / samples_per_frame;
-    if (echo->prefetch < MIN_PREFETCH)
-	echo->prefetch = MIN_PREFETCH;
-    if (echo->prefetch > MAX_PREFETCH)
-	echo->prefetch = MAX_PREFETCH;
     echo->options = options;
 
-    echo->state = speex_echo_state_init(samples_per_frame,
-					clock_rate * tail_ms / 1000);
+#if 0
+    echo->state = speex_echo_state_init_mc(echo->samples_per_frame,
+					   clock_rate * tail_ms / 1000,
+					   channel_count, channel_count);
+#else
+    PJ_ASSERT_RETURN(channel_count==1, PJ_EINVAL);
+    echo->state = speex_echo_state_init(echo->samples_per_frame,
+    					clock_rate * tail_ms / 1000);
+#endif
     if (echo->state == NULL) {
-	pj_lock_destroy(echo->lock);
 	return PJ_ENOMEM;
     }
 
@@ -305,11 +79,10 @@
     speex_echo_ctl(echo->state, SPEEX_ECHO_SET_SAMPLING_RATE, 
 		   &sampling_rate);
 
-    echo->preprocess = speex_preprocess_state_init(samples_per_frame, 
+    echo->preprocess = speex_preprocess_state_init(echo->samples_per_frame,
 						   clock_rate);
     if (echo->preprocess == NULL) {
 	speex_echo_state_destroy(echo->state);
-	pj_lock_destroy(echo->lock);
 	return PJ_ENOMEM;
     }
 
@@ -324,7 +97,7 @@
     speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_VAD, 
 			 &disabled);
     speex_preprocess_ctl(echo->preprocess, SPEEX_PREPROCESS_SET_DEREVERB, 
-			 &disabled);
+			 &enabled);
 #endif
 
     /* Control echo cancellation in the preprocessor */
@@ -333,33 +106,11 @@
 
 
     /* Create temporary frame for echo cancellation */
-    echo->tmp_frame = (pj_int16_t*) pj_pool_zalloc(pool, 2 * samples_per_frame);
+    echo->tmp_frame = (pj_int16_t*) pj_pool_zalloc(pool, 2*samples_per_frame);
     PJ_ASSERT_RETURN(echo->tmp_frame != NULL, PJ_ENOMEM);
 
-    /* Create temporary frame to receive residue */
-    echo->residue = (spx_int32_t*)
-		    pj_pool_zalloc(pool, sizeof(spx_int32_t) * 
-					    (samples_per_frame+1));
-    PJ_ASSERT_RETURN(echo->residue != NULL, PJ_ENOMEM);
-
-    /* Create frame queue */
-    status = pjmedia_frame_queue_create(pool, "aec%p", samples_per_frame*2,
-					samples_per_frame, BUF_COUNT, 
-					&echo->frame_queue);
-    if (status != PJ_SUCCESS) {
-	speex_preprocess_state_destroy(echo->preprocess);
-	speex_echo_state_destroy(echo->state);
-	pj_lock_destroy(echo->lock);
-	return status;
-    }
-
     /* Done */
     *p_echo = echo;
-
-    PJ_LOG(4,(THIS_FILE, "Speex Echo canceller/AEC created, clock_rate=%d, "
-			 "samples per frame=%d, tail length=%d ms, "
-			 "latency=%d ms", 
-			 clock_rate, samples_per_frame, tail_ms, latency_ms));
     return PJ_SUCCESS;
 
 }
@@ -374,9 +125,6 @@
 
     PJ_ASSERT_RETURN(echo && echo->state, PJ_EINVAL);
 
-    if (echo->lock)
-	pj_lock_acquire(echo->lock);
-
     if (echo->state) {
 	speex_echo_state_destroy(echo->state);
 	echo->state = NULL;
@@ -387,137 +135,17 @@
 	echo->preprocess = NULL;
     }
 
-    if (echo->lock) {
-	pj_lock_destroy(echo->lock);
-	echo->lock = NULL;
-    }
-
     return PJ_SUCCESS;
 }
 
 
 /*
- * Let the AEC knows that a frame has been played to the speaker.
+ * Reset AEC
  */
-PJ_DEF(pj_status_t) speex_aec_playback(void *state,
-				       pj_int16_t *play_frm )
+PJ_DEF(void) speex_aec_reset(void *state )
 {
     speex_ec *echo = (speex_ec*) state;
-
-    /* Sanity checks */
-    PJ_ASSERT_RETURN(echo && play_frm, PJ_EINVAL);
-
-    /* The AEC must be configured to support internal playback buffer */
-    PJ_ASSERT_RETURN(echo->frame_queue!= NULL, PJ_EINVALIDOP);
-
-    pj_lock_acquire(echo->lock);
-
-    /* Inc timestamp */
-    echo->play_ts += echo->samples_per_frame;
-
-    /* Initialize frame delay. */
-    if ((echo->ts_flag & TS_FLAG_PLAY) == 0) {
-	echo->ts_flag |= TS_FLAG_PLAY;
-
-	if (echo->ts_flag == TS_FLAG_OK) {
-	    int seq_delay;
-
-	    seq_delay = ((int)echo->play_ts - (int)echo->rec_ts) / 
-			    (int)echo->samples_per_frame;
-	    pjmedia_frame_queue_init(echo->frame_queue, seq_delay, 
-				     echo->prefetch);
-	}
-    }
-
-    if (pjmedia_frame_queue_put(echo->frame_queue, play_frm, 
-				echo->samples_per_frame*2, 
-				echo->play_ts) != PJ_SUCCESS)
-    {
-	int seq_delay;
-
-	/* On full reset frame queue */
-	seq_delay = ((int)echo->play_ts - (int)echo->rec_ts) / 
-			(int)echo->samples_per_frame;
-	pjmedia_frame_queue_init(echo->frame_queue, seq_delay,
-				 echo->prefetch);
-
-	/* And re-put */
-	pjmedia_frame_queue_put(echo->frame_queue, play_frm, 
-				echo->samples_per_frame*2, 
-				echo->play_ts);
-    }
-
-    pj_lock_release(echo->lock);
-
-    return PJ_SUCCESS;
-}
-
-
-/*
- * Let the AEC knows that a frame has been captured from the microphone.
- */
-PJ_DEF(pj_status_t) speex_aec_capture( void *state,
-				       pj_int16_t *rec_frm,
-				       unsigned options )
-{
-    speex_ec *echo = (speex_ec*) state;
-    pj_status_t status = PJ_SUCCESS;
-
-    /* Sanity checks */
-    PJ_ASSERT_RETURN(echo && rec_frm, PJ_EINVAL);
-
-    /* The AEC must be configured to support internal playback buffer */
-    PJ_ASSERT_RETURN(echo->frame_queue!= NULL, PJ_EINVALIDOP);
-
-    /* Lock mutex */
-    pj_lock_acquire(echo->lock);
-
-    /* Inc timestamp */
-    echo->rec_ts += echo->samples_per_frame;
-
-    /* Init frame delay. */
-    if ((echo->ts_flag & TS_FLAG_REC) == 0) {
-	echo->ts_flag |= TS_FLAG_REC;
-
-	if (echo->ts_flag == TS_FLAG_OK) {
-	    int seq_delay;
-
-	    seq_delay = ((int)echo->play_ts - (int)echo->rec_ts) / 
-			    (int)echo->samples_per_frame;
-	    pjmedia_frame_queue_init(echo->frame_queue, seq_delay, 
-				     echo->prefetch);
-	}
-    }
-
-    /* Cancel echo */
-    if (echo->ts_flag == TS_FLAG_OK) {
-	void *play_buf;
-	unsigned size = 0;
-	
-	if (pjmedia_frame_queue_empty(echo->frame_queue)) {
-	    int seq_delay;
-
-	    seq_delay = ((int)echo->play_ts - (int)echo->rec_ts) / 
-			    (int)echo->samples_per_frame;
-	    pjmedia_frame_queue_init(echo->frame_queue, seq_delay, 
-				     echo->prefetch);
-	    status = -1;
-
-	} else {
-	    status = pjmedia_frame_queue_get(echo->frame_queue, echo->rec_ts,
-					     &play_buf, &size);
-	    if (size != 0) {
-		speex_aec_cancel_echo(echo, rec_frm, (pj_int16_t*)play_buf,
-				      options, NULL);
-	    }	
-	}
-
-	if (status != PJ_SUCCESS)
-	    speex_echo_state_reset(echo->state);
-    }
-
-    pj_lock_release(echo->lock);
-    return PJ_SUCCESS;
+    speex_echo_state_reset(echo->state);
 }
 
 
diff --git a/pjmedia/src/pjmedia/echo_suppress.c b/pjmedia/src/pjmedia/echo_suppress.c
index 8ef071b..a86a058 100644
--- a/pjmedia/src/pjmedia/echo_suppress.c
+++ b/pjmedia/src/pjmedia/echo_suppress.c
@@ -35,9 +35,7 @@
  */
 typedef struct echo_supp
 {
-    pj_bool_t		 suppressing;
     pjmedia_silence_det	*sd;
-    pj_time_val		 last_signal;
     unsigned		 samples_per_frame;
     unsigned		 tail_ms;
 } echo_supp;
@@ -49,9 +47,9 @@
  */
 PJ_DEF(pj_status_t) echo_supp_create( pj_pool_t *pool,
 				      unsigned clock_rate,
+				      unsigned channel_count,
 				      unsigned samples_per_frame,
 				      unsigned tail_ms,
-				      unsigned latency_ms,
 				      unsigned options,
 				      void **p_state )
 {
@@ -59,8 +57,8 @@
     pj_status_t status;
 
     PJ_UNUSED_ARG(clock_rate);
+    PJ_UNUSED_ARG(channel_count);
     PJ_UNUSED_ARG(options);
-    PJ_UNUSED_ARG(latency_ms);
 
     ec = PJ_POOL_ZALLOC_T(pool, struct echo_supp);
     ec->samples_per_frame = samples_per_frame;
@@ -91,68 +89,14 @@
 
 
 /*
- * Let the AEC knows that a frame has been played to the speaker.
+ * Reset
  */
-PJ_DEF(pj_status_t) echo_supp_playback( void *state,
-					pj_int16_t *play_frm )
+PJ_DEF(void) echo_supp_reset(void *state)
 {
-    echo_supp *ec = (echo_supp*) state;
-    pj_bool_t silence;
-    pj_bool_t last_suppressing = ec->suppressing;
-
-    silence = pjmedia_silence_det_detect(ec->sd, play_frm,
-					 ec->samples_per_frame, NULL);
-
-    ec->suppressing = !silence;
-
-    if (ec->suppressing) {
-	pj_gettimeofday(&ec->last_signal);
-    }
-
-    if (ec->suppressing!=0 && last_suppressing==0) {
-	PJ_LOG(5,(THIS_FILE, "Start suppressing.."));
-    } else if (ec->suppressing==0 && last_suppressing!=0) {
-	PJ_LOG(5,(THIS_FILE, "Stop suppressing.."));
-    }
-
-    return PJ_SUCCESS;
+    PJ_UNUSED_ARG(state);
+    return;
 }
 
-
-/*
- * Let the AEC knows that a frame has been captured from the microphone.
- */
-PJ_DEF(pj_status_t) echo_supp_capture( void *state,
-				       pj_int16_t *rec_frm,
-				       unsigned options )
-{
-    echo_supp *ec = (echo_supp*) state;
-    pj_time_val now;
-    unsigned delay_ms;
-
-    PJ_UNUSED_ARG(options);
-
-    pj_gettimeofday(&now);
-
-    PJ_TIME_VAL_SUB(now, ec->last_signal);
-    delay_ms = PJ_TIME_VAL_MSEC(now);
-
-    if (delay_ms < ec->tail_ms) {
-#if defined(PJMEDIA_ECHO_SUPPRESS_FACTOR) && PJMEDIA_ECHO_SUPPRESS_FACTOR!=0
-	unsigned i;
-	for (i=0; i<ec->samples_per_frame; ++i) {
-	    rec_frm[i] = (pj_int16_t)(rec_frm[i] >> 
-				      PJMEDIA_ECHO_SUPPRESS_FACTOR);
-	}
-#else
-	pjmedia_zero_samples(rec_frm, ec->samples_per_frame);
-#endif
-    }
-
-    return PJ_SUCCESS;
-}
-
-
 /*
  * Perform echo cancellation.
  */
diff --git a/pjmedia/src/pjmedia/sound_port.c b/pjmedia/src/pjmedia/sound_port.c
index e180ed2..50f35d8 100644
--- a/pjmedia/src/pjmedia/sound_port.c
+++ b/pjmedia/src/pjmedia/sound_port.c
@@ -544,12 +544,16 @@
 	if (status != PJ_SUCCESS)
 	    si.rec_latency = si.play_latency = 0;
 
-	delay_ms = (si.rec_latency + si.play_latency) * 1000 /
-		   snd_port->clock_rate;
-	status = pjmedia_echo_create(pool, snd_port->clock_rate, 
-				    snd_port->samples_per_frame, 
-				    tail_ms, delay_ms,
-				    options, &snd_port->ec_state);
+	//No need to add input latency in the latency calculation,
+	//since actual input latency should be zero.
+	//delay_ms = (si.rec_latency + si.play_latency) * 1000 /
+	//	   snd_port->clock_rate;
+	delay_ms = si.play_latency * 1000 / snd_port->clock_rate;
+	status = pjmedia_echo_create2(pool, snd_port->clock_rate, 
+				      snd_port->channel_count,
+				      snd_port->samples_per_frame, 
+				      tail_ms, delay_ms,
+				      options, &snd_port->ec_state);
 	if (status != PJ_SUCCESS)
 	    snd_port->ec_state = NULL;
 	else
diff --git a/pjsip-apps/src/samples/aectest.c b/pjsip-apps/src/samples/aectest.c
index 14c14d6..1401774 100644
--- a/pjsip-apps/src/samples/aectest.c
+++ b/pjsip-apps/src/samples/aectest.c
@@ -32,10 +32,9 @@
 #include <pjlib-util.h>	/* pj_getopt */
 #include <pjlib.h>
 
-/* For logging purpose. */
-#define THIS_FILE   "playfile.c"
+#define THIS_FILE   "aectest.c"
 #define PTIME	    20
-#define TAIL_LENGTH 800
+#define TAIL_LENGTH 200
 
 static const char *desc = 
 " FILE		    						    \n"
@@ -48,12 +47,22 @@
 "		    						    \n"
 " USAGE		    						    \n"
 "		    						    \n"
-"  aectest INPUT.WAV OUTPUT.WAV					    \n"
+"  aectest [options] <PLAY.WAV> <REC.WAV> <OUTPUT.WAV>		    \n"
 "		    						    \n"
-"  INPUT.WAV is the file to be played to the speaker.		    \n"
-"  OUTPUT.WAV is the output file containing recorded signal from the\n"
-"  microphone.";
+"  <PLAY.WAV>   is the signal played to the speaker.		    \n"
+"  <REC.WAV>    is the signal captured from the microphone.	    \n"
+"  <OUTPUT.WAV> is the output file to store the test result	    \n"
+"\n"
+" options:\n"
+"  -d  The delay between playback and capture in ms. Default is zero.\n"
+"  -l  Set the echo tail length in ms. Default is 200 ms	    \n"
+"  -a  Algorithm: 0=default, 1=speex, 3=echo suppress		    \n";
 
+/* 
+ * Sample session:
+ *
+ * -d 100 -a 1 ../bin/orig8.wav ../bin/echo8.wav ../bin/result8.wav 
+ */
 
 static void app_perror(const char *sender, const char *title, pj_status_t st)
 {
@@ -72,21 +81,56 @@
     pj_caching_pool cp;
     pjmedia_endpt *med_endpt;
     pj_pool_t	  *pool;
-    pjmedia_port  *play_port;
-    pjmedia_port  *rec_port;
-    pjmedia_port  *bidir_port;
-    pjmedia_snd_port *snd;
-    char tmp[10];
+    pjmedia_port  *wav_play;
+    pjmedia_port  *wav_rec;
+    pjmedia_port  *wav_out;
     pj_status_t status;
+    pjmedia_echo_state *ec;
+    pjmedia_frame play_frame, rec_frame;
+    unsigned opt = 0;
+    unsigned latency_ms = 0;
+    unsigned tail_ms = TAIL_LENGTH;
+    pj_timestamp t0, t1;
+    int c;
 
+    pj_optind = 0;
+    while ((c=pj_getopt(argc, argv, "d:l:a:")) !=-1) {
+	switch (c) {
+	case 'd':
+	    latency_ms = atoi(pj_optarg);
+	    break;
+	case 'l':
+	    tail_ms = atoi(pj_optarg);
+	    break;
+	case 'a':
+	    {
+		int alg = atoi(pj_optarg);
+		switch (alg) {
+		case 0:
+		    opt = 0;
+		    break;
+		case 1:
+		    opt = PJMEDIA_ECHO_SPEEX;
+		    break;
+		case 3:
+		    opt = PJMEDIA_ECHO_SIMPLE;
+		    break;
+		default:
+		    puts("Invalid algorithm");
+		    puts(desc);
+		    return 1;
+		}
+	    }
+	    break;
+	}
+    }
 
-    if (argc != 3) {
-    	puts("Error: arguments required");
+    if (argc - pj_optind != 3) {
+	puts("Error: missing argument(s)");
 	puts(desc);
 	return 1;
     }
 
-
     /* Must init PJLIB first: */
     status = pj_init();
     PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
@@ -109,98 +153,98 @@
 			   NULL		    /* callback on error    */
 			   );
 
-    /* Create file media port from the WAV file */
-    status = pjmedia_wav_player_port_create(  pool,	/* memory pool	    */
-					      argv[1],	/* file to play	    */
-					      PTIME,	/* ptime.	    */
-					      0,	/* flags	    */
-					      0,	/* default buffer   */
-					      &play_port);
+    /* Open wav_play */
+    status = pjmedia_wav_player_port_create(pool, argv[pj_optind], PTIME, 
+					    PJMEDIA_FILE_NO_LOOP, 0, 
+					    &wav_play);
     if (status != PJ_SUCCESS) {
-	app_perror(THIS_FILE, "Unable to open input WAV file", status);
+	app_perror(THIS_FILE, "Error opening playback WAV file", status);
 	return 1;
     }
-
-    if (play_port->info.channel_count != 1) {
-	puts("Error: input WAV must have 1 channel audio");
-	return 1;
-    }
-    if (play_port->info.bits_per_sample != 16) {
-	puts("Error: input WAV must be encoded as 16bit PCM");
-	return 1;
-    }
-
-#ifdef PJ_DARWINOS
-    /* Need to force clock rate on MacOS */
-    if (play_port->info.clock_rate != 44100) {
-	pjmedia_port *resample_port;
-
-	status = pjmedia_resample_port_create(pool, play_port, 44100, 0,
-					      &resample_port);
-	if (status != PJ_SUCCESS) {
-	    app_perror(THIS_FILE, "Unable to create resampling port", status);
-	    return 1;
-	}
-
-	data.play_port = resample_port;
-    }
-#endif
-
-    /* Create WAV output file port */
-    status = pjmedia_wav_writer_port_create(pool, argv[2], 
-					    play_port->info.clock_rate,
-					    play_port->info.channel_count,
-					    play_port->info.samples_per_frame,
-					    play_port->info.bits_per_sample,
-					    0, 0, &rec_port);
-    if (status != PJ_SUCCESS) {
-	app_perror(THIS_FILE, "Unable to open output file", status);
-	return 1;
-    }
-
-    /* Create bidirectional port from the WAV ports */
-    pjmedia_bidirectional_port_create(pool, play_port, rec_port, &bidir_port);
-
-    /* Create sound device. */
-    status = pjmedia_snd_port_create(pool, -1, -1, 
-				     play_port->info.clock_rate,
-				     play_port->info.channel_count,
-				     play_port->info.samples_per_frame,
-				     play_port->info.bits_per_sample,
-				     0, &snd);
-    if (status != PJ_SUCCESS) {
-	app_perror(THIS_FILE, "Unable to open sound device", status);
-	return 1;
-    }
-
-
-    /* Customize AEC */
-    pjmedia_snd_port_set_ec(snd, pool, TAIL_LENGTH, 0);
-
-    /* Connect sound to the port */
-    pjmedia_snd_port_connect(snd, bidir_port);
-
-
-    puts("");
-    printf("Playing %s and recording to %s\n", argv[1], argv[2]);
-    puts("Press <ENTER> to quit");
-
-    fgets(tmp, sizeof(tmp), stdin);
-
     
-    /* Start deinitialization: */
+    /* Open recorded wav */
+    status = pjmedia_wav_player_port_create(pool, argv[pj_optind+1], PTIME, 
+					    PJMEDIA_FILE_NO_LOOP, 0, 
+					    &wav_rec);
+    if (status != PJ_SUCCESS) {
+	app_perror(THIS_FILE, "Error opening recorded WAV file", status);
+	return 1;
+    }
 
-    /* Destroy sound device */
-    status = pjmedia_snd_port_destroy( snd );
-    PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+    /* play and rec WAVs must have the same clock rate */
+    if (wav_play->info.clock_rate != wav_rec->info.clock_rate) {
+	puts("Error: clock rate mismatch in the WAV files");
+	return 1;
+    }
 
+    /* .. and channel count */
+    if (wav_play->info.channel_count != wav_rec->info.channel_count) {
+	puts("Error: channel count mismatch in the WAV files");
+	return 1;
+    }
+
+    /* Create output wav */
+    status = pjmedia_wav_writer_port_create(pool, argv[pj_optind+2],
+					    wav_play->info.clock_rate,
+					    wav_play->info.channel_count,
+					    wav_play->info.samples_per_frame,
+					    wav_play->info.bits_per_sample,
+					    0, 0, &wav_out);
+    if (status != PJ_SUCCESS) {
+	app_perror(THIS_FILE, "Error opening output WAV file", status);
+	return 1;
+    }
+
+    /* Create echo canceller */
+    status = pjmedia_echo_create2(pool, wav_play->info.clock_rate,
+				  wav_play->info.channel_count,
+				  wav_play->info.samples_per_frame,
+				  tail_ms, latency_ms,
+				  opt, &ec);
+    if (status != PJ_SUCCESS) {
+	app_perror(THIS_FILE, "Error creating EC", status);
+	return 1;
+    }
+
+
+    /* Processing loop */
+    play_frame.buf = pj_pool_alloc(pool, wav_play->info.samples_per_frame<<1);
+    rec_frame.buf = pj_pool_alloc(pool, wav_play->info.samples_per_frame<<1);
+    pj_get_timestamp(&t0);
+    for (;;) {
+	play_frame.size = wav_play->info.samples_per_frame << 1;
+	status = pjmedia_port_get_frame(wav_play, &play_frame);
+	if (status != PJ_SUCCESS)
+	    break;
+
+	status = pjmedia_echo_playback(ec, (short*)play_frame.buf);
+
+	rec_frame.size = wav_play->info.samples_per_frame << 1;
+	status = pjmedia_port_get_frame(wav_rec, &rec_frame);
+	if (status != PJ_SUCCESS)
+	    break;
+
+	status = pjmedia_echo_capture(ec, (short*)rec_frame.buf, 0);
+
+	//status = pjmedia_echo_cancel(ec, (short*)rec_frame.buf, 
+	//			     (short*)play_frame.buf, 0, NULL);
+
+	pjmedia_port_put_frame(wav_out, &rec_frame);
+    }
+    pj_get_timestamp(&t1);
+
+    PJ_LOG(3,(THIS_FILE, "Completed in %u msec\n", pj_elapsed_msec(&t0, &t1)));
 
     /* Destroy file port(s) */
-    status = pjmedia_port_destroy( play_port );
+    status = pjmedia_port_destroy( wav_play );
     PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
-    status = pjmedia_port_destroy( rec_port );
+    status = pjmedia_port_destroy( wav_rec );
+    PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
+    status = pjmedia_port_destroy( wav_out );
     PJ_ASSERT_RETURN(status == PJ_SUCCESS, 1);
 
+    /* Destroy ec */
+    pjmedia_echo_destroy(ec);
 
     /* Release application pool */
     pj_pool_release( pool );
diff --git a/third_party/speex/libspeex/mdf.c b/third_party/speex/libspeex/mdf.c
index 456ab84..1fbb4d6 100644
--- a/third_party/speex/libspeex/mdf.c
+++ b/third_party/speex/libspeex/mdf.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2008 Jean-Marc Valin
+/* Copyright (C) 2003-2006 Jean-Marc Valin
 
    File: mdf.c
    Echo canceller based on the MDF algorithm (see below)
@@ -88,12 +88,6 @@
 #define WEIGHT_SHIFT 0
 #endif
 
-#ifdef FIXED_POINT
-#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))  
-#else
-#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))  
-#endif
-
 /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
    and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
 #define TWO_PATH
@@ -137,8 +131,6 @@
    int adapted;
    int saturated;
    int screwed_up;
-   int C;                    /** Number of input channels (microphones) */
-   int K;                    /** Number of output channels (loudspeakers) */
    spx_int32_t sampling_rate;
    spx_word16_t spec_average;
    spx_word16_t beta0;
@@ -179,10 +171,10 @@
    spx_word16_t *window;
    spx_word16_t *prop;
    void *fft_table;
-   spx_word16_t *memX, *memD, *memE;
+   spx_word16_t memX, memD, memE;
    spx_word16_t preemph;
    spx_word16_t notch_radius;
-   spx_mem_t *notch_mem;
+   spx_mem_t notch_mem[2];
 
    /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
    spx_int16_t *play_buf;
@@ -190,7 +182,7 @@
    int play_buf_started;
 };
 
-static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
+static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem)
 {
    int i;
    spx_word16_t den2;
@@ -202,7 +194,7 @@
    /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
    for (i=0;i<len;i++)
    {
-      spx_word16_t vin = in[i*stride];
+      spx_word16_t vin = in[i];
       spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
 #ifdef FIXED_POINT
       mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
@@ -242,18 +234,6 @@
    ps[j]=MULT16_16(X[i],X[i]);
 }
 
-/** Compute power spectrum of a half-complex (packed) vector and accumulate */
-static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
-{
-   int i, j;
-   ps[0]+=MULT16_16(X[0],X[0]);
-   for (i=1,j=1;i<N-1;i+=2,j++)
-   {
-      ps[j] +=  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
-   }
-   ps[j]+=MULT16_16(X[i],X[i]);
-}
-
 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
 #ifdef FIXED_POINT
 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
@@ -350,17 +330,16 @@
    prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
 }
 
-static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
+static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, spx_word16_t *prop)
 {
-   int i, j, p;
+   int i, j;
    spx_word16_t max_sum = 1;
    spx_word32_t prop_sum = 1;
    for (i=0;i<M;i++)
    {
       spx_word32_t tmp = 1;
-      for (p=0;p<P;p++)
-         for (j=0;j<N;j++)
-            tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
+      for (j=0;j<N;j++)
+         tmp += MULT16_16(EXTRACT16(SHR32(W[i*N+j],18)), EXTRACT16(SHR32(W[i*N+j],18)));
 #ifdef FIXED_POINT
       /* Just a security in case an overflow were to occur */
       tmp = MIN32(ABS32(tmp), 536870912);
@@ -399,20 +378,11 @@
 #endif
 
 /** Creates a new echo canceller state */
-EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
+SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
 {
-   return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
-}
-
-EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
-{
-   int i,N,M, C, K;
+   int i,N,M;
    SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
 
-   st->K = nb_speakers;
-   st->C = nb_mic;
-   C=st->C;
-   K=st->K;
 #ifdef DUMP_ECHO_CANCEL_DATA
    if (rFile || pFile || oFile)
       speex_fatal("Opening dump files twice");
@@ -443,23 +413,23 @@
 
    st->fft_table = spx_fft_init(N);
    
-   st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
-   st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
-   st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
-   st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
-   st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
+   st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
+   st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
+   st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t));
+   st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
+   st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
    st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
    st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
    st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
    st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
    st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
 
-   st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
-   st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
-   st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
-   st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
+   st->X = (spx_word16_t*)speex_alloc((M+1)*N*sizeof(spx_word16_t));
+   st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
+   st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
+   st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t));
 #ifdef TWO_PATH
-   st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
+   st->foreground = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t));
 #endif
    st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
    st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
@@ -480,7 +450,7 @@
 #endif
    for (i=0;i<=st->frame_size;i++)
       st->power_1[i] = FLOAT_ONE;
-   for (i=0;i<N*M*K*C;i++)
+   for (i=0;i<N*M;i++)
       st->W[i] = 0;
    {
       spx_word32_t sum = 0;
@@ -495,13 +465,11 @@
       }
       for (i=M-1;i>=0;i--)
       {
-         st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
+         st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum);
       }
    }
    
-   st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
-   st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
-   st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
+   st->memX=st->memD=st->memE=0;
    st->preemph = QCONST16(.9,15);
    if (st->sampling_rate<12000)
       st->notch_radius = QCONST16(.9, 15);
@@ -510,7 +478,7 @@
    else
       st->notch_radius = QCONST16(.992, 15);
 
-   st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
+   st->notch_mem[0] = st->notch_mem[1] = 0;
    st->adapted = 0;
    st->Pey = st->Pyy = FLOAT_ONE;
    
@@ -519,7 +487,7 @@
    st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
 #endif
    
-   st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
+   st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
    st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
    st->play_buf_started = 0;
    
@@ -527,15 +495,13 @@
 }
 
 /** Resets echo canceller state */
-EXPORT void speex_echo_state_reset(SpeexEchoState *st)
+void speex_echo_state_reset(SpeexEchoState *st)
 {
-   int i, M, N, C, K;
+   int i, M, N;
    st->cancel_count=0;
    st->screwed_up = 0;
    N = st->window_size;
    M = st->M;
-   C=st->C;
-   K=st->K;
    for (i=0;i<N*M;i++)
       st->W[i] = 0;
 #ifdef TWO_PATH
@@ -555,20 +521,13 @@
    {
       st->last_y[i] = 0;
    }
-   for (i=0;i<N*C;i++)
+   for (i=0;i<N;i++)
    {
       st->E[i] = 0;
-   }
-   for (i=0;i<N*K;i++)
-   {
       st->x[i] = 0;
    }
-   for (i=0;i<2*C;i++)
-      st->notch_mem[i] = 0;
-   for (i=0;i<C;i++)
-      st->memD[i]=st->memE[i]=0;
-   for (i=0;i<K;i++)
-      st->memX[i]=0;
+   st->notch_mem[0] = st->notch_mem[1] = 0;
+   st->memX=st->memD=st->memE=0;
 
    st->saturated = 0;
    st->adapted = 0;
@@ -586,7 +545,7 @@
 }
 
 /** Destroys an echo canceller state */
-EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
+void speex_echo_state_destroy(SpeexEchoState *st)
 {
    spx_fft_destroy(st->fft_table);
 
@@ -617,11 +576,6 @@
 #ifdef FIXED_POINT
    speex_free(st->wtmp2);
 #endif
-   speex_free(st->memX);
-   speex_free(st->memD);
-   speex_free(st->memE);
-   speex_free(st->notch_mem);
-
    speex_free(st->play_buf);
    speex_free(st);
    
@@ -633,7 +587,7 @@
 #endif
 }
 
-EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
+void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
 {
    int i;
    /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
@@ -656,7 +610,7 @@
    }
 }
 
-EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
+void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
 {
    /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
    if (!st->play_buf_started)
@@ -683,16 +637,16 @@
 }
 
 /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
-EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
+void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
 {
    speex_echo_cancellation(st, in, far_end, out);
 }
 
 /** Performs echo cancellation on a frame */
-EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
+void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
 {
-   int i,j, chan, speak;
-   int N,M, C, K;
+   int i,j;
+   int N,M;
    spx_word32_t Syy,See,Sxx,Sdd, Sff;
 #ifdef TWO_PATH
    spx_word32_t Dbf;
@@ -707,9 +661,6 @@
    
    N = st->window_size;
    M = st->M;
-   C = st->C;
-   K = st->K;
-
    st->cancel_count++;
 #ifdef FIXED_POINT
    ss=DIV32_16(11469,M);
@@ -719,178 +670,137 @@
    ss_1 = 1-ss;
 #endif
 
-   for (chan = 0; chan < C; chan++)
+   /* Apply a notch filter to make sure DC doesn't end up causing problems */
+   filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem);
+   /* Copy input data to buffer and apply pre-emphasis */
+   for (i=0;i<st->frame_size;i++)
    {
-      /* Apply a notch filter to make sure DC doesn't end up causing problems */
-      filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
-      /* Copy input data to buffer and apply pre-emphasis */
-      /* Copy input data to buffer */
-      for (i=0;i<st->frame_size;i++)
-      {
-         spx_word32_t tmp32;
-         /* FIXME: This core has changed a bit, need to merge properly */
-         tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
+      spx_word32_t tmp32;
+      tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX)));
 #ifdef FIXED_POINT
-         if (tmp32 > 32767)
-         {
-            tmp32 = 32767;
-            if (st->saturated == 0)
-               st->saturated = 1;
-         }      
-         if (tmp32 < -32767)
-         {
-            tmp32 = -32767;
-            if (st->saturated == 0)
-               st->saturated = 1;
-         }
-#endif
-         st->memD[chan] = st->input[chan*st->frame_size+i];
-         st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
+      /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */
+      if (tmp32 > 32767)
+      {
+         tmp32 = 32767;
+         st->saturated = M+1;
       }
+      if (tmp32 < -32767)
+      {
+         tmp32 = -32767;
+         st->saturated = M+1;
+      }      
+#endif
+      st->x[i+st->frame_size] = EXTRACT16(tmp32);
+      st->memX = far_end[i];
+      
+      tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD)));
+#ifdef FIXED_POINT
+      if (tmp32 > 32767)
+      {
+         tmp32 = 32767;
+         if (st->saturated == 0)
+            st->saturated = 1;
+      }      
+      if (tmp32 < -32767)
+      {
+         tmp32 = -32767;
+         if (st->saturated == 0)
+            st->saturated = 1;
+      }
+#endif
+      st->memD = st->input[i];
+      st->input[i] = tmp32;
    }
 
-   for (speak = 0; speak < K; speak++)
+   /* Shift memory: this could be optimized eventually*/
+   for (j=M-1;j>=0;j--)
    {
-      for (i=0;i<st->frame_size;i++)
-      {
-         spx_word32_t tmp32;
-         st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
-         tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
-#ifdef FIXED_POINT
-         /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
-         if (tmp32 > 32767)
-         {
-            tmp32 = 32767;
-            st->saturated = M+1;
-         }      
-         if (tmp32 < -32767)
-         {
-            tmp32 = -32767;
-            st->saturated = M+1;
-         }      
-#endif
-         st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
-         st->memX[speak] = far_end[i*K+speak];
-      }
-   }   
-   
-   for (speak = 0; speak < K; speak++)
-   {
-      /* Shift memory: this could be optimized eventually*/
-      for (j=M-1;j>=0;j--)
-      {
-         for (i=0;i<N;i++)
-            st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
-      }
-      /* Convert x (echo input) to frequency domain */
-      spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
+      for (i=0;i<N;i++)
+         st->X[(j+1)*N+i] = st->X[j*N+i];
    }
+
+   /* Convert x (far end) to frequency domain */
+   spx_fft(st->fft_table, st->x, &st->X[0]);
+   for (i=0;i<N;i++)
+      st->last_y[i] = st->x[i];
+   Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size);
+   for (i=0;i<st->frame_size;i++)
+      st->x[i] = st->x[i+st->frame_size];
+   /* From here on, the top part of x is used as scratch space */
    
-   Sxx = 0;
-   for (speak = 0; speak < K; speak++)
-   {
-      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
-      power_spectrum_accum(st->X+speak*N, st->Xf, N);
-   }
-   
-   Sff = 0;  
-   for (chan = 0; chan < C; chan++)
-   {
 #ifdef TWO_PATH
-      /* Compute foreground filter */
-      spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
-      spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
-      for (i=0;i<st->frame_size;i++)
-         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
-      Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
+   /* Compute foreground filter */
+   spectral_mul_accum16(st->X, st->foreground, st->Y, N, M);   
+   spx_ifft(st->fft_table, st->Y, st->e);
+   for (i=0;i<st->frame_size;i++)
+      st->e[i] = SUB16(st->input[i], st->e[i+st->frame_size]);
+   Sff = mdf_inner_prod(st->e, st->e, st->frame_size);
 #endif
-   }
    
    /* Adjust proportional adaption rate */
-   /* FIXME: Adjust that for C, K*/
-   if (st->adapted)
-      mdf_adjust_prop (st->W, N, M, C*K, st->prop);
+   mdf_adjust_prop (st->W, N, M, st->prop);
    /* Compute weight gradient */
    if (st->saturated == 0)
    {
-      for (chan = 0; chan < C; chan++)
+      for (j=M-1;j>=0;j--)
       {
-         for (speak = 0; speak < K; speak++)
-         {
-            for (j=M-1;j>=0;j--)
-            {
-               weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
-               for (i=0;i<N;i++)
-                  st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
-            }
-         }
+         weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N);
+         for (i=0;i<N;i++)
+            st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]);
+         
       }
    } else {
       st->saturated--;
    }
    
-   /* FIXME: MC conversion required */ 
    /* Update weight to prevent circular convolution (MDF / AUMDF) */
-   for (chan = 0; chan < C; chan++)
+   for (j=0;j<M;j++)
    {
-      for (speak = 0; speak < K; speak++)
+      /* This is a variant of the Alternatively Updated MDF (AUMDF) */
+      /* Remove the "if" to make this an MDF filter */
+      if (j==0 || st->cancel_count%(M-1) == j-1)
       {
-         for (j=0;j<M;j++)
-         {
-            /* This is a variant of the Alternatively Updated MDF (AUMDF) */
-            /* Remove the "if" to make this an MDF filter */
-            if (j==0 || st->cancel_count%(M-1) == j-1)
-            {
 #ifdef FIXED_POINT
-               for (i=0;i<N;i++)
-                  st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
-               spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
-               for (i=0;i<st->frame_size;i++)
-               {
-                  st->wtmp[i]=0;
-               }
-               for (i=st->frame_size;i<N;i++)
-               {
-                  st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
-               }
-               spx_fft(st->fft_table, st->wtmp, st->wtmp2);
-               /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
-               for (i=0;i<N;i++)
-                  st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
-#else
-               spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
-               for (i=st->frame_size;i<N;i++)
-               {
-                  st->wtmp[i]=0;
-               }
-               spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
-#endif
-            }
+         for (i=0;i<N;i++)
+            st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16));
+         spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
+         for (i=0;i<st->frame_size;i++)
+         {
+            st->wtmp[i]=0;
          }
+         for (i=st->frame_size;i<N;i++)
+         {
+            st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
+         }
+         spx_fft(st->fft_table, st->wtmp, st->wtmp2);
+         /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
+         for (i=0;i<N;i++)
+            st->W[j*N+i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
+#else
+         spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
+         for (i=st->frame_size;i<N;i++)
+         {
+            st->wtmp[i]=0;
+         }
+         spx_fft(st->fft_table, st->wtmp, &st->W[j*N]);
+#endif
       }
    }
-   
-   /* So we can use power_spectrum_accum */ 
-   for (i=0;i<=st->frame_size;i++)
-      st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
-      
-   Dbf = 0;
-   See = 0;    
+
+   /* Compute filter response Y */
+   spectral_mul_accum(st->X, st->W, st->Y, N, M);
+   spx_ifft(st->fft_table, st->Y, st->y);
+
 #ifdef TWO_PATH
    /* Difference in response, this is used to estimate the variance of our residual power estimate */
-   for (chan = 0; chan < C; chan++)
-   {
-      spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
-      spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
-      for (i=0;i<st->frame_size;i++)
-         st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
-      Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
-      for (i=0;i<st->frame_size;i++)
-         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
-      See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
-   }
+   for (i=0;i<st->frame_size;i++)
+      st->e[i] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]);
+   Dbf = 10+mdf_inner_prod(st->e, st->e, st->frame_size);
 #endif
 
+   for (i=0;i<st->frame_size;i++)
+      st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]);
+   See = mdf_inner_prod(st->e, st->e, st->frame_size);
 #ifndef TWO_PATH
    Sff = See;
 #endif
@@ -927,12 +837,11 @@
       st->Davg1 = st->Davg2 = 0;
       st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
       /* Copy background filter to foreground filter */
-      for (i=0;i<N*M*C*K;i++)
+      for (i=0;i<N*M;i++)
          st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
       /* Apply a smooth transition so as to not introduce blocking artifacts */
-      for (chan = 0; chan < C; chan++)
-         for (i=0;i<st->frame_size;i++)
-            st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
+      for (i=0;i<st->frame_size;i++)
+         st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]);
    } else {
       int reset_background=0;
       /* Otherwise, check if the background filter is significantly worse */
@@ -945,16 +854,13 @@
       if (reset_background)
       {
          /* Copy foreground filter to background filter */
-         for (i=0;i<N*M*C*K;i++)
+         for (i=0;i<N*M;i++)
             st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
          /* We also need to copy the output so as to get correct adaptation */
-         for (chan = 0; chan < C; chan++)
-         {        
-            for (i=0;i<st->frame_size;i++)
-               st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
-            for (i=0;i<st->frame_size;i++)
-               st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
-         }        
+         for (i=0;i<st->frame_size;i++)
+            st->y[i+st->frame_size] = st->e[i+st->frame_size];
+         for (i=0;i<st->frame_size;i++)
+            st->e[i] = SUB16(st->input[i], st->y[i+st->frame_size]);
          See = Sff;
          st->Davg1 = st->Davg2 = 0;
          st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
@@ -962,57 +868,47 @@
    }
 #endif
 
-   Sey = Syy = Sdd = 0;  
-   for (chan = 0; chan < C; chan++)
-   {    
-      /* Compute error signal (for the output with de-emphasis) */ 
-      for (i=0;i<st->frame_size;i++)
-      {
-         spx_word32_t tmp_out;
+   /* Compute error signal (for the output with de-emphasis) */ 
+   for (i=0;i<st->frame_size;i++)
+   {
+      spx_word32_t tmp_out;
 #ifdef TWO_PATH
-         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
+      tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size]));
 #else
-         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
+      tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size]));
 #endif
-         tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
+      /* Saturation */
+      if (tmp_out>32767)
+         tmp_out = 32767;
+      else if (tmp_out<-32768)
+         tmp_out = -32768;
+      tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE)));
       /* This is an arbitrary test for saturation in the microphone signal */
-         if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
-         {
+      if (in[i] <= -32000 || in[i] >= 32000)
+      {
+         tmp_out = 0;
          if (st->saturated == 0)
             st->saturated = 1;
-         }
-         out[i*C+chan] = WORD2INT(tmp_out);
-         st->memE[chan] = tmp_out;
       }
-
+      out[i] = (spx_int16_t)tmp_out;
+      st->memE = tmp_out;
+   }
+   
 #ifdef DUMP_ECHO_CANCEL_DATA
-      dump_audio(in, far_end, out, st->frame_size);
+   dump_audio(in, far_end, out, st->frame_size);
 #endif
    
-      /* Compute error signal (filter update version) */ 
-      for (i=0;i<st->frame_size;i++)
-      {
-         st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
-         st->e[chan*N+i] = 0;
-      }
-      
-      /* Compute a bunch of correlations */
-      /* FIXME: bad merge */
-      Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
-      Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
-      Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
-      
-      /* Convert error to frequency domain */
-      spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
-      for (i=0;i<st->frame_size;i++)
-         st->y[i+chan*N] = 0;
-      spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
-   
-      /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
-      power_spectrum_accum(st->E+chan*N, st->Rf, N);
-      power_spectrum_accum(st->Y+chan*N, st->Yf, N);
-    
+   /* Compute error signal (filter update version) */ 
+   for (i=0;i<st->frame_size;i++)
+   {
+      st->e[i+st->frame_size] = st->e[i];
+      st->e[i] = 0;
    }
+
+   /* Compute a bunch of correlations */
+   Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size);
+   Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size);
+   Sdd = mdf_inner_prod(st->input, st->input, st->frame_size);
    
    /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
    
@@ -1025,7 +921,7 @@
    {
       /* Things have gone really bad */
       st->screwed_up += 50;
-      for (i=0;i<st->frame_size*C;i++)
+      for (i=0;i<st->frame_size;i++)
          out[i] = 0;
    } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
    {
@@ -1044,17 +940,36 @@
 
    /* Add a small noise floor to make sure not to have problems when dividing */
    See = MAX32(See, SHR32(MULT16_16(N, 100),6));
-     
-   for (speak = 0; speak < K; speak++)
-   {
-      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
-      power_spectrum_accum(st->X+speak*N, st->Xf, N);
-   }
 
+   /* Convert error to frequency domain */
+   spx_fft(st->fft_table, st->e, st->E);
+   for (i=0;i<st->frame_size;i++)
+      st->y[i] = 0;
+   spx_fft(st->fft_table, st->y, st->Y);
+
+   /* Compute power spectrum of far end (X), error (E) and filter response (Y) */
+   power_spectrum(st->E, st->Rf, N);
+   power_spectrum(st->Y, st->Yf, N);
+   power_spectrum(st->X, st->Xf, N);
    
    /* Smooth far end energy estimate over time */
    for (j=0;j<=st->frame_size;j++)
       st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
+   
+   /* Enable this to compute the power based only on the tail (would need to compute more 
+      efficiently to make this really useful) */
+   if (0)
+   {
+      float scale2 = .5f/M;
+      for (j=0;j<=st->frame_size;j++)
+         st->power[j] = 100;
+      for (i=0;i<M;i++)
+      {
+         power_spectrum(&st->X[i*N], st->Xf, N);
+         for (j=0;j<=st->frame_size;j++)
+            st->power[j] += scale2*st->Xf[j];
+      }
+   }
 
    /* Compute filtered spectra and (cross-)correlations */
    for (j=st->frame_size;j>=0;j--)
@@ -1176,13 +1091,13 @@
       st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
    }
 
-   /* FIXME: MC conversion required */ 
-      for (i=0;i<st->frame_size;i++)
-         st->last_y[i] = st->last_y[st->frame_size+i];
+   /* Save residual echo so it can be used by the nonlinear processor */
    if (st->adapted)
    {
       /* If the filter is adapted, take the filtered echo */
       for (i=0;i<st->frame_size;i++)
+         st->last_y[i] = st->last_y[st->frame_size+i];
+      for (i=0;i<st->frame_size;i++)
          st->last_y[st->frame_size+i] = in[i]-out[i];
    } else {
       /* If filter isn't adapted yet, all we can do is take the far end signal directly */
@@ -1226,7 +1141,7 @@
    
 }
 
-EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
+int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
 {
    switch(request)
    {
@@ -1254,29 +1169,6 @@
       case SPEEX_ECHO_GET_SAMPLING_RATE:
          (*(int*)ptr) = st->sampling_rate;
          break;
-      case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
-         /*FIXME: Implement this for multiple channels */
-         *((spx_int32_t *)ptr) = st->M * st->frame_size;
-         break;
-      case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
-      {
-         int M = st->M, N = st->window_size, n = st->frame_size, i, j;
-         spx_int32_t *filt = (spx_int32_t *) ptr;
-         for(j=0;j<M;j++)
-         {
-            /*FIXME: Implement this for multiple channels */
-#ifdef FIXED_POINT
-            for (i=0;i<N;i++)
-               st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
-            spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
-#else
-            spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
-#endif
-            for(i=0;i<n;i++)
-               filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
-         }
-      }
-         break;
       default:
          speex_warning_int("Unknown speex_echo_ctl request: ", request);
          return -1;