Blame - jni/libopus/sources/celt/mdct.c - jami-client-android

blob: 16a36c692f9b35b50971516ec249f51d1c71216c [file] [log] [blame]

Alexandre Lision	744f742	2013-09-25 11:39:37 -0400	[diff] [blame]	1	/* Copyright (c) 2007-2008 CSIRO
				2	Copyright (c) 2007-2008 Xiph.Org Foundation
				3	Written by Jean-Marc Valin */
				4	/*
				5	Redistribution and use in source and binary forms, with or without
				6	modification, are permitted provided that the following conditions
				7	are met:
				8
				9	- Redistributions of source code must retain the above copyright
				10	notice, this list of conditions and the following disclaimer.
				11
				12	- Redistributions in binary form must reproduce the above copyright
				13	notice, this list of conditions and the following disclaimer in the
				14	documentation and/or other materials provided with the distribution.
				15
				16	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				17	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				18	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				19	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
				20	OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
				21	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
				22	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
				23	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
				24	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
				25	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				26	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				27	*/
				28
				29	/* This is a simple MDCT implementation that uses a N/4 complex FFT
				30	to do most of the work. It should be relatively straightforward to
				31	plug in pretty much and FFT here.
				32
				33	This replaces the Vorbis FFT (and uses the exact same API), which
				34	was a bit too messy and that was ending up duplicating code
				35	(might as well use the same FFT everywhere).
				36
				37	The algorithm is similar to (and inspired from) Fabrice Bellard's
				38	MDCT implementation in FFMPEG, but has differences in signs, ordering
				39	and scaling in many places.
				40	*/
				41
				42	#ifndef SKIP_CONFIG_H
				43	#ifdef HAVE_CONFIG_H
				44	#include "config.h"
				45	#endif
				46	#endif
				47
				48	#include "mdct.h"
				49	#include "kiss_fft.h"
				50	#include "_kiss_fft_guts.h"
				51	#include <math.h>
				52	#include "os_support.h"
				53	#include "mathops.h"
				54	#include "stack_alloc.h"
				55
				56	#ifdef CUSTOM_MODES
				57
				58	int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
				59	{
				60	int i;
				61	int N4;
				62	kiss_twiddle_scalar *trig;
				63	#if defined(FIXED_POINT)
				64	int N2=N>>1;
				65	#endif
				66	l->n = N;
				67	N4 = N>>2;
				68	l->maxshift = maxshift;
				69	for (i=0;i<=maxshift;i++)
				70	{
				71	if (i==0)
				72	l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
				73	else
				74	l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
				75	#ifndef ENABLE_TI_DSPLIB55
				76	if (l->kfft[i]==NULL)
				77	return 0;
				78	#endif
				79	}
				80	l->trig = trig = (kiss_twiddle_scalar)opus_alloc((N4+1)sizeof(kiss_twiddle_scalar));
				81	if (l->trig==NULL)
				82	return 0;
				83	/* We have enough points that sine isn't necessary */
				84	#if defined(FIXED_POINT)
				85	for (i=0;i<=N4;i++)
				86	trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
				87	#else
				88	for (i=0;i<=N4;i++)
				89	trig[i] = (kiss_twiddle_scalar)cos(2PIi/N);
				90	#endif
				91	return 1;
				92	}
				93
				94	void clt_mdct_clear(mdct_lookup *l)
				95	{
				96	int i;
				97	for (i=0;i<=l->maxshift;i++)
				98	opus_fft_free(l->kfft[i]);
				99	opus_free((kiss_twiddle_scalar*)l->trig);
				100	}
				101
				102	#endif /* CUSTOM_MODES */
				103
				104	/* Forward MDCT trashes the input array */
				105	void clt_mdct_forward(const mdct_lookup l, kiss_fft_scalar in, kiss_fft_scalar * OPUS_RESTRICT out,
				106	const opus_val16 *window, int overlap, int shift, int stride)
				107	{
				108	int i;
				109	int N, N2, N4;
				110	kiss_twiddle_scalar sine;
				111	VARDECL(kiss_fft_scalar, f);
				112	SAVE_STACK;
				113	N = l->n;
				114	N >>= shift;
				115	N2 = N>>1;
				116	N4 = N>>2;
				117	ALLOC(f, N2, kiss_fft_scalar);
				118	/* sin(x) ~= x here */
				119	#ifdef FIXED_POINT
				120	sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
				121	#else
				122	sine = (kiss_twiddle_scalar)2PI(.125f)/N;
				123	#endif
				124
				125	/* Consider the input to be composed of four blocks: [a, b, c, d] */
				126	/* Window, shuffle, fold */
				127	{
				128	/* Temp pointers to make it really clear to the compiler what we're doing */
				129	const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
				130	const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
				131	kiss_fft_scalar * OPUS_RESTRICT yp = f;
				132	const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
				133	const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
				134	for(i=0;i<(overlap>>2);i++)
				135	{
				136	/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
				137	yp++ = MULT16_32_Q15(wp2, xp1[N2]) + MULT16_32_Q15(wp1,xp2);
				138	yp++ = MULT16_32_Q15(wp1, xp1) - MULT16_32_Q15(wp2, xp2[-N2]);
				139	xp1+=2;
				140	xp2-=2;
				141	wp1+=2;
				142	wp2-=2;
				143	}
				144	wp1 = window;
				145	wp2 = window+overlap-1;
				146	for(;i<N4-(overlap>>2);i++)
				147	{
				148	/* Real part arranged as a-bR, Imag part arranged as -c-dR */
				149	yp++ = xp2;
				150	yp++ = xp1;
				151	xp1+=2;
				152	xp2-=2;
				153	}
				154	for(;i<N4;i++)
				155	{
				156	/* Real part arranged as a-bR, Imag part arranged as -c-dR */
				157	yp++ = -MULT16_32_Q15(wp1, xp1[-N2]) + MULT16_32_Q15(wp2, xp2);
				158	yp++ = MULT16_32_Q15(wp2, xp1) + MULT16_32_Q15(wp1, xp2[N2]);
				159	xp1+=2;
				160	xp2-=2;
				161	wp1+=2;
				162	wp2-=2;
				163	}
				164	}
				165	/* Pre-rotation */
				166	{
				167	kiss_fft_scalar * OPUS_RESTRICT yp = f;
				168	const kiss_twiddle_scalar *t = &l->trig[0];
				169	for(i=0;i<N4;i++)
				170	{
				171	kiss_fft_scalar re, im, yr, yi;
				172	re = yp[0];
				173	im = yp[1];
				174	yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
				175	yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
				176	/* works because the cos is nearly one */
				177	*yp++ = yr + S_MUL(yi,sine);
				178	*yp++ = yi - S_MUL(yr,sine);
				179	}
				180	}
				181
				182	/* N/4 complex FFT, down-scales by 4/N */
				183	opus_fft(l->kfft[shift], (kiss_fft_cpx )f, (kiss_fft_cpx )in);
				184
				185	/* Post-rotate */
				186	{
				187	/* Temp pointers to make it really clear to the compiler what we're doing */
				188	const kiss_fft_scalar * OPUS_RESTRICT fp = in;
				189	kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
				190	kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
				191	const kiss_twiddle_scalar *t = &l->trig[0];
				192	/* Temp pointers to make it really clear to the compiler what we're doing */
				193	for(i=0;i<N4;i++)
				194	{
				195	kiss_fft_scalar yr, yi;
				196	yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
				197	yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
				198	/* works because the cos is nearly one */
				199	*yp1 = yr - S_MUL(yi,sine);
				200	*yp2 = yi + S_MUL(yr,sine);;
				201	fp += 2;
				202	yp1 += 2*stride;
				203	yp2 -= 2*stride;
				204	}
				205	}
				206	RESTORE_STACK;
				207	}
				208
				209	void clt_mdct_backward(const mdct_lookup l, kiss_fft_scalar in, kiss_fft_scalar * OPUS_RESTRICT out,
				210	const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
				211	{
				212	int i;
				213	int N, N2, N4;
				214	kiss_twiddle_scalar sine;
				215	VARDECL(kiss_fft_scalar, f);
				216	VARDECL(kiss_fft_scalar, f2);
				217	SAVE_STACK;
				218	N = l->n;
				219	N >>= shift;
				220	N2 = N>>1;
				221	N4 = N>>2;
				222	ALLOC(f, N2, kiss_fft_scalar);
				223	ALLOC(f2, N2, kiss_fft_scalar);
				224	/* sin(x) ~= x here */
				225	#ifdef FIXED_POINT
				226	sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
				227	#else
				228	sine = (kiss_twiddle_scalar)2PI(.125f)/N;
				229	#endif
				230
				231	/* Pre-rotate */
				232	{
				233	/* Temp pointers to make it really clear to the compiler what we're doing */
				234	const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
				235	const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
				236	kiss_fft_scalar * OPUS_RESTRICT yp = f2;
				237	const kiss_twiddle_scalar *t = &l->trig[0];
				238	for(i=0;i<N4;i++)
				239	{
				240	kiss_fft_scalar yr, yi;
				241	yr = -S_MUL(xp2, t[i<<shift]) + S_MUL(xp1,t[(N4-i)<<shift]);
				242	yi = -S_MUL(xp2, t[(N4-i)<<shift]) - S_MUL(xp1,t[i<<shift]);
				243	/* works because the cos is nearly one */
				244	*yp++ = yr - S_MUL(yi,sine);
				245	*yp++ = yi + S_MUL(yr,sine);
				246	xp1+=2*stride;
				247	xp2-=2*stride;
				248	}
				249	}
				250
				251	/* Inverse N/4 complex FFT. This one should not downscale even in fixed-point */
				252	opus_ifft(l->kfft[shift], (kiss_fft_cpx )f2, (kiss_fft_cpx )f);
				253
				254	/* Post-rotate */
				255	{
				256	kiss_fft_scalar * OPUS_RESTRICT fp = f;
				257	const kiss_twiddle_scalar *t = &l->trig[0];
				258
				259	for(i=0;i<N4;i++)
				260	{
				261	kiss_fft_scalar re, im, yr, yi;
				262	re = fp[0];
				263	im = fp[1];
				264	/* We'd scale up by 2 here, but instead it's done when mixing the windows */
				265	yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
				266	yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
				267	/* works because the cos is nearly one */
				268	*fp++ = yr - S_MUL(yi,sine);
				269	*fp++ = yi + S_MUL(yr,sine);
				270	}
				271	}
				272	/* De-shuffle the components for the middle of the window only */
				273	{
				274	const kiss_fft_scalar * OPUS_RESTRICT fp1 = f;
				275	const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1;
				276	kiss_fft_scalar * OPUS_RESTRICT yp = f2;
				277	for(i = 0; i < N4; i++)
				278	{
				279	yp++ =-fp1;
				280	yp++ = fp2;
				281	fp1 += 2;
				282	fp2 -= 2;
				283	}
				284	}
				285	out -= (N2-overlap)>>1;
				286	/* Mirror on both sides for TDAC */
				287	{
				288	kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1;
				289	kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1;
				290	kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2;
				291	const opus_val16 * OPUS_RESTRICT wp1 = window;
				292	const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
				293	for(i = 0; i< N4-overlap/2; i++)
				294	{
				295	xp1 = fp1;
				296	xp1--;
				297	fp1--;
				298	}
				299	for(; i < N4; i++)
				300	{
				301	kiss_fft_scalar x1;
				302	x1 = *fp1--;
				303	yp1++ +=-MULT16_32_Q15(wp1, x1);
				304	xp1-- += MULT16_32_Q15(wp2, x1);
				305	wp1++;
				306	wp2--;
				307	}
				308	}
				309	{
				310	kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4;
				311	kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2;
				312	kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2);
				313	const opus_val16 * OPUS_RESTRICT wp1 = window;
				314	const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
				315	for(i = 0; i< N4-overlap/2; i++)
				316	{
				317	xp2 = fp2;
				318	xp2++;
				319	fp2++;
				320	}
				321	for(; i < N4; i++)
				322	{
				323	kiss_fft_scalar x2;
				324	x2 = *fp2++;
				325	yp2-- = MULT16_32_Q15(wp1, x2);
				326	xp2++ = MULT16_32_Q15(wp2, x2);
				327	wp1++;
				328	wp2--;
				329	}
				330	}
				331	RESTORE_STACK;
				332	}