Blame - jni/libopus/sources/src/analysis.c - jami-client-android

blob: 778a62aabfc29470c5c6b4f51b6ef97852ca3d8e [file] [log] [blame]

Alexandre Lision	8538238	2014-01-27 15:54:16 -0500	[diff] [blame]	1	/* Copyright (c) 2011 Xiph.Org Foundation
				2	Written by Jean-Marc Valin */
				3	/*
				4	Redistribution and use in source and binary forms, with or without
				5	modification, are permitted provided that the following conditions
				6	are met:
				7
				8	- Redistributions of source code must retain the above copyright
				9	notice, this list of conditions and the following disclaimer.
				10
				11	- Redistributions in binary form must reproduce the above copyright
				12	notice, this list of conditions and the following disclaimer in the
				13	documentation and/or other materials provided with the distribution.
				14
				15	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				16	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				17	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				18	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
				19	CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
				20	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
				21	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
				22	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
				23	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
				24	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
				25	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				26	*/
				27
				28	#ifdef HAVE_CONFIG_H
				29	#include "config.h"
				30	#endif
				31
				32	#include "kiss_fft.h"
				33	#include "celt.h"
				34	#include "modes.h"
				35	#include "arch.h"
				36	#include "quant_bands.h"
				37	#include <stdio.h>
				38	#include "analysis.h"
				39	#include "mlp.h"
				40	#include "stack_alloc.h"
				41
				42	extern const MLP net;
				43
				44	#ifndef M_PI
				45	#define M_PI 3.141592653
				46	#endif
				47
				48	static const float dct_table[128] = {
				49	0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
				50	0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
				51	0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f,
				52	-0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f,
				53	0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f,
				54	-0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f,
				55	0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f,
				56	0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f,
				57	0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
				58	0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
				59	0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f,
				60	-0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f,
				61	0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f,
				62	-0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f,
				63	0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f,
				64	0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f,
				65	};
				66
				67	static const float analysis_window[240] = {
				68	0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
				69	0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
				70	0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
				71	0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
				72	0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
				73	0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
				74	0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
				75	0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
				76	0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
				77	0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
				78	0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
				79	0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
				80	0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
				81	0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
				82	0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
				83	0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
				84	0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
				85	0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
				86	0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
				87	0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
				88	0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
				89	0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
				90	0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
				91	0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
				92	0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
				93	0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
				94	0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
				95	0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
				96	0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
				97	0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
				98	};
				99
				100	static const int tbands[NB_TBANDS+1] = {
				101	2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
				102	};
				103
				104	static const int extra_bands[NB_TOT_BANDS+1] = {
				105	1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
				106	};
				107
				108	/*static const float tweight[NB_TBANDS+1] = {
				109	.3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
				110	};*/
				111
				112	#define NB_TONAL_SKIP_BANDS 9
				113
				114	#define cA 0.43157974f
				115	#define cB 0.67848403f
				116	#define cC 0.08595542f
				117	#define cE ((float)M_PI/2)
				118	static OPUS_INLINE float fast_atan2f(float y, float x) {
				119	float x2, y2;
				120	/* Should avoid underflow on the values we'll get */
				121	if (ABS16(x)+ABS16(y)<1e-9f)
				122	{
				123	x*=1e12f;
				124	y*=1e12f;
				125	}
				126	x2 = x*x;
				127	y2 = y*y;
				128	if(x2<y2){
				129	float den = (y2 + cBx2) (y2 + cC*x2);
				130	if (den!=0)
				131	return -xy(y2 + cA*x2) / den + (y<0 ? -cE : cE);
				132	else
				133	return (y<0 ? -cE : cE);
				134	}else{
				135	float den = (x2 + cBy2) (x2 + cC*y2);
				136	if (den!=0)
				137	return xy(x2 + cAy2) / den + (y<0 ? -cE : cE) - (xy<0 ? -cE : cE);
				138	else
				139	return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
				140	}
				141	}
				142
				143	void tonality_get_info(TonalityAnalysisState tonal, AnalysisInfo info_out, int len)
				144	{
				145	int pos;
				146	int curr_lookahead;
				147	float psum;
				148	int i;
				149
				150	pos = tonal->read_pos;
				151	curr_lookahead = tonal->write_pos-tonal->read_pos;
				152	if (curr_lookahead<0)
				153	curr_lookahead += DETECT_SIZE;
				154
				155	if (len > 480 && pos != tonal->write_pos)
				156	{
				157	pos++;
				158	if (pos==DETECT_SIZE)
				159	pos=0;
				160	}
				161	if (pos == tonal->write_pos)
				162	pos--;
				163	if (pos<0)
				164	pos = DETECT_SIZE-1;
				165	OPUS_COPY(info_out, &tonal->info[pos], 1);
				166	tonal->read_subframe += len/120;
				167	while (tonal->read_subframe>=4)
				168	{
				169	tonal->read_subframe -= 4;
				170	tonal->read_pos++;
				171	}
				172	if (tonal->read_pos>=DETECT_SIZE)
				173	tonal->read_pos-=DETECT_SIZE;
				174
				175	/* Compensate for the delay in the features themselves.
				176	FIXME: Need a better estimate the 10 I just made up */
				177	curr_lookahead = IMAX(curr_lookahead-10, 0);
				178
				179	psum=0;
				180	/* Summing the probability of transition patterns that involve music at
				181	time (DETECT_SIZE-curr_lookahead-1) */
				182	for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
				183	psum += tonal->pmusic[i];
				184	for (;i<DETECT_SIZE;i++)
				185	psum += tonal->pspeech[i];
				186	psum = psumtonal->music_confidence + (1-psum)tonal->speech_confidence;
				187	/printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);/
				188
				189	info_out->music_prob = psum;
				190	}
				191
				192	void tonality_analysis(TonalityAnalysisState tonal, AnalysisInfo info_out, const CELTMode celt_mode, const void x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
				193	{
				194	int i, b;
				195	const kiss_fft_state *kfft;
				196	VARDECL(kiss_fft_cpx, in);
				197	VARDECL(kiss_fft_cpx, out);
				198	int N = 480, N2=240;
				199	float * OPUS_RESTRICT A = tonal->angle;
				200	float * OPUS_RESTRICT dA = tonal->d_angle;
				201	float * OPUS_RESTRICT d2A = tonal->d2_angle;
				202	VARDECL(float, tonality);
				203	VARDECL(float, noisiness);
				204	float band_tonality[NB_TBANDS];
				205	float logE[NB_TBANDS];
				206	float BFCC[8];
				207	float features[25];
				208	float frame_tonality;
				209	float max_frame_tonality;
				210	/float tw_sum=0;/
				211	float frame_noisiness;
				212	const float pi4 = (float)(M_PIM_PIM_PI*M_PI);
				213	float slope=0;
				214	float frame_stationarity;
				215	float relativeE;
				216	float frame_probs[2];
				217	float alpha, alphaE, alphaE2;
				218	float frame_loudness;
				219	float bandwidth_mask;
				220	int bandwidth=0;
				221	float maxE = 0;
				222	float noise_floor;
				223	int remaining;
				224	AnalysisInfo *info;
				225	SAVE_STACK;
				226
				227	tonal->last_transition++;
				228	alpha = 1.f/IMIN(20, 1+tonal->count);
				229	alphaE = 1.f/IMIN(50, 1+tonal->count);
				230	alphaE2 = 1.f/IMIN(1000, 1+tonal->count);
				231
				232	if (tonal->count<4)
				233	tonal->music_prob = .5;
				234	kfft = celt_mode->mdct.kfft[0];
				235	if (tonal->count==0)
				236	tonal->mem_fill = 240;
				237	downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
				238	if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
				239	{
				240	tonal->mem_fill += len;
				241	/* Don't have enough to update the analysis */
				242	RESTORE_STACK;
				243	return;
				244	}
				245	info = &tonal->info[tonal->write_pos++];
				246	if (tonal->write_pos>=DETECT_SIZE)
				247	tonal->write_pos-=DETECT_SIZE;
				248
				249	ALLOC(in, 480, kiss_fft_cpx);
				250	ALLOC(out, 480, kiss_fft_cpx);
				251	ALLOC(tonality, 240, float);
				252	ALLOC(noisiness, 240, float);
				253	for (i=0;i<N2;i++)
				254	{
				255	float w = analysis_window[i];
				256	in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]);
				257	in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]);
				258	in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]);
				259	in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]);
				260	}
				261	OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
				262	remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
				263	downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
				264	tonal->mem_fill = 240 + remaining;
				265	opus_fft(kfft, in, out);
				266
				267	for (i=1;i<N2;i++)
				268	{
				269	float X1r, X2r, X1i, X2i;
				270	float angle, d_angle, d2_angle;
				271	float angle2, d_angle2, d2_angle2;
				272	float mod1, mod2, avg_mod;
				273	X1r = (float)out[i].r+out[N-i].r;
				274	X1i = (float)out[i].i-out[N-i].i;
				275	X2r = (float)out[i].i+out[N-i].i;
				276	X2i = (float)out[N-i].r-out[i].r;
				277
				278	angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
				279	d_angle = angle - A[i];
				280	d2_angle = d_angle - dA[i];
				281
				282	angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
				283	d_angle2 = angle2 - angle;
				284	d2_angle2 = d_angle2 - d_angle;
				285
				286	mod1 = d2_angle - (float)floor(.5+d2_angle);
				287	noisiness[i] = ABS16(mod1);
				288	mod1 *= mod1;
				289	mod1 *= mod1;
				290
				291	mod2 = d2_angle2 - (float)floor(.5+d2_angle2);
				292	noisiness[i] += ABS16(mod2);
				293	mod2 *= mod2;
				294	mod2 *= mod2;
				295
				296	avg_mod = .25f(d2A[i]+2.fmod1+mod2);
				297	tonality[i] = 1.f/(1.f+40.f16.fpi4*avg_mod)-.015f;
				298
				299	A[i] = angle2;
				300	dA[i] = d_angle2;
				301	d2A[i] = mod2;
				302	}
				303
				304	frame_tonality = 0;
				305	max_frame_tonality = 0;
				306	/tw_sum = 0;/
				307	info->activity = 0;
				308	frame_noisiness = 0;
				309	frame_stationarity = 0;
				310	if (!tonal->count)
				311	{
				312	for (b=0;b<NB_TBANDS;b++)
				313	{
				314	tonal->lowE[b] = 1e10;
				315	tonal->highE[b] = -1e10;
				316	}
				317	}
				318	relativeE = 0;
				319	frame_loudness = 0;
				320	for (b=0;b<NB_TBANDS;b++)
				321	{
				322	float E=0, tE=0, nE=0;
				323	float L1, L2;
				324	float stationarity;
				325	for (i=tbands[b];i<tbands[b+1];i++)
				326	{
				327	float binE = out[i].r(float)out[i].r + out[N-i].r(float)out[N-i].r
				328	+ out[i].i(float)out[i].i + out[N-i].i(float)out[N-i].i;
				329	#ifdef FIXED_POINT
				330	/* FIXME: It's probably best to change the BFCC filter initial state instead */
				331	binE *= 5.55e-17f;
				332	#endif
				333	E += binE;
				334	tE += binE*tonality[i];
				335	nE += binE2.f(.5f-noisiness[i]);
				336	}
				337	tonal->E[tonal->E_count][b] = E;
				338	frame_noisiness += nE/(1e-15f+E);
				339
				340	frame_loudness += (float)sqrt(E+1e-10f);
				341	logE[b] = (float)log(E+1e-10f);
				342	tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
				343	tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
				344	if (tonal->highE[b] < tonal->lowE[b]+1.f)
				345	{
				346	tonal->highE[b]+=.5f;
				347	tonal->lowE[b]-=.5f;
				348	}
				349	relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]);
				350
				351	L1=L2=0;
				352	for (i=0;i<NB_FRAMES;i++)
				353	{
				354	L1 += (float)sqrt(tonal->E[i][b]);
				355	L2 += tonal->E[i][b];
				356	}
				357
				358	stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2));
				359	stationarity *= stationarity;
				360	stationarity *= stationarity;
				361	frame_stationarity += stationarity;
				362	/band_tonality[b] = tE/(1e-15+E)/;
				363	band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]);
				364	#if 0
				365	if (b>=NB_TONAL_SKIP_BANDS)
				366	{
				367	frame_tonality += tweight[b]*band_tonality[b];
				368	tw_sum += tweight[b];
				369	}
				370	#else
				371	frame_tonality += band_tonality[b];
				372	if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
				373	frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
				374	#endif
				375	max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f(b-NB_TBANDS))frame_tonality);
				376	slope += band_tonality[b]*(b-8);
				377	/printf("%f %f ", band_tonality[b], stationarity);/
				378	tonal->prev_band_tonality[b] = band_tonality[b];
				379	}
				380
				381	bandwidth_mask = 0;
				382	bandwidth = 0;
				383	maxE = 0;
				384	noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
				385	#ifdef FIXED_POINT
				386	noise_floor *= 1<<(15+SIG_SHIFT);
				387	#endif
				388	noise_floor *= noise_floor;
				389	for (b=0;b<NB_TOT_BANDS;b++)
				390	{
				391	float E=0;
				392	int band_start, band_end;
				393	/* Keep a margin of 300 Hz for aliasing */
				394	band_start = extra_bands[b];
				395	band_end = extra_bands[b+1];
				396	for (i=band_start;i<band_end;i++)
				397	{
				398	float binE = out[i].r(float)out[i].r + out[N-i].r(float)out[N-i].r
				399	+ out[i].i(float)out[i].i + out[N-i].i(float)out[N-i].i;
				400	E += binE;
				401	}
				402	maxE = MAX32(maxE, E);
				403	tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
				404	E = MAX32(E, tonal->meanE[b]);
				405	/* Use a simple follower with 13 dB/Bark slope for spreading function */
				406	bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
				407	/* Consider the band "active" only if all these conditions are met:
				408	1) less than 10 dB below the simple follower
				409	2) less than 90 dB below the peak band (maximal masking possible considering
				410	both the ATH and the loudness-dependent slope of the spreading function)
				411	3) above the PCM quantization noise floor
				412	*/
				413	if (E>.1bandwidth_mask && E1e9f > maxE && E > noise_floor*(band_end-band_start))
				414	bandwidth = b;
				415	}
				416	if (tonal->count<=2)
				417	bandwidth = 20;
				418	frame_loudness = 20*(float)log10(frame_loudness);
				419	tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness);
				420	tonal->lowECount *= (1-alphaE);
				421	if (frame_loudness < tonal->Etracker-30)
				422	tonal->lowECount += alphaE;
				423
				424	for (i=0;i<8;i++)
				425	{
				426	float sum=0;
				427	for (b=0;b<16;b++)
				428	sum += dct_table[i16+b]logE[b];
				429	BFCC[i] = sum;
				430	}
				431
				432	frame_stationarity /= NB_TBANDS;
				433	relativeE /= NB_TBANDS;
				434	if (tonal->count<10)
				435	relativeE = .5;
				436	frame_noisiness /= NB_TBANDS;
				437	#if 1
				438	info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
				439	#else
				440	info->activity = .5*(1+frame_noisiness-frame_stationarity);
				441	#endif
				442	frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
				443	frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);
				444	tonal->prev_tonality = frame_tonality;
				445
				446	slope /= 8*8;
				447	info->tonality_slope = slope;
				448
				449	tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
				450	tonal->count++;
				451	info->tonality = frame_tonality;
				452
				453	for (i=0;i<4;i++)
				454	features[i] = -0.12299f(BFCC[i]+tonal->mem[i+24]) + 0.49195f(tonal->mem[i]+tonal->mem[i+16]) + 0.69693ftonal->mem[i+8] - 1.4349ftonal->cmean[i];
				455
				456	for (i=0;i<4;i++)
				457	tonal->cmean[i] = (1-alpha)tonal->cmean[i] + alphaBFCC[i];
				458
				459	for (i=0;i<4;i++)
				460	features[4+i] = 0.63246f(BFCC[i]-tonal->mem[i+24]) + 0.31623f(tonal->mem[i]-tonal->mem[i+16]);
				461	for (i=0;i<3;i++)
				462	features[8+i] = 0.53452f(BFCC[i]+tonal->mem[i+24]) - 0.26726f(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8];
				463
				464	if (tonal->count > 5)
				465	{
				466	for (i=0;i<9;i++)
				467	tonal->std[i] = (1-alpha)tonal->std[i] + alphafeatures[i]*features[i];
				468	}
				469
				470	for (i=0;i<8;i++)
				471	{
				472	tonal->mem[i+24] = tonal->mem[i+16];
				473	tonal->mem[i+16] = tonal->mem[i+8];
				474	tonal->mem[i+8] = tonal->mem[i];
				475	tonal->mem[i] = BFCC[i];
				476	}
				477	for (i=0;i<9;i++)
				478	features[11+i] = (float)sqrt(tonal->std[i]);
				479	features[20] = info->tonality;
				480	features[21] = info->activity;
				481	features[22] = frame_stationarity;
				482	features[23] = info->tonality_slope;
				483	features[24] = tonal->lowECount;
				484
				485	#ifndef DISABLE_FLOAT_API
				486	mlp_process(&net, features, frame_probs);
				487	frame_probs[0] = .5f*(frame_probs[0]+1);
				488	/* Curve fitting between the MLP probability and the actual probability */
				489	frame_probs[0] = .01f + 1.21fframe_probs[0]frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
				490	/* Probability of active audio (as opposed to silence) */
				491	frame_probs[1] = .5f*frame_probs[1]+.5f;
				492	/* Consider that silence has a 50-50 probability. */
				493	frame_probs[0] = frame_probs[1]frame_probs[0] + (1-frame_probs[1]).5f;
				494
				495	/printf("%f %f ", frame_probs[0], frame_probs[1]);/
				496	{
				497	/* Probability of state transition */
				498	float tau;
				499	/* Represents independence of the MLP probabilities, where
				500	beta=1 means fully independent. */
				501	float beta;
				502	/* Denormalized probability of speech (p0) and music (p1) after update */
				503	float p0, p1;
				504	/* Probabilities for "all speech" and "all music" */
				505	float s0, m0;
				506	/* Probability sum for renormalisation */
				507	float psum;
				508	/* Instantaneous probability of speech and music, with beta pre-applied. */
				509	float speech0;
				510	float music0;
				511
				512	/* One transition every 3 minutes of active audio */
				513	tau = .00005f*frame_probs[1];
				514	beta = .05f;
				515	if (1) {
				516	/* Adapt beta based on how "unexpected" the new prob is */
				517	float p, q;
				518	p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
				519	q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
				520	beta = .01f+.05fABS16(p-q)/(p(1-q)+q*(1-p));
				521	}
				522	/* p0 and p1 are the probabilities of speech and music at this frame
				523	using only information from previous frame and applying the
				524	state transition model */
				525	p0 = (1-tonal->music_prob)(1-tau) + tonal->music_prob tau;
				526	p1 = tonal->music_prob (1-tau) + (1-tonal->music_prob)tau;
				527	/* We apply the current probability with exponent beta to work around
				528	the fact that the probability estimates aren't independent. */
				529	p0 *= (float)pow(1-frame_probs[0], beta);
				530	p1 *= (float)pow(frame_probs[0], beta);
				531	/* Normalise the probabilities to get the Marokv probability of music. */
				532	tonal->music_prob = p1/(p0+p1);
				533	info->music_prob = tonal->music_prob;
				534
				535	/* This chunk of code deals with delayed decision. */
				536	psum=1e-20f;
				537	/* Instantaneous probability of speech and music, with beta pre-applied. */
				538	speech0 = (float)pow(1-frame_probs[0], beta);
				539	music0 = (float)pow(frame_probs[0], beta);
				540	if (tonal->count==1)
				541	{
				542	tonal->pspeech[0]=.5;
				543	tonal->pmusic [0]=.5;
				544	}
				545	/* Updated probability of having only speech (s0) or only music (m0),
				546	before considering the new observation. */
				547	s0 = tonal->pspeech[0] + tonal->pspeech[1];
				548	m0 = tonal->pmusic [0] + tonal->pmusic [1];
				549	/* Updates s0 and m0 with instantaneous probability. */
				550	tonal->pspeech[0] = s0(1-tau)speech0;
				551	tonal->pmusic [0] = m0(1-tau)music0;
				552	/* Propagate the transition probabilities */
				553	for (i=1;i<DETECT_SIZE-1;i++)
				554	{
				555	tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
				556	tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
				557	}
				558	/* Probability that the latest frame is speech, when all the previous ones were music. */
				559	tonal->pspeech[DETECT_SIZE-1] = m0tauspeech0;
				560	/* Probability that the latest frame is music, when all the previous ones were speech. */
				561	tonal->pmusic [DETECT_SIZE-1] = s0taumusic0;
				562
				563	/* Renormalise probabilities to 1 */
				564	for (i=0;i<DETECT_SIZE;i++)
				565	psum += tonal->pspeech[i] + tonal->pmusic[i];
				566	psum = 1.f/psum;
				567	for (i=0;i<DETECT_SIZE;i++)
				568	{
				569	tonal->pspeech[i] *= psum;
				570	tonal->pmusic [i] *= psum;
				571	}
				572	psum = tonal->pmusic[0];
				573	for (i=1;i<DETECT_SIZE;i++)
				574	psum += tonal->pspeech[i];
				575
				576	/* Estimate our confidence in the speech/music decisions */
				577	if (frame_probs[1]>.75)
				578	{
				579	if (tonal->music_prob>.9)
				580	{
				581	float adapt;
				582	adapt = 1.f/(++tonal->music_confidence_count);
				583	tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
				584	tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
				585	}
				586	if (tonal->music_prob<.1)
				587	{
				588	float adapt;
				589	adapt = 1.f/(++tonal->speech_confidence_count);
				590	tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
				591	tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
				592	}
				593	} else {
				594	if (tonal->music_confidence_count==0)
				595	tonal->music_confidence = .9f;
				596	if (tonal->speech_confidence_count==0)
				597	tonal->speech_confidence = .1f;
				598	}
				599	}
				600	if (tonal->last_music != (tonal->music_prob>.5f))
				601	tonal->last_transition=0;
				602	tonal->last_music = tonal->music_prob>.5f;
				603	#else
				604	info->music_prob = 0;
				605	#endif
				606	/*for (i=0;i<25;i++)
				607	printf("%f ", features[i]);
				608	printf("\n");*/
				609
				610	info->bandwidth = bandwidth;
				611	/printf("%d %d\n", info->bandwidth, info->opus_bandwidth);/
				612	info->noisiness = frame_noisiness;
				613	info->valid = 1;
				614	if (info_out!=NULL)
				615	OPUS_COPY(info_out, info, 1);
				616	RESTORE_STACK;
				617	}
				618
				619	void run_analysis(TonalityAnalysisState analysis, const CELTMode celt_mode, const void *analysis_pcm,
				620	int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
				621	int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
				622	{
				623	int offset;
				624	int pcm_len;
				625
				626	if (analysis_pcm != NULL)
				627	{
				628	/* Avoid overflow/wrap-around of the analysis buffer */
				629	analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);
				630
				631	pcm_len = analysis_frame_size - analysis->analysis_offset;
				632	offset = analysis->analysis_offset;
				633	do {
				634	tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
				635	offset += 480;
				636	pcm_len -= 480;
				637	} while (pcm_len>0);
				638	analysis->analysis_offset = analysis_frame_size;
				639
				640	analysis->analysis_offset -= frame_size;
				641	}
				642
				643	analysis_info->valid = 0;
				644	tonality_get_info(analysis, analysis_info, frame_size);
				645	}