blob: ecc338cec65d6e32fbe09f7c6312a275329de6d5 [file] [log] [blame]
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31#ifdef FIXED_POINT
32#include "main_FIX.h"
33#define silk_encoder_state_Fxx silk_encoder_state_FIX
34#else
35#include "main_FLP.h"
36#define silk_encoder_state_Fxx silk_encoder_state_FLP
37#endif
38#include "tuning_parameters.h"
39#include "pitch_est_defines.h"
40
41static opus_int silk_setup_resamplers(
42 silk_encoder_state_Fxx *psEnc, /* I/O */
43 opus_int fs_kHz /* I */
44);
45
46static opus_int silk_setup_fs(
47 silk_encoder_state_Fxx *psEnc, /* I/O */
48 opus_int fs_kHz, /* I */
49 opus_int PacketSize_ms /* I */
50);
51
52static opus_int silk_setup_complexity(
53 silk_encoder_state *psEncC, /* I/O */
54 opus_int Complexity /* I */
55);
56
57static inline opus_int silk_setup_LBRR(
58 silk_encoder_state *psEncC, /* I/O */
59 const opus_int32 TargetRate_bps /* I */
60);
61
62
63/* Control encoder */
64opus_int silk_control_encoder(
65 silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */
66 silk_EncControlStruct *encControl, /* I Control structure */
67 const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */
68 const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */
69 const opus_int channelNb, /* I Channel number */
70 const opus_int force_fs_kHz
71)
72{
73 opus_int fs_kHz, ret = 0;
74
75 psEnc->sCmn.useDTX = encControl->useDTX;
76 psEnc->sCmn.useCBR = encControl->useCBR;
77 psEnc->sCmn.API_fs_Hz = encControl->API_sampleRate;
78 psEnc->sCmn.maxInternal_fs_Hz = encControl->maxInternalSampleRate;
79 psEnc->sCmn.minInternal_fs_Hz = encControl->minInternalSampleRate;
80 psEnc->sCmn.desiredInternal_fs_Hz = encControl->desiredInternalSampleRate;
81 psEnc->sCmn.useInBandFEC = encControl->useInBandFEC;
82 psEnc->sCmn.nChannelsAPI = encControl->nChannelsAPI;
83 psEnc->sCmn.nChannelsInternal = encControl->nChannelsInternal;
84 psEnc->sCmn.allow_bandwidth_switch = allow_bw_switch;
85 psEnc->sCmn.channelNb = channelNb;
86
87 if( psEnc->sCmn.controlled_since_last_payload != 0 && psEnc->sCmn.prefillFlag == 0 ) {
88 if( psEnc->sCmn.API_fs_Hz != psEnc->sCmn.prev_API_fs_Hz && psEnc->sCmn.fs_kHz > 0 ) {
89 /* Change in API sampling rate in the middle of encoding a packet */
90 ret += silk_setup_resamplers( psEnc, psEnc->sCmn.fs_kHz );
91 }
92 return ret;
93 }
94
95 /* Beyond this point we know that there are no previously coded frames in the payload buffer */
96
97 /********************************************/
98 /* Determine internal sampling rate */
99 /********************************************/
100 fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl );
101 if( force_fs_kHz ) {
102 fs_kHz = force_fs_kHz;
103 }
104 /********************************************/
105 /* Prepare resampler and buffered data */
106 /********************************************/
107 ret += silk_setup_resamplers( psEnc, fs_kHz );
108
109 /********************************************/
110 /* Set internal sampling frequency */
111 /********************************************/
112 ret += silk_setup_fs( psEnc, fs_kHz, encControl->payloadSize_ms );
113
114 /********************************************/
115 /* Set encoding complexity */
116 /********************************************/
117 ret += silk_setup_complexity( &psEnc->sCmn, encControl->complexity );
118
119 /********************************************/
120 /* Set packet loss rate measured by farend */
121 /********************************************/
122 psEnc->sCmn.PacketLoss_perc = encControl->packetLossPercentage;
123
124 /********************************************/
125 /* Set LBRR usage */
126 /********************************************/
127 ret += silk_setup_LBRR( &psEnc->sCmn, TargetRate_bps );
128
129 psEnc->sCmn.controlled_since_last_payload = 1;
130
131 return ret;
132}
133
134static opus_int silk_setup_resamplers(
135 silk_encoder_state_Fxx *psEnc, /* I/O */
136 opus_int fs_kHz /* I */
137)
138{
139 opus_int ret = SILK_NO_ERROR;
140 opus_int32 nSamples_temp;
141
142 if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz )
143 {
144 if( psEnc->sCmn.fs_kHz == 0 ) {
145 /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
146 ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 );
147 } else {
148 /* Allocate worst case space for temporary upsampling, 8 to 48 kHz, so a factor 6 */
149 opus_int16 x_buf_API_fs_Hz[ ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * MAX_API_FS_KHZ ];
150 silk_resampler_state_struct temp_resampler_state;
151#ifdef FIXED_POINT
152 opus_int16 *x_bufFIX = psEnc->x_buf;
153#else
154 opus_int16 x_bufFIX[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];
155#endif
156
157 nSamples_temp = silk_LSHIFT( psEnc->sCmn.frame_length, 1 ) + LA_SHAPE_MS * psEnc->sCmn.fs_kHz;
158
159#ifndef FIXED_POINT
160 silk_float2short_array( x_bufFIX, psEnc->x_buf, nSamples_temp );
161#endif
162
163 /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */
164 ret += silk_resampler_init( &temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 );
165
166 /* Temporary resampling of x_buf data to API_fs_Hz */
167 ret += silk_resampler( &temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, nSamples_temp );
168
169 /* Calculate number of samples that has been temporarily upsampled */
170 nSamples_temp = silk_DIV32_16( nSamples_temp * psEnc->sCmn.API_fs_Hz, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ) );
171
172 /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
173 ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 );
174
175 /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */
176 ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, nSamples_temp );
177
178#ifndef FIXED_POINT
179 silk_short2float_array( psEnc->x_buf, x_bufFIX, ( 2 * MAX_FRAME_LENGTH_MS + LA_SHAPE_MS ) * fs_kHz );
180#endif
181 }
182 }
183
184 psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz;
185
186 return ret;
187}
188
189static opus_int silk_setup_fs(
190 silk_encoder_state_Fxx *psEnc, /* I/O */
191 opus_int fs_kHz, /* I */
192 opus_int PacketSize_ms /* I */
193)
194{
195 opus_int ret = SILK_NO_ERROR;
196
197 /* Set packet size */
198 if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) {
199 if( ( PacketSize_ms != 10 ) &&
200 ( PacketSize_ms != 20 ) &&
201 ( PacketSize_ms != 40 ) &&
202 ( PacketSize_ms != 60 ) ) {
203 ret = SILK_ENC_PACKET_SIZE_NOT_SUPPORTED;
204 }
205 if( PacketSize_ms <= 10 ) {
206 psEnc->sCmn.nFramesPerPacket = 1;
207 psEnc->sCmn.nb_subfr = PacketSize_ms == 10 ? 2 : 1;
208 psEnc->sCmn.frame_length = silk_SMULBB( PacketSize_ms, fs_kHz );
209 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
210 if( psEnc->sCmn.fs_kHz == 8 ) {
211 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
212 } else {
213 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
214 }
215 } else {
216 psEnc->sCmn.nFramesPerPacket = silk_DIV32_16( PacketSize_ms, MAX_FRAME_LENGTH_MS );
217 psEnc->sCmn.nb_subfr = MAX_NB_SUBFR;
218 psEnc->sCmn.frame_length = silk_SMULBB( 20, fs_kHz );
219 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
220 if( psEnc->sCmn.fs_kHz == 8 ) {
221 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
222 } else {
223 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
224 }
225 }
226 psEnc->sCmn.PacketSize_ms = PacketSize_ms;
227 psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */
228 }
229
230 /* Set internal sampling frequency */
231 silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
232 silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );
233 if( psEnc->sCmn.fs_kHz != fs_kHz ) {
234 /* reset part of the state */
235 silk_memset( &psEnc->sShape, 0, sizeof( psEnc->sShape ) );
236 silk_memset( &psEnc->sPrefilt, 0, sizeof( psEnc->sPrefilt ) );
237 silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( psEnc->sCmn.sNSQ ) );
238 silk_memset( psEnc->sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
239 silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) );
240 psEnc->sCmn.inputBufIx = 0;
241 psEnc->sCmn.nFramesEncoded = 0;
242 psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */
243
244 /* Initialize non-zero parameters */
245 psEnc->sCmn.prevLag = 100;
246 psEnc->sCmn.first_frame_after_reset = 1;
247 psEnc->sPrefilt.lagPrev = 100;
248 psEnc->sShape.LastGainIndex = 10;
249 psEnc->sCmn.sNSQ.lagPrev = 100;
250 psEnc->sCmn.sNSQ.prev_gain_Q16 = 65536;
251 psEnc->sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
252
253 psEnc->sCmn.fs_kHz = fs_kHz;
254 if( psEnc->sCmn.fs_kHz == 8 ) {
255 if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
256 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
257 } else {
258 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
259 }
260 } else {
261 if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
262 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
263 } else {
264 psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
265 }
266 }
267 if( psEnc->sCmn.fs_kHz == 8 || psEnc->sCmn.fs_kHz == 12 ) {
268 psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER;
269 psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_NB_MB;
270 } else {
271 psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER;
272 psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_WB;
273 }
274 psEnc->sCmn.subfr_length = SUB_FRAME_LENGTH_MS * fs_kHz;
275 psEnc->sCmn.frame_length = silk_SMULBB( psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr );
276 psEnc->sCmn.ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
277 psEnc->sCmn.la_pitch = silk_SMULBB( LA_PITCH_MS, fs_kHz );
278 psEnc->sCmn.max_pitch_lag = silk_SMULBB( 18, fs_kHz );
279 if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
280 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
281 } else {
282 psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
283 }
284 if( psEnc->sCmn.fs_kHz == 16 ) {
285 psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 );
286 psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
287 } else if( psEnc->sCmn.fs_kHz == 12 ) {
288 psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 );
289 psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
290 } else {
291 psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 );
292 psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
293 }
294 }
295
296 /* Check that settings are valid */
297 silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length );
298
299 return ret;
300}
301
302static opus_int silk_setup_complexity(
303 silk_encoder_state *psEncC, /* I/O */
304 opus_int Complexity /* I */
305)
306{
307 opus_int ret = 0;
308
309 /* Set encoding complexity */
310 silk_assert( Complexity >= 0 && Complexity <= 10 );
311 if( Complexity < 2 ) {
312 psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX;
313 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 );
314 psEncC->pitchEstimationLPCOrder = 6;
315 psEncC->shapingLPCOrder = 8;
316 psEncC->la_shape = 3 * psEncC->fs_kHz;
317 psEncC->nStatesDelayedDecision = 1;
318 psEncC->useInterpolatedNLSFs = 0;
319 psEncC->LTPQuantLowComplexity = 1;
320 psEncC->NLSF_MSVQ_Survivors = 2;
321 psEncC->warping_Q16 = 0;
322 } else if( Complexity < 4 ) {
323 psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
324 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 );
325 psEncC->pitchEstimationLPCOrder = 8;
326 psEncC->shapingLPCOrder = 10;
327 psEncC->la_shape = 5 * psEncC->fs_kHz;
328 psEncC->nStatesDelayedDecision = 1;
329 psEncC->useInterpolatedNLSFs = 0;
330 psEncC->LTPQuantLowComplexity = 0;
331 psEncC->NLSF_MSVQ_Survivors = 4;
332 psEncC->warping_Q16 = 0;
333 } else if( Complexity < 6 ) {
334 psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
335 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.74, 16 );
336 psEncC->pitchEstimationLPCOrder = 10;
337 psEncC->shapingLPCOrder = 12;
338 psEncC->la_shape = 5 * psEncC->fs_kHz;
339 psEncC->nStatesDelayedDecision = 2;
340 psEncC->useInterpolatedNLSFs = 1;
341 psEncC->LTPQuantLowComplexity = 0;
342 psEncC->NLSF_MSVQ_Survivors = 8;
343 psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
344 } else if( Complexity < 8 ) {
345 psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
346 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.72, 16 );
347 psEncC->pitchEstimationLPCOrder = 12;
348 psEncC->shapingLPCOrder = 14;
349 psEncC->la_shape = 5 * psEncC->fs_kHz;
350 psEncC->nStatesDelayedDecision = 3;
351 psEncC->useInterpolatedNLSFs = 1;
352 psEncC->LTPQuantLowComplexity = 0;
353 psEncC->NLSF_MSVQ_Survivors = 16;
354 psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
355 } else {
356 psEncC->pitchEstimationComplexity = SILK_PE_MAX_COMPLEX;
357 psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.7, 16 );
358 psEncC->pitchEstimationLPCOrder = 16;
359 psEncC->shapingLPCOrder = 16;
360 psEncC->la_shape = 5 * psEncC->fs_kHz;
361 psEncC->nStatesDelayedDecision = MAX_DEL_DEC_STATES;
362 psEncC->useInterpolatedNLSFs = 1;
363 psEncC->LTPQuantLowComplexity = 0;
364 psEncC->NLSF_MSVQ_Survivors = 32;
365 psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
366 }
367
368 /* Do not allow higher pitch estimation LPC order than predict LPC order */
369 psEncC->pitchEstimationLPCOrder = silk_min_int( psEncC->pitchEstimationLPCOrder, psEncC->predictLPCOrder );
370 psEncC->shapeWinLength = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape;
371 psEncC->Complexity = Complexity;
372
373 silk_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER );
374 silk_assert( psEncC->shapingLPCOrder <= MAX_SHAPE_LPC_ORDER );
375 silk_assert( psEncC->nStatesDelayedDecision <= MAX_DEL_DEC_STATES );
376 silk_assert( psEncC->warping_Q16 <= 32767 );
377 silk_assert( psEncC->la_shape <= LA_SHAPE_MAX );
378 silk_assert( psEncC->shapeWinLength <= SHAPE_LPC_WIN_MAX );
379 silk_assert( psEncC->NLSF_MSVQ_Survivors <= NLSF_VQ_MAX_SURVIVORS );
380
381 return ret;
382}
383
384static inline opus_int silk_setup_LBRR(
385 silk_encoder_state *psEncC, /* I/O */
386 const opus_int32 TargetRate_bps /* I */
387)
388{
389 opus_int ret = SILK_NO_ERROR;
390 opus_int32 LBRR_rate_thres_bps;
391
392 psEncC->LBRR_enabled = 0;
393 if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) {
394 if( psEncC->fs_kHz == 8 ) {
395 LBRR_rate_thres_bps = LBRR_NB_MIN_RATE_BPS;
396 } else if( psEncC->fs_kHz == 12 ) {
397 LBRR_rate_thres_bps = LBRR_MB_MIN_RATE_BPS;
398 } else {
399 LBRR_rate_thres_bps = LBRR_WB_MIN_RATE_BPS;
400 }
401 LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, 125 - silk_min( psEncC->PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) );
402
403 if( TargetRate_bps > LBRR_rate_thres_bps ) {
404 /* Set gain increase for coding LBRR excitation */
405 psEncC->LBRR_enabled = 1;
406 psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
407 }
408 }
409
410 return ret;
411}