/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "main_FIX.h"
33#include "tuning_parameters.h"
34
35/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */
36static inline void silk_LBRR_encode_FIX(
37 silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
38 silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */
39 const opus_int32 xfw_Q3[], /* I Input signal */
40 opus_int condCoding /* I The type of conditional coding used so far for this frame */
41);
42
43void silk_encode_do_VAD_FIX(
44 silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */
45)
46{
47 /****************************/
48 /* Voice Activity Detection */
49 /****************************/
50 silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
51
52 /**************************************************/
53 /* Convert speech activity into VAD and DTX flags */
54 /**************************************************/
55 if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
56 psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
57 psEnc->sCmn.noSpeechCounter++;
58 if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
59 psEnc->sCmn.inDTX = 0;
60 } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
61 psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
62 psEnc->sCmn.inDTX = 0;
63 }
64 psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
65 } else {
66 psEnc->sCmn.noSpeechCounter = 0;
67 psEnc->sCmn.inDTX = 0;
68 psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
69 psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
70 }
71}
72
73/****************/
74/* Encode frame */
75/****************/
76opus_int silk_encode_frame_FIX(
77 silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
78 opus_int32 *pnBytesOut, /* O Pointer to number of payload bytes; */
79 ec_enc *psRangeEnc, /* I/O compressor data structure */
80 opus_int condCoding, /* I The type of conditional coding to use */
81 opus_int maxBits, /* I If > 0: maximum number of output bits */
82 opus_int useCBR /* I Flag to force constant-bitrate operation */
83)
84{
85 silk_encoder_control_FIX sEncCtrl;
86 opus_int i, iter, maxIter, found_upper, found_lower, ret = 0;
87 opus_int16 *x_frame, *res_pitch_frame;
88 opus_int32 xfw_Q3[ MAX_FRAME_LENGTH ];
89 opus_int16 res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
90 ec_enc sRangeEnc_copy, sRangeEnc_copy2;
91 silk_nsq_state sNSQ_copy, sNSQ_copy2;
92 opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
93 opus_int32 gainsID, gainsID_lower, gainsID_upper;
94 opus_int16 gainMult_Q8;
95 opus_int16 ec_prevLagIndex_copy;
96 opus_int ec_prevSignalType_copy;
97 opus_int8 LastGainIndex_copy2;
98 opus_uint8 ec_buf_copy[ 1275 ];
99
100 /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
101 LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
102
103 psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
104
105 /**************************************************************/
106 /* Set up Input Pointers, and insert frame in input buffer */
107 /*************************************************************/
108 /* pointers aligned with start of frame to encode */
109 x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */
110 res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */
111
112 /***************************************/
113 /* Ensure smooth bandwidth transitions */
114 /***************************************/
115 silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
116
117 /*******************************************/
118 /* Copy new frame to front of input buffer */
119 /*******************************************/
120 silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) );
121
122 if( !psEnc->sCmn.prefillFlag ) {
123 /*****************************************/
124 /* Find pitch lags, initial LPC analysis */
125 /*****************************************/
126 silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame );
127
128 /************************/
129 /* Noise shape analysis */
130 /************************/
131 silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
132
133 /***************************************************/
134 /* Find linear prediction coefficients (LPC + LTP) */
135 /***************************************************/
136 silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
137
138 /****************************************/
139 /* Process gains */
140 /****************************************/
141 silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding );
142
143 /*****************************************/
144 /* Prefiltering for noise shaper */
145 /*****************************************/
146 silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame );
147
148 /****************************************/
149 /* Low Bitrate Redundant Encoding */
150 /****************************************/
151 silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding );
152
153 /* Loop over quantizer and entropy coding to control bitrate */
154 maxIter = 6;
155 gainMult_Q8 = SILK_FIX_CONST( 1, 8 );
156 found_lower = 0;
157 found_upper = 0;
158 gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
159 gainsID_lower = -1;
160 gainsID_upper = -1;
161 /* Copy part of the input state */
162 silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
163 silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
164 seed_copy = psEnc->sCmn.indices.Seed;
165 ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
166 ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
167 for( iter = 0; ; iter++ ) {
168 if( gainsID == gainsID_lower ) {
169 nBits = nBits_lower;
170 } else if( gainsID == gainsID_upper ) {
171 nBits = nBits_upper;
172 } else {
173 /* Restore part of the input state */
174 if( iter > 0 ) {
175 silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
176 silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
177 psEnc->sCmn.indices.Seed = seed_copy;
178 psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
179 psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
180 }
181
182 /*****************************************/
183 /* Noise shaping quantization */
184 /*****************************************/
185 if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
186 silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
187 sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
188 sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
189 } else {
190 silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
191 sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
192 sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
193 }
194
195 /****************************************/
196 /* Encode Parameters */
197 /****************************************/
198 silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
199
200 /****************************************/
201 /* Encode Excitation Signal */
202 /****************************************/
203 silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
204 psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
205
206 nBits = ec_tell( psRangeEnc );
207
208 if( useCBR == 0 && iter == 0 && nBits <= maxBits ) {
209 break;
210 }
211 }
212
213 if( iter == maxIter ) {
214 if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
215 /* Restore output state from earlier iteration that did meet the bitrate budget */
216 silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
217 silk_assert( sRangeEnc_copy2.offs <= 1275 );
218 silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
219 silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
220 psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
221 }
222 break;
223 }
224
225 if( nBits > maxBits ) {
226 if( found_lower == 0 && iter >= 2 ) {
227 /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
228 sEncCtrl.Lambda_Q10 = silk_ADD_RSHIFT32( sEncCtrl.Lambda_Q10, sEncCtrl.Lambda_Q10, 1 );
229 found_upper = 0;
230 gainsID_upper = -1;
231 } else {
232 found_upper = 1;
233 nBits_upper = nBits;
234 gainMult_upper = gainMult_Q8;
235 gainsID_upper = gainsID;
236 }
237 } else if( nBits < maxBits - 5 ) {
238 found_lower = 1;
239 nBits_lower = nBits;
240 gainMult_lower = gainMult_Q8;
241 if( gainsID != gainsID_lower ) {
242 gainsID_lower = gainsID;
243 /* Copy part of the output state */
244 silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
245 silk_assert( psRangeEnc->offs <= 1275 );
246 silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
247 silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
248 LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
249 }
250 } else {
251 /* Within 5 bits of budget: close enough */
252 break;
253 }
254
255 if( ( found_lower & found_upper ) == 0 ) {
256 /* Adjust gain according to high-rate rate/distortion curve */
257 opus_int32 gain_factor_Q16;
258 gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
259 gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) );
260 if( nBits > maxBits ) {
261 gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) );
262 }
263 gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
264 } else {
265 /* Adjust gain by interpolating */
266 gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower );
267 /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
268 if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
269 gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
270 } else
271 if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
272 gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
273 }
274 }
275
276 for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
277 sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
278 }
279
280 /* Quantize gains */
281 psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
282 silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16,
283 &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
284
285 /* Unique identifier of gains vector */
286 gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
287 }
288 }
289
290 /* Update input buffer */
291 silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
292 ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) );
293
294 /* Parameters needed for next frame */
295 psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
296 psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
297
298 /* Exit without entropy coding */
299 if( psEnc->sCmn.prefillFlag ) {
300 /* No payload */
301 *pnBytesOut = 0;
302 return ret;
303 }
304
305 /****************************************/
306 /* Finalize payload */
307 /****************************************/
308 psEnc->sCmn.first_frame_after_reset = 0;
309 /* Payload size */
310 *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
311
312 return ret;
313}
314
315/* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate */
316static inline void silk_LBRR_encode_FIX(
317 silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */
318 silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */
319 const opus_int32 xfw_Q3[], /* I Input signal */
320 opus_int condCoding /* I The type of conditional coding used so far for this frame */
321)
322{
323 opus_int32 TempGains_Q16[ MAX_NB_SUBFR ];
324 SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
325 silk_nsq_state sNSQ_LBRR;
326
327 /*******************************************/
328 /* Control use of inband LBRR */
329 /*******************************************/
330 if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
331 psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
332
333 /* Copy noise shaping quantizer state and quantization indices from regular encoding */
334 silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
335 silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
336
337 /* Save original gains */
338 silk_memcpy( TempGains_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
339
340 if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
341 /* First frame in packet or previous frame not LBRR coded */
342 psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
343
344 /* Increase Gains to get target LBRR rate */
345 psIndices_LBRR->GainsIndices[ 0 ] = psIndices_LBRR->GainsIndices[ 0 ] + psEnc->sCmn.LBRR_GainIncreases;
346 psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
347 }
348
349 /* Decode to get gains in sync with decoder */
350 /* Overwrite unquantized gains with quantized gains */
351 silk_gains_dequant( psEncCtrl->Gains_Q16, psIndices_LBRR->GainsIndices,
352 &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
353
354 /*****************************************/
355 /* Noise shaping quantization */
356 /*****************************************/
357 if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
358 silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
359 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
360 psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
361 psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
362 } else {
363 silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
364 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
365 psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
366 psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
367 }
368
369 /* Restore original gains */
370 silk_memcpy( psEncCtrl->Gains_Q16, TempGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
371 }
372}