blob: f1cf639c99de06ad4b254a3cc065438c21120ec0 [file] [log] [blame]
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31#include "API.h"
32#include "main.h"
33#include "stack_alloc.h"
34
35/************************/
36/* Decoder Super Struct */
37/************************/
38typedef struct {
39 silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ];
40 stereo_dec_state sStereo;
41 opus_int nChannelsAPI;
42 opus_int nChannelsInternal;
43 opus_int prev_decode_only_middle;
44} silk_decoder;
45
46/*********************/
47/* Decoder functions */
48/*********************/
49
50opus_int silk_Get_Decoder_Size( /* O Returns error code */
51 opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */
52)
53{
54 opus_int ret = SILK_NO_ERROR;
55
56 *decSizeBytes = sizeof( silk_decoder );
57
58 return ret;
59}
60
61/* Reset decoder state */
62opus_int silk_InitDecoder( /* O Returns error code */
63 void *decState /* I/O State */
64)
65{
66 opus_int n, ret = SILK_NO_ERROR;
67 silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
68
69 for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
70 ret = silk_init_decoder( &channel_state[ n ] );
71 }
72 silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo));
73 /* Not strictly needed, but it's cleaner that way */
74 ((silk_decoder *)decState)->prev_decode_only_middle = 0;
75
76 return ret;
77}
78
79/* Decode a frame */
80opus_int silk_Decode( /* O Returns error code */
81 void* decState, /* I/O State */
82 silk_DecControlStruct* decControl, /* I/O Control Structure */
83 opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */
84 opus_int newPacketFlag, /* I Indicates first decoder call for this packet */
85 ec_dec *psRangeDec, /* I/O Compressor data structure */
86 opus_int16 *samplesOut, /* O Decoded output speech vector */
87 opus_int32 *nSamplesOut /* O Number of samples decoded */
88)
89{
90 opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
91 opus_int32 nSamplesOutDec, LBRR_symbol;
92 opus_int16 *samplesOut1_tmp[ 2 ];
93 VARDECL( opus_int16, samplesOut1_tmp_storage );
94 VARDECL( opus_int16, samplesOut2_tmp );
95 opus_int32 MS_pred_Q13[ 2 ] = { 0 };
96 opus_int16 *resample_out_ptr;
97 silk_decoder *psDec = ( silk_decoder * )decState;
98 silk_decoder_state *channel_state = psDec->channel_state;
99 opus_int has_side;
100 opus_int stereo_to_mono;
101 SAVE_STACK;
102
103 /**********************************/
104 /* Test if first frame in payload */
105 /**********************************/
106 if( newPacketFlag ) {
107 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
108 channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */
109 }
110 }
111
112 /* If Mono -> Stereo transition in bitstream: init state of second channel */
113 if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
114 ret += silk_init_decoder( &channel_state[ 1 ] );
115 }
116
117 stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
118 ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz );
119
120 if( channel_state[ 0 ].nFramesDecoded == 0 ) {
121 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
122 opus_int fs_kHz_dec;
123 if( decControl->payloadSize_ms == 0 ) {
124 /* Assuming packet loss, use 10 ms */
125 channel_state[ n ].nFramesPerPacket = 1;
126 channel_state[ n ].nb_subfr = 2;
127 } else if( decControl->payloadSize_ms == 10 ) {
128 channel_state[ n ].nFramesPerPacket = 1;
129 channel_state[ n ].nb_subfr = 2;
130 } else if( decControl->payloadSize_ms == 20 ) {
131 channel_state[ n ].nFramesPerPacket = 1;
132 channel_state[ n ].nb_subfr = 4;
133 } else if( decControl->payloadSize_ms == 40 ) {
134 channel_state[ n ].nFramesPerPacket = 2;
135 channel_state[ n ].nb_subfr = 4;
136 } else if( decControl->payloadSize_ms == 60 ) {
137 channel_state[ n ].nFramesPerPacket = 3;
138 channel_state[ n ].nb_subfr = 4;
139 } else {
140 silk_assert( 0 );
141 RESTORE_STACK;
142 return SILK_DEC_INVALID_FRAME_SIZE;
143 }
144 fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
145 if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
146 silk_assert( 0 );
147 RESTORE_STACK;
148 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
149 }
150 ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
151 }
152 }
153
154 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
155 silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
156 silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
157 silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
158 }
159 psDec->nChannelsAPI = decControl->nChannelsAPI;
160 psDec->nChannelsInternal = decControl->nChannelsInternal;
161
162 if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
163 ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
164 RESTORE_STACK;
165 return( ret );
166 }
167
168 if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
169 /* First decoder call for this payload */
170 /* Decode VAD flags and LBRR flag */
171 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
172 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
173 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
174 }
175 channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
176 }
177 /* Decode LBRR flags */
178 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
179 silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
180 if( channel_state[ n ].LBRR_flag ) {
181 if( channel_state[ n ].nFramesPerPacket == 1 ) {
182 channel_state[ n ].LBRR_flags[ 0 ] = 1;
183 } else {
184 LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
185 for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
186 channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
187 }
188 }
189 }
190 }
191
192 if( lostFlag == FLAG_DECODE_NORMAL ) {
193 /* Regular decoding: skip all LBRR data */
194 for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
195 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
196 if( channel_state[ n ].LBRR_flags[ i ] ) {
197 opus_int pulses[ MAX_FRAME_LENGTH ];
198 opus_int condCoding;
199
200 if( decControl->nChannelsInternal == 2 && n == 0 ) {
201 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
202 if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
203 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
204 }
205 }
206 /* Use conditional coding if previous frame available */
207 if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
208 condCoding = CODE_CONDITIONALLY;
209 } else {
210 condCoding = CODE_INDEPENDENTLY;
211 }
212 silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
213 silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
214 channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
215 }
216 }
217 }
218 }
219 }
220
221 /* Get MS predictor index */
222 if( decControl->nChannelsInternal == 2 ) {
223 if( lostFlag == FLAG_DECODE_NORMAL ||
224 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
225 {
226 silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
227 /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
228 if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
229 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
230 {
231 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
232 } else {
233 decode_only_middle = 0;
234 }
235 } else {
236 for( n = 0; n < 2; n++ ) {
237 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
238 }
239 }
240 }
241
242 /* Reset side channel decoder prediction memory for first frame with side coding */
243 if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
244 silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
245 silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
246 psDec->channel_state[ 1 ].lagPrev = 100;
247 psDec->channel_state[ 1 ].LastGainIndex = 10;
248 psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
249 psDec->channel_state[ 1 ].first_frame_after_reset = 1;
250 }
251
252 ALLOC( samplesOut1_tmp_storage,
253 decControl->nChannelsInternal*(
254 channel_state[ 0 ].frame_length + 2 ),
255 opus_int16 );
256 samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
257 samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
258 + channel_state[ 0 ].frame_length + 2;
259
260 if( lostFlag == FLAG_DECODE_NORMAL ) {
261 has_side = !decode_only_middle;
262 } else {
263 has_side = !psDec->prev_decode_only_middle
264 || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
265 }
266 /* Call decoder for one frame */
267 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
268 if( n == 0 || has_side ) {
269 opus_int FrameIndex;
270 opus_int condCoding;
271
272 FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
273 /* Use independent coding if no previous frame available */
274 if( FrameIndex <= 0 ) {
275 condCoding = CODE_INDEPENDENTLY;
276 } else if( lostFlag == FLAG_DECODE_LBRR ) {
277 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
278 } else if( n > 0 && psDec->prev_decode_only_middle ) {
279 /* If we skipped a side frame in this packet, we don't
280 need LTP scaling; the LTP state is well-defined. */
281 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
282 } else {
283 condCoding = CODE_CONDITIONALLY;
284 }
285 ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
286 } else {
287 silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
288 }
289 channel_state[ n ].nFramesDecoded++;
290 }
291
292 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
293 /* Convert Mid/Side to Left/Right */
294 silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
295 } else {
296 /* Buffering */
297 silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
298 silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
299 }
300
301 /* Number of output samples */
302 *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
303
304 /* Set up pointers to temp buffers */
305 ALLOC( samplesOut2_tmp,
306 decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 );
307 if( decControl->nChannelsAPI == 2 ) {
308 resample_out_ptr = samplesOut2_tmp;
309 } else {
310 resample_out_ptr = samplesOut;
311 }
312
313 for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
314
315 /* Resample decoded signal to API_sampleRate */
316 ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
317
318 /* Interleave if stereo output and stereo stream */
319 if( decControl->nChannelsAPI == 2 ) {
320 for( i = 0; i < *nSamplesOut; i++ ) {
321 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
322 }
323 }
324 }
325
326 /* Create two channel output from mono stream */
327 if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
328 if ( stereo_to_mono ){
329 /* Resample right channel for newly collapsed stereo just in case
330 we weren't doing collapsing when switching to mono */
331 ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
332
333 for( i = 0; i < *nSamplesOut; i++ ) {
334 samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
335 }
336 } else {
337 for( i = 0; i < *nSamplesOut; i++ ) {
338 samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ];
339 }
340 }
341 }
342
343 /* Export pitch lag, measured at 48 kHz sampling rate */
344 if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
345 int mult_tab[ 3 ] = { 6, 4, 3 };
346 decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
347 } else {
348 decControl->prevPitchLag = 0;
349 }
350
351 if( lostFlag == FLAG_PACKET_LOST ) {
352 /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
353 if we lose packets when the energy is going down */
354 for ( i = 0; i < psDec->nChannelsInternal; i++ )
355 psDec->channel_state[ i ].LastGainIndex = 10;
356 } else {
357 psDec->prev_decode_only_middle = decode_only_middle;
358 }
359 RESTORE_STACK;
360 return ret;
361}
362
#if 0
/* Getting table of contents for a packet */
/* Compiled out. Reads the in-band FEC flag and the per-frame VAD flags   */
/* from the leading bits of the first payload byte into *Silk_TOC.        */
/* Returns -1 when the payload is empty or nFramesPerPayload is outside   */
/* 0..3, SILK_NO_ERROR otherwise.                                         */
opus_int silk_get_TOC(
    const opus_uint8                *payload,           /* I    Payload data                                */
    const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
    const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
    silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
)
{
    opus_int frame, bits;

    /* Reject empty payloads and unsupported frame counts up front */
    if( nBytesIn < 1 || nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
        return -1;
    }

    silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );

    /* For stereo, extract the flags for the mid channel */
    bits = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );

    /* Lowest bit is the FEC flag; the VAD flags sit above it, last frame first */
    Silk_TOC->inbandFECFlag = bits & 1;
    frame = nFramesPerPayload;
    while( frame-- > 0 ) {
        bits = silk_RSHIFT( bits, 1 );
        Silk_TOC->VADFlags[ frame ] = bits & 1;
        Silk_TOC->VADFlag |= bits & 1;
    }

    return SILK_NO_ERROR;
}
#endif