Merge commit '390c89225d'
[opus.git] / silk / dec_API.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 #include "API.h"
32 #include "main.h"
33
34 /************************/
35 /* Decoder Super Struct */
36 /************************/
37 typedef struct {
38     silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];
39     stereo_dec_state                sStereo;
40     opus_int                         nChannelsAPI;
41     opus_int                         nChannelsInternal;
42     opus_int                         prev_decode_only_middle;
43 } silk_decoder;
44
45 /*********************/
46 /* Decoder functions */
47 /*********************/
48
49 opus_int silk_Get_Decoder_Size(                         /* O    Returns error code                              */
50     opus_int                        *decSizeBytes       /* O    Number of bytes in SILK decoder state           */
51 )
52 {
53     opus_int ret = SILK_NO_ERROR;
54
55     *decSizeBytes = sizeof( silk_decoder );
56
57     return ret;
58 }
59
60 /* Reset decoder state */
61 opus_int silk_InitDecoder(                              /* O    Returns error code                              */
62     void                            *decState           /* I/O  State                                           */
63 )
64 {
65     opus_int n, ret = SILK_NO_ERROR;
66     silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
67
68     for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
69         ret  = silk_init_decoder( &channel_state[ n ] );
70     }
71
72     return ret;
73 }
74
75 /* Decode a frame */
76 opus_int silk_Decode(                                   /* O    Returns error code                              */
77     void*                           decState,           /* I/O  State                                           */
78     silk_DecControlStruct*          decControl,         /* I/O  Control Structure                               */
79     opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
80     opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
81     ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
82     opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
83     opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
84 )
85 {
86     opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
87     opus_int32 nSamplesOutDec, LBRR_symbol;
88     opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 ];
89     opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];
90     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
91     opus_int16 *resample_out_ptr;
92     silk_decoder *psDec = ( silk_decoder * )decState;
93     silk_decoder_state *channel_state = psDec->channel_state;
94     opus_int has_side;
95     opus_int stereo_to_mono;
96
97     /**********************************/
98     /* Test if first frame in payload */
99     /**********************************/
100     if( newPacketFlag ) {
101         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
102             channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
103         }
104     }
105
106     /* If Mono -> Stereo transition in bitstream: init state of second channel */
107     if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
108         ret += silk_init_decoder( &channel_state[ 1 ] );
109     }
110
111     stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
112                      ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz );
113
114     if( channel_state[ 0 ].nFramesDecoded == 0 ) {
115         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
116             opus_int fs_kHz_dec;
117             if( decControl->payloadSize_ms == 0 ) {
118                 /* Assuming packet loss, use 10 ms */
119                 channel_state[ n ].nFramesPerPacket = 1;
120                 channel_state[ n ].nb_subfr = 2;
121             } else if( decControl->payloadSize_ms == 10 ) {
122                 channel_state[ n ].nFramesPerPacket = 1;
123                 channel_state[ n ].nb_subfr = 2;
124             } else if( decControl->payloadSize_ms == 20 ) {
125                 channel_state[ n ].nFramesPerPacket = 1;
126                 channel_state[ n ].nb_subfr = 4;
127             } else if( decControl->payloadSize_ms == 40 ) {
128                 channel_state[ n ].nFramesPerPacket = 2;
129                 channel_state[ n ].nb_subfr = 4;
130             } else if( decControl->payloadSize_ms == 60 ) {
131                 channel_state[ n ].nFramesPerPacket = 3;
132                 channel_state[ n ].nb_subfr = 4;
133             } else {
134                 silk_assert( 0 );
135                 return SILK_DEC_INVALID_FRAME_SIZE;
136             }
137             fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
138             if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
139                 silk_assert( 0 );
140                 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
141             }
142             ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
143         }
144     }
145
146     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
147         silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
148         silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
149         silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
150     }
151     psDec->nChannelsAPI      = decControl->nChannelsAPI;
152     psDec->nChannelsInternal = decControl->nChannelsInternal;
153
154     if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
155         ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
156         return( ret );
157     }
158
159     if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
160         /* First decoder call for this payload */
161         /* Decode VAD flags and LBRR flag */
162         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
163             for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
164                 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
165             }
166             channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
167         }
168         /* Decode LBRR flags */
169         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
170             silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
171             if( channel_state[ n ].LBRR_flag ) {
172                 if( channel_state[ n ].nFramesPerPacket == 1 ) {
173                     channel_state[ n ].LBRR_flags[ 0 ] = 1;
174                 } else {
175                     LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
176                     for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
177                         channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
178                     }
179                 }
180             }
181         }
182
183         if( lostFlag == FLAG_DECODE_NORMAL ) {
184             /* Regular decoding: skip all LBRR data */
185             for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
186                 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
187                     if( channel_state[ n ].LBRR_flags[ i ] ) {
188                         opus_int pulses[ MAX_FRAME_LENGTH ];
189                         opus_int condCoding;
190
191                         if( decControl->nChannelsInternal == 2 && n == 0 ) {
192                             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
193                             if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
194                                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
195                             }
196                         }
197                         /* Use conditional coding if previous frame available */
198                         if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
199                             condCoding = CODE_CONDITIONALLY;
200                         } else {
201                             condCoding = CODE_INDEPENDENTLY;
202                         }
203                         silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
204                         silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
205                             channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
206                     }
207                 }
208             }
209         }
210     }
211
212     /* Get MS predictor index */
213     if( decControl->nChannelsInternal == 2 ) {
214         if(   lostFlag == FLAG_DECODE_NORMAL ||
215             ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
216         {
217             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
218             /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
219             if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
220                 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
221             {
222                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
223             } else {
224                 decode_only_middle = 0;
225             }
226         } else {
227             for( n = 0; n < 2; n++ ) {
228                 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
229             }
230         }
231     }
232
233     /* Reset side channel decoder prediction memory for first frame with side coding */
234     if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
235         silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
236         silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
237         psDec->channel_state[ 1 ].lagPrev        = 100;
238         psDec->channel_state[ 1 ].LastGainIndex  = 10;
239         psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
240         psDec->channel_state[ 1 ].first_frame_after_reset = 1;
241     }
242
243     if( lostFlag == FLAG_DECODE_NORMAL ) {
244         has_side = !decode_only_middle;
245     } else {
246         has_side = !psDec->prev_decode_only_middle
247               || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
248     }
249     /* Call decoder for one frame */
250     for( n = 0; n < decControl->nChannelsInternal; n++ ) {
251         if( n == 0 || has_side ) {
252             opus_int FrameIndex;
253             opus_int condCoding;
254
255             FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
256             /* Use independent coding if no previous frame available */
257             if( FrameIndex <= 0 ) {
258                 condCoding = CODE_INDEPENDENTLY;
259             } else if( lostFlag == FLAG_DECODE_LBRR ) {
260                 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
261             } else if( n > 0 && psDec->prev_decode_only_middle ) {
262                 /* If we skipped a side frame in this packet, we don't
263                    need LTP scaling; the LTP state is well-defined. */
264                 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
265             } else {
266                 condCoding = CODE_CONDITIONALLY;
267             }
268             ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
269         } else {
270             silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
271         }
272         channel_state[ n ].nFramesDecoded++;
273     }
274
275     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
276         /* Convert Mid/Side to Left/Right */
277         silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
278     } else {
279         /* Buffering */
280         silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
281         silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
282     }
283
284     /* Number of output samples */
285     *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
286
287     /* Set up pointers to temp buffers */
288     if( decControl->nChannelsAPI == 2 ) {
289         resample_out_ptr = samplesOut2_tmp;
290     } else {
291         resample_out_ptr = samplesOut;
292     }
293
294     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
295
296         /* Resample decoded signal to API_sampleRate */
297         ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
298
299         /* Interleave if stereo output and stereo stream */
300         if( decControl->nChannelsAPI == 2 ) {
301             for( i = 0; i < *nSamplesOut; i++ ) {
302                 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
303             }
304         }
305     }
306
307     /* Create two channel output from mono stream */
308     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
309         if ( stereo_to_mono ){
310             /* Resample right channel for newly collapsed stereo just in case
311                we weren't doing collapsing when switching to mono */
312             ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
313
314             for( i = 0; i < *nSamplesOut; i++ ) {
315                 samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
316             }
317         } else {
318             for( i = 0; i < *nSamplesOut; i++ ) {
319                 samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ];
320             }
321         }
322     }
323
324     /* Export pitch lag, measured at 48 kHz sampling rate */
325     if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
326         int mult_tab[ 3 ] = { 6, 4, 3 };
327         decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
328     } else {
329         decControl->prevPitchLag = 0;
330     }
331
332     if( lostFlag == FLAG_PACKET_LOST ) {
333        /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
334           if we lose packets when the energy is going down */
335        for ( i = 0; i < psDec->nChannelsInternal; i++ )
336           psDec->channel_state[ i ].LastGainIndex = 10;
337     } else {
338        psDec->prev_decode_only_middle = decode_only_middle;
339     }
340     return ret;
341 }
342
343 /* Getting table of contents for a packet */
344 opus_int silk_get_TOC(
345     const opus_uint8                *payload,           /* I    Payload data                                */
346     const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
347     const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
348     silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
349 )
350 {
351     opus_int i, flags, ret = SILK_NO_ERROR;
352
353     if( nBytesIn < 1 ) {
354         return -1;
355     }
356     if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
357         return -1;
358     }
359
360     silk_memset( Silk_TOC, 0, sizeof( Silk_TOC ) );
361
362     /* For stereo, extract the flags for the mid channel */
363     flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
364
365     Silk_TOC->inbandFECFlag = flags & 1;
366     for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
367         flags = silk_RSHIFT( flags, 1 );
368         Silk_TOC->VADFlags[ i ] = flags & 1;
369         Silk_TOC->VADFlag |= flags & 1;
370     }
371
372     return ret;
373 }