675bfb99657008185b51a4394da44efde7107155
[opus.git] / silk / dec_API.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, (subject to the limitations in the disclaimer below)
5 are permitted provided that the following conditions are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Skype Limited, nor the names of specific
12 contributors, may be used to endorse or promote products derived from
13 this software without specific prior written permission.
14 NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED
15 BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
16 CONTRIBUTORS ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
17 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
18 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
22 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 #include "API.h"
32 #include "main.h"
33
34 /************************/
35 /* Decoder Super Struct */
36 /************************/
37 typedef struct {
38     silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];
39     stereo_dec_state                sStereo;
40     opus_int                         nChannelsAPI;
41     opus_int                         nChannelsInternal;
42 } silk_decoder;
43
44 /*********************/
45 /* Decoder functions */
46 /*********************/
47
48 opus_int silk_Get_Decoder_Size( int *decSizeBytes )
49 {
50     opus_int ret = SILK_NO_ERROR;
51
52     *decSizeBytes = sizeof( silk_decoder );
53
54     return ret;
55 }
56
57 /* Reset decoder state */
58 opus_int silk_InitDecoder(
59     void* decState                                      /* I/O: State                                          */
60 )
61 {
62     opus_int n, ret = SILK_NO_ERROR;
63     silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
64
65     for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
66         ret  = silk_init_decoder( &channel_state[ n ] );
67     }
68
69     return ret;
70 }
71
72 /* Decode a frame */
73 opus_int silk_Decode(
74     void*                               decState,       /* I/O: State                                           */
75     silk_DecControlStruct*      decControl,     /* I/O: Control Structure                               */
76     opus_int                             lostFlag,       /* I:   0: no loss, 1 loss, 2 decode FEC                */
77     opus_int                             newPacketFlag,  /* I:   Indicates first decoder call for this packet    */
78     ec_dec                              *psRangeDec,    /* I/O  Compressor data structure                       */
79     opus_int16                           *samplesOut,    /* O:   Decoded output speech vector                    */
80     opus_int32                           *nSamplesOut    /* O:   Number of samples decoded                       */
81 )
82 {
83     opus_int   i, n, prev_fs_kHz, decode_only_middle = 0, ret = SILK_NO_ERROR;
84     opus_int32 nSamplesOutDec, LBRR_symbol;
85     opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 ];
86     opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];
87     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
88     opus_int16 *resample_out_ptr;
89     silk_decoder *psDec = ( silk_decoder * )decState;
90     silk_decoder_state *channel_state = psDec->channel_state;
91
92     /**********************************/
93     /* Test if first frame in payload */
94     /**********************************/
95     if( newPacketFlag ) {
96         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
97             channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
98         }
99     }
100
101     /* Save previous sample frequency */
102     prev_fs_kHz = channel_state[ 0 ].fs_kHz;
103
104     /* If Mono -> Stereo transition in bitstream: init state of second channel */
105     if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
106         ret += silk_init_decoder( &channel_state[ 1 ] );
107         if( psDec->nChannelsAPI == 2 ) {
108             silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
109         }
110     }
111
112     for( n = 0; n < decControl->nChannelsInternal; n++ ) {
113         if( channel_state[ n ].nFramesDecoded == 0 ) {
114             opus_int fs_kHz_dec;
115             if( decControl->payloadSize_ms == 0 ) {
116                 /* Assuming packet loss, use 10 ms */
117                 channel_state[ n ].nFramesPerPacket = 1;
118                 channel_state[ n ].nb_subfr = 2;
119             } else if( decControl->payloadSize_ms == 10 ) {
120                 channel_state[ n ].nFramesPerPacket = 1;
121                 channel_state[ n ].nb_subfr = 2;
122             } else if( decControl->payloadSize_ms == 20 ) {
123                 channel_state[ n ].nFramesPerPacket = 1;
124                 channel_state[ n ].nb_subfr = 4;
125             } else if( decControl->payloadSize_ms == 40 ) {
126                 channel_state[ n ].nFramesPerPacket = 2;
127                 channel_state[ n ].nb_subfr = 4;
128             } else if( decControl->payloadSize_ms == 60 ) {
129                 channel_state[ n ].nFramesPerPacket = 3;
130                 channel_state[ n ].nb_subfr = 4;
131             } else {
132                 silk_assert( 0 );
133                 return SILK_DEC_INVALID_FRAME_SIZE;
134             }
135             fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
136             if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
137                 silk_assert( 0 );
138                 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
139             }
140             silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec );
141         }
142     }
143
144     /* Initialize resampler when switching internal or external sampling frequency */
145     if( prev_fs_kHz != channel_state[ 0 ].fs_kHz || channel_state[ 0 ].prev_API_sampleRate != decControl->API_sampleRate ) {
146         ret = silk_resampler_init( &channel_state[ 0 ].resampler_state, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ), decControl->API_sampleRate );
147         if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
148             silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
149         }
150     }
151     channel_state[ 0 ].prev_API_sampleRate = decControl->API_sampleRate;
152     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
153         silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
154         silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
155     }
156     psDec->nChannelsAPI      = decControl->nChannelsAPI;
157     psDec->nChannelsInternal = decControl->nChannelsInternal;
158
159     if( decControl->API_sampleRate > MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
160         ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
161         return( ret );
162     }
163
164     if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
165         /* First decoder call for this payload */
166         /* Decode VAD flags and LBRR flag */
167         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
168             for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
169                 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
170             }
171             channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
172         }
173         /* Decode LBRR flags */
174         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
175             silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
176             if( channel_state[ n ].LBRR_flag ) {
177                 if( channel_state[ n ].nFramesPerPacket == 1 ) {
178                     channel_state[ n ].LBRR_flags[ 0 ] = 1;
179                 } else {
180                     LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
181                     for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
182                         channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
183                     }
184                 }
185             }
186         }
187
188         if( lostFlag == FLAG_DECODE_NORMAL ) {
189             /* Regular decoding: skip all LBRR data */
190             for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
191                 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
192                     if( channel_state[ n ].LBRR_flags[ i ] ) {
193                         opus_int pulses[ MAX_FRAME_LENGTH ];
194                         if( decControl->nChannelsInternal == 2 && n == 0 ) {
195                             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
196                             if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
197                                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
198                             }
199                         }
200                         silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1 );
201                         silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
202                             channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
203                     }
204                 }
205             }
206         }
207     }
208
209     /* Get MS predictor index */
210     if( decControl->nChannelsInternal == 2 ) {
211         if(   lostFlag == FLAG_DECODE_NORMAL ||
212             ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
213         {
214             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
215             /* For LBRR data, only decode mid-only flag if side-channel's LBRR flag is false */
216             if(   lostFlag == FLAG_DECODE_NORMAL ||
217                 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
218             {
219                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
220             } else {
221                 decode_only_middle = 0;
222             }
223         } else {
224             for( n = 0; n < 2; n++ ) {
225                 MS_pred_Q13[n] = psDec->sStereo.pred_prev_Q13[n];
226             }
227         }
228     }
229
230     /* Call decoder for one frame */
231     for( n = 0; n < decControl->nChannelsInternal; n++ ) {
232         if( n == 0 || decode_only_middle == 0 ) {
233             ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag );
234         } else {
235             silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
236         }
237     }
238
239     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
240         /* Convert Mid/Side to Left/Right */
241         silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
242     } else {
243         /* Buffering */
244         silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
245         silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
246     }
247
248     /* Number of output samples */
249     *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
250
251     /* Set up pointers to temp buffers */
252     if( decControl->nChannelsAPI == 2 ) {
253         resample_out_ptr = samplesOut2_tmp;
254     } else {
255         resample_out_ptr = samplesOut;
256     }
257
258     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
259         /* Resample decoded signal to API_sampleRate */
260         ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
261
262         /* Interleave if stereo output and stereo stream */
263         if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
264             for( i = 0; i < *nSamplesOut; i++ ) {
265                 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
266             }
267         }
268     }
269
270     /* Create two channel output from mono stream */
271     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
272         for( i = 0; i < *nSamplesOut; i++ ) {
273             samplesOut[ 0 + 2 * i ] = samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
274         }
275     }
276
277     return ret;
278 }
279
280 /* Getting table of contents for a packet */
281 opus_int silk_get_TOC(
282     const opus_uint8                     *payload,           /* I    Payload data                                */
283     const opus_int                       nBytesIn,           /* I:   Number of input bytes                       */
284     const opus_int                       nFramesPerPayload,  /* I:   Number of SILK frames per payload           */
285     silk_TOC_struct                 *Silk_TOC           /* O:   Type of content                             */
286 )
287 {
288     opus_int i, flags, ret = SILK_NO_ERROR;
289
290     if( nBytesIn < 1 ) {
291         return -1;
292     }
293     if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
294         return -1;
295     }
296
297     silk_memset( Silk_TOC, 0, sizeof( Silk_TOC ) );
298
299     /* For stereo, extract the flags for the mid channel */
300     flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
301
302     Silk_TOC->inbandFECFlag = flags & 1;
303     for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
304         flags = silk_RSHIFT( flags, 1 );
305         Silk_TOC->VADFlags[ i ] = flags & 1;
306         Silk_TOC->VADFlag |= flags & 1;
307     }
308
309     return ret;
310 }