58cf59a8a33878dd96f65281639050f0faff92dc
[opus.git] / silk / dec_API.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, (subject to the limitations in the disclaimer below)
5 are permitted provided that the following conditions are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Skype Limited, nor the names of specific
12 contributors, may be used to endorse or promote products derived from
13 this software without specific prior written permission.
14 NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED
15 BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
16 CONTRIBUTORS ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
17 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
18 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
22 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 #include "API.h"
32 #include "main.h"
33
34 /************************/
35 /* Decoder Super Struct */
36 /************************/
37 typedef struct {
38     silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];
39     stereo_dec_state                sStereo;
40     opus_int                         nChannelsAPI;
41     opus_int                         nChannelsInternal;
42     opus_int                         prev_decode_only_middle;
43 } silk_decoder;
44
45 /*********************/
46 /* Decoder functions */
47 /*********************/
48
49 opus_int silk_Get_Decoder_Size( int *decSizeBytes )
50 {
51     opus_int ret = SILK_NO_ERROR;
52
53     *decSizeBytes = sizeof( silk_decoder );
54
55     return ret;
56 }
57
58 /* Reset decoder state */
59 opus_int silk_InitDecoder(
60     void* decState                                      /* I/O: State                                          */
61 )
62 {
63     opus_int n, ret = SILK_NO_ERROR;
64     silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
65
66     for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
67         ret  = silk_init_decoder( &channel_state[ n ] );
68     }
69
70     return ret;
71 }
72
73 /* Decode a frame */
74 opus_int silk_Decode(
75     void*                               decState,       /* I/O: State                                           */
76     silk_DecControlStruct*      decControl,     /* I/O: Control Structure                               */
77     opus_int                             lostFlag,       /* I:   0: no loss, 1 loss, 2 decode FEC                */
78     opus_int                             newPacketFlag,  /* I:   Indicates first decoder call for this packet    */
79     ec_dec                              *psRangeDec,    /* I/O  Compressor data structure                       */
80     opus_int16                           *samplesOut,    /* O:   Decoded output speech vector                    */
81     opus_int32                           *nSamplesOut    /* O:   Number of samples decoded                       */
82 )
83 {
84     opus_int   i, n, delay, decode_only_middle = 0, ret = SILK_NO_ERROR;
85     opus_int32 nSamplesOutDec, LBRR_symbol;
86     opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 + MAX_DECODER_DELAY ];
87     opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];
88     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
89     opus_int16 *resample_out_ptr;
90     silk_decoder *psDec = ( silk_decoder * )decState;
91     silk_decoder_state *channel_state = psDec->channel_state;
92     opus_int has_side;
93
94     /**********************************/
95     /* Test if first frame in payload */
96     /**********************************/
97     if( newPacketFlag ) {
98         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
99             channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
100         }
101     }
102
103     /* If Mono -> Stereo transition in bitstream: init state of second channel */
104     if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
105         ret += silk_init_decoder( &channel_state[ 1 ] );
106     }
107
108     if( channel_state[ 0 ].nFramesDecoded == 0 ) {
109         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
110             opus_int fs_kHz_dec;
111             if( decControl->payloadSize_ms == 0 ) {
112                 /* Assuming packet loss, use 10 ms */
113                 channel_state[ n ].nFramesPerPacket = 1;
114                 channel_state[ n ].nb_subfr = 2;
115             } else if( decControl->payloadSize_ms == 10 ) {
116                 channel_state[ n ].nFramesPerPacket = 1;
117                 channel_state[ n ].nb_subfr = 2;
118             } else if( decControl->payloadSize_ms == 20 ) {
119                 channel_state[ n ].nFramesPerPacket = 1;
120                 channel_state[ n ].nb_subfr = 4;
121             } else if( decControl->payloadSize_ms == 40 ) {
122                 channel_state[ n ].nFramesPerPacket = 2;
123                 channel_state[ n ].nb_subfr = 4;
124             } else if( decControl->payloadSize_ms == 60 ) {
125                 channel_state[ n ].nFramesPerPacket = 3;
126                 channel_state[ n ].nb_subfr = 4;
127             } else {
128                 silk_assert( 0 );
129                 return SILK_DEC_INVALID_FRAME_SIZE;
130             }
131             fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
132             if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
133                 silk_assert( 0 );
134                 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
135             }
136             ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
137         }
138     }
139
140     delay = channel_state[ 0 ].delay;
141
142     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
143         silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
144         silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
145         silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
146         silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, sizeof(channel_state[ 0 ].delayBuf));
147     }
148     psDec->nChannelsAPI      = decControl->nChannelsAPI;
149     psDec->nChannelsInternal = decControl->nChannelsInternal;
150
151     if( decControl->API_sampleRate > MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
152         ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
153         return( ret );
154     }
155
156     if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
157         /* First decoder call for this payload */
158         /* Decode VAD flags and LBRR flag */
159         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
160             for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
161                 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
162             }
163             channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
164         }
165         /* Decode LBRR flags */
166         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
167             silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
168             if( channel_state[ n ].LBRR_flag ) {
169                 if( channel_state[ n ].nFramesPerPacket == 1 ) {
170                     channel_state[ n ].LBRR_flags[ 0 ] = 1;
171                 } else {
172                     LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
173                     for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
174                         channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
175                     }
176                 }
177             }
178         }
179
180         if( lostFlag == FLAG_DECODE_NORMAL ) {
181             /* Regular decoding: skip all LBRR data */
182             for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
183                 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
184                     if( channel_state[ n ].LBRR_flags[ i ] ) {
185                         opus_int pulses[ MAX_FRAME_LENGTH ];
186                         opus_int condCoding;
187
188                         if( decControl->nChannelsInternal == 2 && n == 0 ) {
189                             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
190                             if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
191                                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
192                             }
193                         }
194                         /* Use conditional coding if previous frame available */
195                         if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
196                             condCoding = CODE_CONDITIONALLY;
197                         } else {
198                             condCoding = CODE_INDEPENDENTLY;
199                         }
200                         silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
201                         silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
202                             channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
203                     }
204                 }
205             }
206         }
207     }
208
209     /* Get MS predictor index */
210     if( decControl->nChannelsInternal == 2 ) {
211         if(   lostFlag == FLAG_DECODE_NORMAL ||
212             ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
213         {
214             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
215             /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
216             if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
217                 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
218             {
219                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
220             } else {
221                 decode_only_middle = 0;
222             }
223         } else {
224             for( n = 0; n < 2; n++ ) {
225                 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
226             }
227         }
228     }
229
230     /* Reset side channel decoder prediction memory for first frame with side coding */
231     if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
232         silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
233         silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
234         psDec->channel_state[ 1 ].lagPrev        = 100;
235         psDec->channel_state[ 1 ].LastGainIndex  = 10;
236         psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
237         psDec->channel_state[ 1 ].first_frame_after_reset = 1;
238     }
239
240     if (lostFlag == FLAG_DECODE_NORMAL) {
241         has_side = !decode_only_middle;
242     } else {
243         has_side = !psDec->prev_decode_only_middle
244               || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
245     }
246     /* Call decoder for one frame */
247     for( n = 0; n < decControl->nChannelsInternal; n++ ) {
248         if( n == 0 || has_side ) {
249             opus_int FrameIndex;
250             opus_int condCoding;
251
252             FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
253             /* Use independent coding if no previous frame available */
254             if( FrameIndex <= 0 ) {
255                 condCoding = CODE_INDEPENDENTLY;
256             } else if( lostFlag == FLAG_DECODE_LBRR ) {
257                 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
258             } else if( n > 0 && psDec->prev_decode_only_middle ) {
259                 /* If we skipped a side frame in this packet, we don't
260                    need LTP scaling; the LTP state is well-defined. */
261                 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
262             } else {
263                 condCoding = CODE_CONDITIONALLY;
264             }
265             ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 + delay ], &nSamplesOutDec, lostFlag, condCoding);
266         } else {
267             silk_memset( &samplesOut1_tmp[ n ][ 2 + delay ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
268         }
269         channel_state[ n ].nFramesDecoded++;
270     }
271
272     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
273         /* Convert Mid/Side to Left/Right */
274         silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][ delay ], &samplesOut1_tmp[ 1 ][ delay ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
275     } else {
276         /* Buffering */
277         silk_memcpy( &samplesOut1_tmp[ 0 ][ delay ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
278         silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec + delay ], 2 * sizeof( opus_int16 ) );
279     }
280
281     /* Number of output samples */
282     *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
283
284     /* Set up pointers to temp buffers */
285     if( decControl->nChannelsAPI == 2 ) {
286         resample_out_ptr = samplesOut2_tmp;
287     } else {
288         resample_out_ptr = samplesOut;
289     }
290
291     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
292
293         silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY - delay ], delay * sizeof(opus_int16));
294         /* Resample decoded signal to API_sampleRate */
295         ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
296         silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY * sizeof(opus_int16));
297
298         /* Interleave if stereo output and stereo stream */
299         if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
300             for( i = 0; i < *nSamplesOut; i++ ) {
301                 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
302             }
303         }
304     }
305
306     /* Create two channel output from mono stream */
307     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
308         for( i = 0; i < *nSamplesOut; i++ ) {
309             samplesOut[ 0 + 2 * i ] = samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
310         }
311     }
312
313     /* Export pitch lag, measured at 48 kHz sampling rate */
314     if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
315         int mult_tab[ 3 ] = { 6, 4, 3 };
316         decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
317     } else {
318         decControl->prevPitchLag = 0;
319     }
320
321     if ( lostFlag != FLAG_PACKET_LOST ) {
322        psDec->prev_decode_only_middle = decode_only_middle;
323     }
324     return ret;
325 }
326
327 /* Getting table of contents for a packet */
328 opus_int silk_get_TOC(
329     const opus_uint8                     *payload,           /* I    Payload data                                */
330     const opus_int                       nBytesIn,           /* I:   Number of input bytes                       */
331     const opus_int                       nFramesPerPayload,  /* I:   Number of SILK frames per payload           */
332     silk_TOC_struct                 *Silk_TOC           /* O:   Type of content                             */
333 )
334 {
335     opus_int i, flags, ret = SILK_NO_ERROR;
336
337     if( nBytesIn < 1 ) {
338         return -1;
339     }
340     if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
341         return -1;
342     }
343
344     silk_memset( Silk_TOC, 0, sizeof( Silk_TOC ) );
345
346     /* For stereo, extract the flags for the mid channel */
347     flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
348
349     Silk_TOC->inbandFECFlag = flags & 1;
350     for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
351         flags = silk_RSHIFT( flags, 1 );
352         Silk_TOC->VADFlags[ i ] = flags & 1;
353         Silk_TOC->VADFlag |= flags & 1;
354     }
355
356     return ret;
357 }