Merge branch 'exp_analysis7'
[opus.git] / silk / dec_API.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 #include "API.h"
32 #include "main.h"
33 #include "stack_alloc.h"
34
35 /************************/
36 /* Decoder Super Struct */
37 /************************/
38 typedef struct {
39     silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];
40     stereo_dec_state                sStereo;
41     opus_int                         nChannelsAPI;
42     opus_int                         nChannelsInternal;
43     opus_int                         prev_decode_only_middle;
44 } silk_decoder;
45
46 /*********************/
47 /* Decoder functions */
48 /*********************/
49
50 opus_int silk_Get_Decoder_Size(                         /* O    Returns error code                              */
51     opus_int                        *decSizeBytes       /* O    Number of bytes in SILK decoder state           */
52 )
53 {
54     opus_int ret = SILK_NO_ERROR;
55
56     *decSizeBytes = sizeof( silk_decoder );
57
58     return ret;
59 }
60
61 /* Reset decoder state */
62 opus_int silk_InitDecoder(                              /* O    Returns error code                              */
63     void                            *decState           /* I/O  State                                           */
64 )
65 {
66     opus_int n, ret = SILK_NO_ERROR;
67     silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
68
69     for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
70         ret  = silk_init_decoder( &channel_state[ n ] );
71     }
72
73     return ret;
74 }
75
76 /* Decode a frame */
/* Decodes one frame per call for each internal channel of the stream.          */
/* On the first call for a payload it configures frame layout and sample rate,  */
/* reads the VAD/LBRR header flags and, for normal decoding, reads past any     */
/* LBRR data. It then decodes the frame, converts mid/side to left/right when   */
/* both the stream and the API are stereo, resamples to API_sampleRate and      */
/* interleaves the output when two API channels are requested.                  */
/* Returns the accumulated error code of the called functions, or immediately   */
/* SILK_DEC_INVALID_FRAME_SIZE / SILK_DEC_INVALID_SAMPLING_FREQUENCY when the   */
/* control structure holds unsupported values.                                  */
77 opus_int silk_Decode(                                   /* O    Returns error code                              */
78     void*                           decState,           /* I/O  State                                           */
79     silk_DecControlStruct*          decControl,         /* I/O  Control Structure                               */
80     opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
81     opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
82     ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
83     opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
84     opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
85 )
86 {
87     opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
88     opus_int32 nSamplesOutDec, LBRR_symbol;
89     opus_int16 *samplesOut1_tmp[ 2 ];
90     VARDECL( opus_int16, samplesOut1_tmp_storage );
91     VARDECL( opus_int16, samplesOut2_tmp );
92     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
93     opus_int16 *resample_out_ptr;
94     silk_decoder *psDec = ( silk_decoder * )decState;
95     silk_decoder_state *channel_state = psDec->channel_state;
96     opus_int has_side;
97     opus_int stereo_to_mono;
98     SAVE_STACK;
99
100     /**********************************/
101     /* Test if first frame in payload */
102     /**********************************/
103     if( newPacketFlag ) {
104         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
105             channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
106         }
107     }
108
109     /* If Mono -> Stereo transition in bitstream: init state of second channel */
110     if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
111         ret += silk_init_decoder( &channel_state[ 1 ] );
112     }
113
    /* Set when the stream goes from two internal channels (previous call) to one (this call)  */
    /* while the requested internal rate equals the rate channel 0 is currently running at.    */
    /* Must be evaluated before psDec->nChannelsInternal is overwritten further down.          */
114     stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
115                      ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz );
116
    /* First frame of a payload: derive frames per packet and subframes per frame from the */
    /* payload duration and (re)configure the sampling rate of every internal channel      */
117     if( channel_state[ 0 ].nFramesDecoded == 0 ) {
118         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
119             opus_int fs_kHz_dec;
120             if( decControl->payloadSize_ms == 0 ) {
121                 /* Assuming packet loss, use 10 ms */
122                 channel_state[ n ].nFramesPerPacket = 1;
123                 channel_state[ n ].nb_subfr = 2;
124             } else if( decControl->payloadSize_ms == 10 ) {
125                 channel_state[ n ].nFramesPerPacket = 1;
126                 channel_state[ n ].nb_subfr = 2;
127             } else if( decControl->payloadSize_ms == 20 ) {
128                 channel_state[ n ].nFramesPerPacket = 1;
129                 channel_state[ n ].nb_subfr = 4;
130             } else if( decControl->payloadSize_ms == 40 ) {
131                 channel_state[ n ].nFramesPerPacket = 2;
132                 channel_state[ n ].nb_subfr = 4;
133             } else if( decControl->payloadSize_ms == 60 ) {
134                 channel_state[ n ].nFramesPerPacket = 3;
135                 channel_state[ n ].nb_subfr = 4;
136             } else {
137                 silk_assert( 0 );
138                 RESTORE_STACK;
139                 return SILK_DEC_INVALID_FRAME_SIZE;
140             }
            /* Hz -> kHz without a division: 8000, 12000 and 16000 map to 8, 12 and 16 */
141             fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
142             if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
143                 silk_assert( 0 );
144                 RESTORE_STACK;
145                 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
146             }
147             ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
148         }
149     }
150
    /* Switching to stereo stream + stereo output from a previous call that was mono on either */
    /* side: clear the stereo predictor and side memory and start the second channel's         */
    /* resampler from a copy of the first channel's resampler state                            */
151     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
152         silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
153         silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
154         silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
155     }
    /* Remember the channel configuration for the transition checks of the next call */
156     psDec->nChannelsAPI      = decControl->nChannelsAPI;
157     psDec->nChannelsInternal = decControl->nChannelsInternal;
158
    /* NOTE(review): this range check runs after API_sampleRate was already passed to */
    /* silk_decoder_set_fs() above                                                    */
159     if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
160         ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
161         RESTORE_STACK;
162         return( ret );
163     }
164
165     if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
166         /* First decoder call for this payload */
167         /* Decode VAD flags and LBRR flag */
168         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
169             for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
170                 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
171             }
172             channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
173         }
174         /* Decode LBRR flags */
175         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
176             silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
177             if( channel_state[ n ].LBRR_flag ) {
178                 if( channel_state[ n ].nFramesPerPacket == 1 ) {
179                     channel_state[ n ].LBRR_flags[ 0 ] = 1;
180                 } else {
                    /* One symbol carries the flags of all frames: bit i of LBRR_symbol belongs to frame i */
181                     LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
182                     for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
183                         channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
184                     }
185                 }
186             }
187         }
188
189         if( lostFlag == FLAG_DECODE_NORMAL ) {
190             /* Regular decoding: skip all LBRR data */
191             for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
192                 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
193                     if( channel_state[ n ].LBRR_flags[ i ] ) {
                        /* Scratch buffer: the decoded pulses are not used after this block */
194                         opus_int pulses[ MAX_FRAME_LENGTH ];
195                         opus_int condCoding;
196
197                         if( decControl->nChannelsInternal == 2 && n == 0 ) {
198                             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
199                             if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
200                                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
201                             }
202                         }
203                         /* Use conditional coding if previous frame available */
204                         if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
205                             condCoding = CODE_CONDITIONALLY;
206                         } else {
207                             condCoding = CODE_INDEPENDENTLY;
208                         }
209                         silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
210                         silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
211                             channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
212                     }
213                 }
214             }
215         }
216     }
217
218     /* Get MS predictor index */
219     if( decControl->nChannelsInternal == 2 ) {
220         if(   lostFlag == FLAG_DECODE_NORMAL ||
221             ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
222         {
223             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
224             /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
225             if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
226                 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
227             {
228                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
229             } else {
230                 decode_only_middle = 0;
231             }
232         } else {
            /* No predictor in the bitstream for this frame: reuse the previous one */
233             for( n = 0; n < 2; n++ ) {
234                 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
235             }
236         }
237     }
238
239     /* Reset side channel decoder prediction memory for first frame with side coding */
240     if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
241         silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
242         silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
243         psDec->channel_state[ 1 ].lagPrev        = 100;
244         psDec->channel_state[ 1 ].LastGainIndex  = 10;
245         psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
246         psDec->channel_state[ 1 ].first_frame_after_reset = 1;
247     }
248
    /* One temporary buffer per internal channel, each with two extra samples in front of */
    /* the frame; the frame itself is decoded to offset 2 of each buffer                  */
249     ALLOC( samplesOut1_tmp_storage,
250            decControl->nChannelsInternal*(
251                channel_state[ 0 ].frame_length + 2 ),
252            opus_int16 );
253     samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
254     samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
255                            + channel_state[ 0 ].frame_length + 2;
256
    /* Decide whether the second channel is decoded for this frame or filled with zeros */
257     if( lostFlag == FLAG_DECODE_NORMAL ) {
258         has_side = !decode_only_middle;
259     } else {
260         has_side = !psDec->prev_decode_only_middle
261               || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
262     }
263     /* Call decoder for one frame */
264     for( n = 0; n < decControl->nChannelsInternal; n++ ) {
265         if( n == 0 || has_side ) {
266             opus_int FrameIndex;
267             opus_int condCoding;
268
            /* Channel 0 has already been incremented when n == 1, hence the "- n" */
269             FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
270             /* Use independent coding if no previous frame available */
271             if( FrameIndex <= 0 ) {
272                 condCoding = CODE_INDEPENDENTLY;
273             } else if( lostFlag == FLAG_DECODE_LBRR ) {
274                 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
275             } else if( n > 0 && psDec->prev_decode_only_middle ) {
276                 /* If we skipped a side frame in this packet, we don't
277                    need LTP scaling; the LTP state is well-defined. */
278                 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
279             } else {
280                 condCoding = CODE_CONDITIONALLY;
281             }
282             ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
283         } else {
            /* Only reachable for n > 0, so nSamplesOutDec was already set by the channel 0 decode above */
284             silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
285         }
286         channel_state[ n ].nFramesDecoded++;
287     }
288
289     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
290         /* Convert Mid/Side to Left/Right */
291         silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
292     } else {
293         /* Buffering */
        /* Put the two samples saved in sMid by the previous call in front of this frame, */
        /* then save two samples from the end of this frame's buffer for the next call    */
294         silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
295         silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
296     }
297
298     /* Number of output samples */
    /* nSamplesOutDec scaled by the ratio API_sampleRate / ( fs_kHz * 1000 ) */
299     *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
300
301     /* Set up pointers to temp buffers */
    /* With two API channels each channel is resampled into a temporary buffer and then */
    /* interleaved into samplesOut; with one API channel the resampler writes directly  */
    /* into samplesOut                                                                  */
302     ALLOC( samplesOut2_tmp,
303            decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 );
304     if( decControl->nChannelsAPI == 2 ) {
305         resample_out_ptr = samplesOut2_tmp;
306     } else {
307         resample_out_ptr = samplesOut;
308     }
309
310     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
311
312         /* Resample decoded signal to API_sampleRate */
        /* The resampler input starts at index 1 of the temporary buffer, not at index 2 */
313         ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
314
315         /* Interleave if stereo output and stereo stream */
316         if( decControl->nChannelsAPI == 2 ) {
317             for( i = 0; i < *nSamplesOut; i++ ) {
318                 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
319             }
320         }
321     }
322
323     /* Create two channel output from mono stream */
324     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
325         if ( stereo_to_mono ){
326             /* Resample right channel for newly collapsed stereo just in case
327                we weren't doing collapsing when switching to mono */
328             ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
329
330             for( i = 0; i < *nSamplesOut; i++ ) {
331                 samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
332             }
333         } else {
            /* Duplicate the first output channel into the second */
334             for( i = 0; i < *nSamplesOut; i++ ) {
335                 samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ];
336             }
337         }
338     }
339
340     /* Export pitch lag, measured at 48 kHz sampling rate */
341     if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
        /* Factors that scale a lag at 8, 12 or 16 kHz to 48 kHz; index is ( fs_kHz - 8 ) / 4 */
342         int mult_tab[ 3 ] = { 6, 4, 3 };
343         decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
344     } else {
345         decControl->prevPitchLag = 0;
346     }
347
348     if( lostFlag == FLAG_PACKET_LOST ) {
349        /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
350           if we lose packets when the energy is going down */
351        for ( i = 0; i < psDec->nChannelsInternal; i++ )
352           psDec->channel_state[ i ].LastGainIndex = 10;
353     } else {
       /* The mid-only history is only updated when a frame was actually decoded from the bitstream */
354        psDec->prev_decode_only_middle = decode_only_middle;
355     }
356     RESTORE_STACK;
357     return ret;
358 }
359
360 #if 0
361 /* Getting table of contents for a packet */
362 opus_int silk_get_TOC(
363     const opus_uint8                *payload,           /* I    Payload data                                */
364     const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
365     const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
366     silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
367 )
368 {
369     opus_int i, flags, ret = SILK_NO_ERROR;
370
371     if( nBytesIn < 1 ) {
372         return -1;
373     }
374     if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
375         return -1;
376     }
377
378     silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );
379
380     /* For stereo, extract the flags for the mid channel */
381     flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
382
383     Silk_TOC->inbandFECFlag = flags & 1;
384     for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
385         flags = silk_RSHIFT( flags, 1 );
386         Silk_TOC->VADFlags[ i ] = flags & 1;
387         Silk_TOC->VADFlag |= flags & 1;
388     }
389
390     return ret;
391 }
392 #endif