Misc SILK fixes:
[opus.git] / silk / dec_API.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, (subject to the limitations in the disclaimer below)
5 are permitted provided that the following conditions are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Skype Limited, nor the names of specific
12 contributors, may be used to endorse or promote products derived from
13 this software without specific prior written permission.
14 NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED
15 BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
16 CONTRIBUTORS ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
17 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
18 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
22 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 #include "API.h"
32 #include "main.h"
33
/*
 * Decoder delay lookup table.
 *
 * silk_Decode() indexes it as
 *     dec_delay_matrix[ rateID( internal rate in Hz ) ][ rateID( API rate in Hz ) ]
 * and stores the result in channel_state[ 0 ].delay, where it is used as a count
 * of opus_int16 samples taken from the per-channel delay buffer. Every entry is
 * expected to be <= MAX_DECODER_DELAY (asserted at the point of use).
 */
static const int dec_delay_matrix[ 3 ][ 5 ] = {
    /* API rate (kHz):     8   12   16   24   48 */
    /* SILK  8 kHz */  {   3,   0,   2,   0,   0 },
    /* SILK 12 kHz */  {   0,   8,   5,   7,   5 },
    /* SILK 16 kHz */  {   0,   0,   8,   5,   5 }
};
40
41
42 /************************/
43 /* Decoder Super Struct */
44 /************************/
45 typedef struct {
46     silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];
47     stereo_dec_state                sStereo;
48     opus_int                         nChannelsAPI;
49     opus_int                         nChannelsInternal;
50 } silk_decoder;
51
52 /*********************/
53 /* Decoder functions */
54 /*********************/
55
56 opus_int silk_Get_Decoder_Size( int *decSizeBytes )
57 {
58     opus_int ret = SILK_NO_ERROR;
59
60     *decSizeBytes = sizeof( silk_decoder );
61
62     return ret;
63 }
64
65 /* Reset decoder state */
66 opus_int silk_InitDecoder(
67     void* decState                                      /* I/O: State                                          */
68 )
69 {
70     opus_int n, ret = SILK_NO_ERROR;
71     silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
72
73     for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
74         ret  = silk_init_decoder( &channel_state[ n ] );
75     }
76
77     return ret;
78 }
79
80 /* Decode a frame */
81 opus_int silk_Decode(
82     void*                               decState,       /* I/O: State                                           */
83     silk_DecControlStruct*      decControl,     /* I/O: Control Structure                               */
84     opus_int                             lostFlag,       /* I:   0: no loss, 1 loss, 2 decode FEC                */
85     opus_int                             newPacketFlag,  /* I:   Indicates first decoder call for this packet    */
86     ec_dec                              *psRangeDec,    /* I/O  Compressor data structure                       */
87     opus_int16                           *samplesOut,    /* O:   Decoded output speech vector                    */
88     opus_int32                           *nSamplesOut    /* O:   Number of samples decoded                       */
89 )
90 {
91     opus_int   i, n, prev_fs_kHz, decode_only_middle = 0, ret = SILK_NO_ERROR;
92     opus_int32 nSamplesOutDec, LBRR_symbol;
93     opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 + MAX_DECODER_DELAY ];
94     opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];
95     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
96     opus_int16 *resample_out_ptr;
97     silk_decoder *psDec = ( silk_decoder * )decState;
98     silk_decoder_state *channel_state = psDec->channel_state;
99     int delay;
100
101     delay = channel_state[ 0 ].delay;
102
103     /**********************************/
104     /* Test if first frame in payload */
105     /**********************************/
106     if( newPacketFlag ) {
107         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
108             channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
109         }
110     }
111
112     /* Save previous sample frequency */
113     prev_fs_kHz = channel_state[ 0 ].fs_kHz;
114
115     /* If Mono -> Stereo transition in bitstream: init state of second channel */
116     if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
117         ret += silk_init_decoder( &channel_state[ 1 ] );
118         if( psDec->nChannelsAPI == 2 ) {
119             silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
120             silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, MAX_DECODER_DELAY*sizeof(opus_int16));
121         }
122     }
123
124     for( n = 0; n < decControl->nChannelsInternal; n++ ) {
125         if( channel_state[ n ].nFramesDecoded == 0 ) {
126             opus_int fs_kHz_dec;
127             if( decControl->payloadSize_ms == 0 ) {
128                 /* Assuming packet loss, use 10 ms */
129                 channel_state[ n ].nFramesPerPacket = 1;
130                 channel_state[ n ].nb_subfr = 2;
131             } else if( decControl->payloadSize_ms == 10 ) {
132                 channel_state[ n ].nFramesPerPacket = 1;
133                 channel_state[ n ].nb_subfr = 2;
134             } else if( decControl->payloadSize_ms == 20 ) {
135                 channel_state[ n ].nFramesPerPacket = 1;
136                 channel_state[ n ].nb_subfr = 4;
137             } else if( decControl->payloadSize_ms == 40 ) {
138                 channel_state[ n ].nFramesPerPacket = 2;
139                 channel_state[ n ].nb_subfr = 4;
140             } else if( decControl->payloadSize_ms == 60 ) {
141                 channel_state[ n ].nFramesPerPacket = 3;
142                 channel_state[ n ].nb_subfr = 4;
143             } else {
144                 silk_assert( 0 );
145                 return SILK_DEC_INVALID_FRAME_SIZE;
146             }
147             fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
148             if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
149                 silk_assert( 0 );
150                 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
151             }
152             silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec );
153         }
154     }
155
156     /* Initialize resampler when switching internal or external sampling frequency */
157     if( prev_fs_kHz != channel_state[ 0 ].fs_kHz || channel_state[ 0 ].prev_API_sampleRate != decControl->API_sampleRate ) {
158         channel_state[ 0 ].delay = dec_delay_matrix[rateID(silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ))][rateID(decControl->API_sampleRate)];
159         silk_assert(channel_state[ 0 ].delay <= MAX_DECODER_DELAY);
160         ret = silk_resampler_init( &channel_state[ 0 ].resampler_state, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ), decControl->API_sampleRate );
161         if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
162             silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
163             channel_state[ 1 ].delay = channel_state[ 0 ].delay;
164         }
165     }
166     channel_state[ 0 ].prev_API_sampleRate = decControl->API_sampleRate;
167     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
168         silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
169         silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
170     }
171     psDec->nChannelsAPI      = decControl->nChannelsAPI;
172     psDec->nChannelsInternal = decControl->nChannelsInternal;
173
174     if( decControl->API_sampleRate > MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
175         ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
176         return( ret );
177     }
178
179     if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
180         /* First decoder call for this payload */
181         /* Decode VAD flags and LBRR flag */
182         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
183             for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
184                 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
185             }
186             channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
187         }
188         /* Decode LBRR flags */
189         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
190             silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
191             if( channel_state[ n ].LBRR_flag ) {
192                 if( channel_state[ n ].nFramesPerPacket == 1 ) {
193                     channel_state[ n ].LBRR_flags[ 0 ] = 1;
194                 } else {
195                     LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
196                     for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
197                         channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
198                     }
199                 }
200             }
201         }
202
203         if( lostFlag == FLAG_DECODE_NORMAL ) {
204             /* Regular decoding: skip all LBRR data */
205             for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
206                 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
207                     if( channel_state[ n ].LBRR_flags[ i ] ) {
208                         opus_int pulses[ MAX_FRAME_LENGTH ];
209                         if( decControl->nChannelsInternal == 2 && n == 0 ) {
210                             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
211                             if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
212                                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
213                             }
214                         }
215                         silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1 );
216                         silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
217                             channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
218                     }
219                 }
220             }
221         }
222     }
223
224     /* Get MS predictor index */
225     if( decControl->nChannelsInternal == 2 ) {
226         if(   lostFlag == FLAG_DECODE_NORMAL ||
227             ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
228         {
229             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
230             /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
231             if(   lostFlag == FLAG_DECODE_NORMAL ||
232                 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
233             {
234                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
235             } else {
236                 decode_only_middle = 0;
237             }
238         } else {
239             for( n = 0; n < 2; n++ ) {
240                 MS_pred_Q13[n] = psDec->sStereo.pred_prev_Q13[n];
241             }
242         }
243     }
244
245     /* Call decoder for one frame */
246     for( n = 0; n < decControl->nChannelsInternal; n++ ) {
247         if( n == 0 || decode_only_middle == 0 ) {
248             ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 + delay ], &nSamplesOutDec, lostFlag );
249         } else {
250             silk_memset( &samplesOut1_tmp[ n ][ 2 + delay ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
251         }
252     }
253
254     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
255         /* Convert Mid/Side to Left/Right */
256         silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][delay], &samplesOut1_tmp[ 1 ][delay], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
257     } else {
258         /* Buffering */
259         silk_memcpy( &samplesOut1_tmp[ 0 ][delay], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
260         silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec + delay ], 2 * sizeof( opus_int16 ) );
261     }
262
263     /* Number of output samples */
264     *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
265
266     /* Set up pointers to temp buffers */
267     if( decControl->nChannelsAPI == 2 ) {
268         resample_out_ptr = samplesOut2_tmp;
269     } else {
270         resample_out_ptr = samplesOut;
271     }
272
273     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
274
275         silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY-delay ], delay*sizeof(opus_int16));
276         /* Resample decoded signal to API_sampleRate */
277         ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
278         silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY*sizeof(opus_int16));
279
280         /* Interleave if stereo output and stereo stream */
281         if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
282             for( i = 0; i < *nSamplesOut; i++ ) {
283                 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
284             }
285         }
286     }
287
288     /* Create two channel output from mono stream */
289     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
290         for( i = 0; i < *nSamplesOut; i++ ) {
291             samplesOut[ 0 + 2 * i ] = samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
292         }
293     }
294
295     return ret;
296 }
297
298 /* Getting table of contents for a packet */
299 opus_int silk_get_TOC(
300     const opus_uint8                     *payload,           /* I    Payload data                                */
301     const opus_int                       nBytesIn,           /* I:   Number of input bytes                       */
302     const opus_int                       nFramesPerPayload,  /* I:   Number of SILK frames per payload           */
303     silk_TOC_struct                 *Silk_TOC           /* O:   Type of content                             */
304 )
305 {
306     opus_int i, flags, ret = SILK_NO_ERROR;
307
308     if( nBytesIn < 1 ) {
309         return -1;
310     }
311     if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
312         return -1;
313     }
314
315     silk_memset( Silk_TOC, 0, sizeof( Silk_TOC ) );
316
317     /* For stereo, extract the flags for the mid channel */
318     flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
319
320     Silk_TOC->inbandFECFlag = flags & 1;
321     for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
322         flags = silk_RSHIFT( flags, 1 );
323         Silk_TOC->VADFlags[ i ] = flags & 1;
324         Silk_TOC->VADFlag |= flags & 1;
325     }
326
327     return ret;
328 }