Speeding up ec_tell_frac()
[opus.git] / silk / dec_API.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 #include "API.h"
32 #include "main.h"
33 #include "stack_alloc.h"
34 #include "os_support.h"
35
36 /************************/
37 /* Decoder Super Struct */
38 /************************/
39 typedef struct {
       /* One decoder state per channel; silk_Decode() uses index 0 for the
          first (mid/mono) channel and index 1 for the second (side) channel. */
40     silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];
       /* Stereo state carried between calls (pred_prev_Q13, sSide and sMid are
          accessed by silk_Decode()); zeroed by silk_InitDecoder(). */
41     stereo_dec_state                sStereo;
       /* Channel counts copied from decControl on the previous silk_Decode()
          call; compared against the new values to detect mono/stereo switches. */
42     opus_int                         nChannelsAPI;
43     opus_int                         nChannelsInternal;
       /* decode_only_middle value of the last call that was not a packet loss. */
44     opus_int                         prev_decode_only_middle;
45 } silk_decoder;
46
47 /*********************/
48 /* Decoder functions */
49 /*********************/
50
51 opus_int silk_Get_Decoder_Size(                         /* O    Returns error code                              */
52     opus_int                        *decSizeBytes       /* O    Number of bytes in SILK decoder state           */
53 )
54 {
55     opus_int ret = SILK_NO_ERROR;
56
57     *decSizeBytes = sizeof( silk_decoder );
58
59     return ret;
60 }
61
62 /* Reset decoder state */
63 opus_int silk_InitDecoder(                              /* O    Returns error code                              */
64     void                            *decState           /* I/O  State                                           */
65 )
66 {
67     opus_int n, ret = SILK_NO_ERROR;
68     silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
69
70     for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
71         ret  = silk_init_decoder( &channel_state[ n ] );
72     }
73     silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo));
74     /* Not strictly needed, but it's cleaner that way */
75     ((silk_decoder *)decState)->prev_decode_only_middle = 0;
76
77     return ret;
78 }
79
80 /* Decode a frame.
      Runs silk_decode_frame() once per internal channel (the second channel is
      zero-filled instead when it is not decoded), converts mid/side to
      left/right when both the stream and the API output are stereo, resamples
      each channel with silk_resampler() and writes the result to samplesOut
      (interleaved when nChannelsAPI == 2). decControl->prevPitchLag is updated
      on the way out.
      Returns the sum of the error codes of the helpers it calls, or returns
      early with SILK_DEC_INVALID_FRAME_SIZE / SILK_DEC_INVALID_SAMPLING_FREQUENCY
      for an unsupported payloadSize_ms, internalSampleRate or API_sampleRate. */
81 opus_int silk_Decode(                                   /* O    Returns error code                              */
82     void*                           decState,           /* I/O  State                                           */
83     silk_DecControlStruct*          decControl,         /* I/O  Control Structure                               */
84     opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
85     opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
86     ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
87     opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
88     opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
89 )
90 {
91     opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
92     opus_int32 nSamplesOutDec, LBRR_symbol;
93     opus_int16 *samplesOut1_tmp[ 2 ];
94     VARDECL( opus_int16, samplesOut1_tmp_storage1 );
95     VARDECL( opus_int16, samplesOut1_tmp_storage2 );
96     VARDECL( opus_int16, samplesOut2_tmp );
97     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
98     opus_int16 *resample_out_ptr;
99     silk_decoder *psDec = ( silk_decoder * )decState;
100     silk_decoder_state *channel_state = psDec->channel_state;
101     opus_int has_side;
102     opus_int stereo_to_mono;
103     int delay_stack_alloc;
104     SAVE_STACK;
105
106     silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
107
108     /**********************************/
109     /* Test if first frame in payload */
110     /**********************************/
111     if( newPacketFlag ) {
112         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
113             channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
114         }
115     }
116
117     /* If Mono -> Stereo transition in bitstream: init state of second channel */
118     if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
119         ret += silk_init_decoder( &channel_state[ 1 ] );
120     }
121
        /* Flag a 2 -> 1 internal channel switch at an unchanged internal sample
           rate; it selects how the second API channel is produced further down. */
122     stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
123                      ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz );
124
        /* No frame decoded yet in this packet: set frames-per-packet, subframe
           count and sample rate for every internal channel. */
125     if( channel_state[ 0 ].nFramesDecoded == 0 ) {
126         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
127             opus_int fs_kHz_dec;
128             if( decControl->payloadSize_ms == 0 ) {
129                 /* Assuming packet loss, use 10 ms */
130                 channel_state[ n ].nFramesPerPacket = 1;
131                 channel_state[ n ].nb_subfr = 2;
132             } else if( decControl->payloadSize_ms == 10 ) {
133                 channel_state[ n ].nFramesPerPacket = 1;
134                 channel_state[ n ].nb_subfr = 2;
135             } else if( decControl->payloadSize_ms == 20 ) {
136                 channel_state[ n ].nFramesPerPacket = 1;
137                 channel_state[ n ].nb_subfr = 4;
138             } else if( decControl->payloadSize_ms == 40 ) {
139                 channel_state[ n ].nFramesPerPacket = 2;
140                 channel_state[ n ].nb_subfr = 4;
141             } else if( decControl->payloadSize_ms == 60 ) {
142                 channel_state[ n ].nFramesPerPacket = 3;
143                 channel_state[ n ].nb_subfr = 4;
144             } else {
145                 silk_assert( 0 );
146                 RESTORE_STACK;
147                 return SILK_DEC_INVALID_FRAME_SIZE;
148             }
                /* ( rate >> 10 ) + 1 maps 8000, 12000 and 16000 Hz to 8, 12 and 16;
                   any other result is rejected just below. */
149             fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
150             if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
151                 silk_assert( 0 );
152                 RESTORE_STACK;
153                 return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
154             }
155             ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
156         }
157     }
158
        /* Entering stereo-in/stereo-out after a call that was mono on either
           side: clear the stereo prediction and side-channel memory and start
           channel 1's resampler from a copy of channel 0's resampler state. */
159     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
160         silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
161         silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
162         silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
163     }
        /* Remember the channel configuration for the next call */
164     psDec->nChannelsAPI      = decControl->nChannelsAPI;
165     psDec->nChannelsInternal = decControl->nChannelsInternal;
166
        /* NOTE(review): API_sampleRate has already been handed to
           silk_decoder_set_fs() above by the time this range check runs. */
167     if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
168         ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
169         RESTORE_STACK;
170         return( ret );
171     }
172
173     if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
174         /* First decoder call for this payload */
175         /* Decode VAD flags and LBRR flag */
176         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
177             for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
178                 channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
179             }
180             channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
181         }
182         /* Decode LBRR flags */
183         for( n = 0; n < decControl->nChannelsInternal; n++ ) {
184             silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
185             if( channel_state[ n ].LBRR_flag ) {
186                 if( channel_state[ n ].nFramesPerPacket == 1 ) {
187                     channel_state[ n ].LBRR_flags[ 0 ] = 1;
188                 } else {
                        /* Bit i of the decoded symbol is the LBRR flag of frame i */
189                     LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
190                     for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
191                         channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
192                     }
193                 }
194             }
195         }
196
197         if( lostFlag == FLAG_DECODE_NORMAL ) {
198             /* Regular decoding: skip all LBRR data */
199             for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
200                 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
201                     if( channel_state[ n ].LBRR_flags[ i ] ) {
                            /* Scratch output only: nothing reads pulses[] after
                               silk_decode_pulses() returns. */
202                         opus_int16 pulses[ MAX_FRAME_LENGTH ];
203                         opus_int condCoding;
204
205                         if( decControl->nChannelsInternal == 2 && n == 0 ) {
206                             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
207                             if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
208                                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
209                             }
210                         }
211                         /* Use conditional coding if previous frame available */
212                         if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
213                             condCoding = CODE_CONDITIONALLY;
214                         } else {
215                             condCoding = CODE_INDEPENDENTLY;
216                         }
217                         silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
218                         silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
219                             channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
220                     }
221                 }
222             }
223         }
224     }
225
226     /* Get MS predictor index */
227     if( decControl->nChannelsInternal == 2 ) {
228         if(   lostFlag == FLAG_DECODE_NORMAL ||
229             ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
230         {
231             silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
232             /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
233             if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
234                 ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
235             {
236                 silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
237             } else {
238                 decode_only_middle = 0;
239             }
240         } else {
                /* No predictor is read from the bitstream on this call: reuse
                   the values stored in the stereo state. */
241             for( n = 0; n < 2; n++ ) {
242                 MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
243             }
244         }
245     }
246
247     /* Reset side channel decoder prediction memory for first frame with side coding */
248     if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
249         silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
250         silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
251         psDec->channel_state[ 1 ].lagPrev        = 100;
252         psDec->channel_state[ 1 ].LastGainIndex  = 10;
253         psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
254         psDec->channel_state[ 1 ].first_frame_after_reset = 1;
255     }
256
257     /* Check if the temp buffer fits into the output PCM buffer. If it fits,
258        we can delay allocating the temp buffer until after the SILK peak stack
259        usage. We need to use a < and not a <= because of the two extra samples. */
260     delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
261           < decControl->API_sampleRate*decControl->nChannelsAPI;
262     ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
263            : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
264            opus_int16 );
        /* Each channel gets frame_length + 2 samples; the decoded frame itself is
           written starting at index 2 of its channel buffer. */
265     if ( delay_stack_alloc )
266     {
267        samplesOut1_tmp[ 0 ] = samplesOut;
268        samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
269     } else {
270        samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
271        samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
272     }
273
        /* Decide whether the second channel (n == 1) is decoded or zero-filled
           in the loop below. */
274     if( lostFlag == FLAG_DECODE_NORMAL ) {
275         has_side = !decode_only_middle;
276     } else {
277         has_side = !psDec->prev_decode_only_middle
278               || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
279     }
280     /* Call decoder for one frame */
281     for( n = 0; n < decControl->nChannelsInternal; n++ ) {
282         if( n == 0 || has_side ) {
283             opus_int FrameIndex;
284             opus_int condCoding;
285
                /* channel_state[ 0 ].nFramesDecoded has already been incremented
                   when n == 1, so subtracting n gives the same frame index for
                   both channels. */
286             FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
287             /* Use independent coding if no previous frame available */
288             if( FrameIndex <= 0 ) {
289                 condCoding = CODE_INDEPENDENTLY;
290             } else if( lostFlag == FLAG_DECODE_LBRR ) {
291                 condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
292             } else if( n > 0 && psDec->prev_decode_only_middle ) {
293                 /* If we skipped a side frame in this packet, we don't
294                    need LTP scaling; the LTP state is well-defined. */
295                 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
296             } else {
297                 condCoding = CODE_CONDITIONALLY;
298             }
299             ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
300         } else {
                /* Channel not decoded: output zeros. nSamplesOutDec was set by the
                   n == 0 pass, which always takes the decode branch. */
301             silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
302         }
303         channel_state[ n ].nFramesDecoded++;
304     }
305
306     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
307         /* Convert Mid/Side to Left/Right */
308         silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
309     } else {
310         /* Buffering */
            /* Put the two samples saved in sStereo.sMid at the front of the
               buffer, then save the last two samples of this frame for the
               next call. */
311         silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
312         silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
313     }
314
315     /* Number of output samples */
316     *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
317
318     /* Set up pointers to temp buffers */
319     ALLOC( samplesOut2_tmp,
320            decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 );
321     if( decControl->nChannelsAPI == 2 ) {
322         resample_out_ptr = samplesOut2_tmp;
323     } else {
324         resample_out_ptr = samplesOut;
325     }
326
        /* Deferred allocation: the decoded samples currently live in samplesOut,
           so copy them to a stack buffer before samplesOut is overwritten with
           the resampled output below. */
327     ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
328            ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
329            : ALLOC_NONE,
330            opus_int16 );
331     if ( delay_stack_alloc ) {
332        OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
333        samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
334        samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
335     }
336     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
337
338         /* Resample decoded signal to API_sampleRate */
339         ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
340
341         /* Interleave if stereo output and stereo stream */
342         if( decControl->nChannelsAPI == 2 ) {
343             for( i = 0; i < *nSamplesOut; i++ ) {
344                 samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
345             }
346         }
347     }
348
349     /* Create two channel output from mono stream */
350     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
351         if ( stereo_to_mono ){
352             /* Resample right channel for newly collapsed stereo just in case
353                we weren't doing collapsing when switching to mono */
354             ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
355
356             for( i = 0; i < *nSamplesOut; i++ ) {
357                 samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
358             }
359         } else {
                /* Duplicate the first channel into the second one */
360             for( i = 0; i < *nSamplesOut; i++ ) {
361                 samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ];
362             }
363         }
364     }
365
366     /* Export pitch lag, measured at 48 kHz sampling rate */
367     if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
            /* ( fs_kHz - 8 ) >> 2 maps 8/12/16 kHz to index 0/1/2; each
               multiplier times its rate in kHz equals 48. */
368         int mult_tab[ 3 ] = { 6, 4, 3 };
369         decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
370     } else {
371         decControl->prevPitchLag = 0;
372     }
373
374     if( lostFlag == FLAG_PACKET_LOST ) {
375        /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
376           if we lose packets when the energy is going down */
377        for ( i = 0; i < psDec->nChannelsInternal; i++ )
378           psDec->channel_state[ i ].LastGainIndex = 10;
379     } else {
           /* Only updated when the packet was not lost, so the flag keeps its
              previous value across lost packets. */
380        psDec->prev_decode_only_middle = decode_only_middle;
381     }
382     RESTORE_STACK;
383     return ret;
384 }
385
386 #if 0
387 /* Getting table of contents for a packet */
388 opus_int silk_get_TOC(
389     const opus_uint8                *payload,           /* I    Payload data                                */
390     const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
391     const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
392     silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
393 )
394 {
395     opus_int i, flags, ret = SILK_NO_ERROR;
396
397     if( nBytesIn < 1 ) {
398         return -1;
399     }
400     if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
401         return -1;
402     }
403
404     silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );
405
406     /* For stereo, extract the flags for the mid channel */
407     flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );
408
409     Silk_TOC->inbandFECFlag = flags & 1;
410     for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
411         flags = silk_RSHIFT( flags, 1 );
412         Silk_TOC->VADFlags[ i ] = flags & 1;
413         Silk_TOC->VADFlag |= flags & 1;
414     }
415
416     return ret;
417 }
418 #endif