Merge commit '390c89225d'
[opus.git] / silk / fixed / encode_frame_FIX.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "main_FIX.h"
33 #include "tuning_parameters.h"
34
35 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate           */
36 static inline void silk_LBRR_encode_FIX(
37     silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
38     silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Pointer to Silk FIX encoder control struct                                  */
39     const opus_int32                xfw_Q3[],                               /* I    Input signal                                                                */
40     opus_int                        condCoding                              /* I    The type of conditional coding used so far for this frame                   */
41 );
42
43 void silk_encode_do_VAD_FIX(
44     silk_encoder_state_FIX          *psEnc                                  /* I/O  Pointer to Silk FIX encoder state                                           */
45 )
46 {
47     /****************************/
48     /* Voice Activity Detection */
49     /****************************/
50     silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
51
52     /**************************************************/
53     /* Convert speech activity into VAD and DTX flags */
54     /**************************************************/
55     if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
56         psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
57         psEnc->sCmn.noSpeechCounter++;
58         if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
59             psEnc->sCmn.inDTX = 0;
60         } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
61             psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
62             psEnc->sCmn.inDTX           = 0;
63         }
64         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
65     } else {
66         psEnc->sCmn.noSpeechCounter    = 0;
67         psEnc->sCmn.inDTX              = 0;
68         psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
69         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
70     }
71 }
72
73 /****************/
74 /* Encode frame */
75 /****************/
76 opus_int silk_encode_frame_FIX(
77     silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
78     opus_int32                      *pnBytesOut,                            /* O    Pointer to number of payload bytes;                                         */
79     ec_enc                          *psRangeEnc,                            /* I/O  compressor data structure                                                   */
80     opus_int                        condCoding,                             /* I    The type of conditional coding to use                                       */
81     opus_int                        maxBits,                                /* I    If > 0: maximum number of output bits                                       */
82     opus_int                        useCBR                                  /* I    Flag to force constant-bitrate operation                                    */
83 )
84 {
85     silk_encoder_control_FIX sEncCtrl;
86     opus_int     i, iter, maxIter, found_upper, found_lower, ret = 0;
87     opus_int16   *x_frame, *res_pitch_frame;
88     opus_int32   xfw_Q3[ MAX_FRAME_LENGTH ];
89     opus_int16   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
90     ec_enc       sRangeEnc_copy, sRangeEnc_copy2;
91     silk_nsq_state sNSQ_copy, sNSQ_copy2;
92     opus_int32   seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
93     opus_int32   gainsID, gainsID_lower, gainsID_upper;
94     opus_int16   gainMult_Q8;
95     opus_int16   ec_prevLagIndex_copy;
96     opus_int     ec_prevSignalType_copy;
97     opus_int8    LastGainIndex_copy2;
98     opus_uint8   ec_buf_copy[ 1275 ];
99
100     /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
101     LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
102
103     psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
104
105     /**************************************************************/
106     /* Set up Input Pointers, and insert frame in input buffer   */
107     /*************************************************************/
108     /* pointers aligned with start of frame to encode */
109     x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
110     res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */
111
112     /***************************************/
113     /* Ensure smooth bandwidth transitions */
114     /***************************************/
115     silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
116
117     /*******************************************/
118     /* Copy new frame to front of input buffer */
119     /*******************************************/
120     silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) );
121
122     if( !psEnc->sCmn.prefillFlag ) {
123         /*****************************************/
124         /* Find pitch lags, initial LPC analysis */
125         /*****************************************/
126         silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame );
127
128         /************************/
129         /* Noise shape analysis */
130         /************************/
131         silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
132
133         /***************************************************/
134         /* Find linear prediction coefficients (LPC + LTP) */
135         /***************************************************/
136         silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
137
138         /****************************************/
139         /* Process gains                        */
140         /****************************************/
141         silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding );
142
143         /*****************************************/
144         /* Prefiltering for noise shaper         */
145         /*****************************************/
146         silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame );
147
148         /****************************************/
149         /* Low Bitrate Redundant Encoding       */
150         /****************************************/
151         silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding );
152
153         /* Loop over quantizer and entropy coding to control bitrate */
154         maxIter = 6;
155         gainMult_Q8 = SILK_FIX_CONST( 1, 8 );
156         found_lower = 0;
157         found_upper = 0;
158         gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
159         gainsID_lower = -1;
160         gainsID_upper = -1;
161         /* Copy part of the input state */
162         silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
163         silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
164         seed_copy = psEnc->sCmn.indices.Seed;
165         ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
166         ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
167         for( iter = 0; ; iter++ ) {
168             if( gainsID == gainsID_lower ) {
169                 nBits = nBits_lower;
170             } else if( gainsID == gainsID_upper ) {
171                 nBits = nBits_upper;
172             } else {
173                 /* Restore part of the input state */
174                 if( iter > 0 ) {
175                     silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
176                     silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
177                     psEnc->sCmn.indices.Seed = seed_copy;
178                     psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
179                     psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
180                 }
181
182                 /*****************************************/
183                 /* Noise shaping quantization            */
184                 /*****************************************/
185                 if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
186                     silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
187                            sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
188                            sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
189                 } else {
190                     silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses,
191                             sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14,
192                             sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 );
193                 }
194
195                 /****************************************/
196                 /* Encode Parameters                    */
197                 /****************************************/
198                 silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
199
200                 /****************************************/
201                 /* Encode Excitation Signal             */
202                 /****************************************/
203                 silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
204                     psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
205
206                 nBits = ec_tell( psRangeEnc );
207
208                 if( useCBR == 0 && iter == 0 && nBits <= maxBits ) {
209                     break;
210                 }
211             }
212
213             if( iter == maxIter ) {
214                 if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
215                     /* Restore output state from earlier iteration that did meet the bitrate budget */
216                     silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
217                     silk_assert( sRangeEnc_copy2.offs <= 1275 );
218                     silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
219                     silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
220                     psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
221                 }
222                 break;
223             }
224
225             if( nBits > maxBits ) {
226                 if( found_lower == 0 && iter >= 2 ) {
227                     /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
228                     sEncCtrl.Lambda_Q10 = silk_ADD_RSHIFT32( sEncCtrl.Lambda_Q10, sEncCtrl.Lambda_Q10, 1 );
229                     found_upper = 0;
230                     gainsID_upper = -1;
231                 } else {
232                     found_upper = 1;
233                     nBits_upper = nBits;
234                     gainMult_upper = gainMult_Q8;
235                     gainsID_upper = gainsID;
236                 }
237             } else if( nBits < maxBits - 5 ) {
238                 found_lower = 1;
239                 nBits_lower = nBits;
240                 gainMult_lower = gainMult_Q8;
241                 if( gainsID != gainsID_lower ) {
242                     gainsID_lower = gainsID;
243                     /* Copy part of the output state */
244                     silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
245                     silk_assert( psRangeEnc->offs <= 1275 );
246                     silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
247                     silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
248                     LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
249                 }
250             } else {
251                 /* Within 5 bits of budget: close enough */
252                 break;
253             }
254
255             if( ( found_lower & found_upper ) == 0 ) {
256                 /* Adjust gain according to high-rate rate/distortion curve */
257                 opus_int32 gain_factor_Q16;
258                 gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
259                 gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) );
260                 if( nBits > maxBits ) {
261                     gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) );
262                 }
263                 gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
264             } else {
265                 /* Adjust gain by interpolating */
266                 gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower );
267                 /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
268                 if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
269                     gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
270                 } else
271                 if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
272                     gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
273                 }
274             }
275
276             for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
277                 sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
278             }
279  
280             /* Quantize gains */
281             psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
282             silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16,
283                   &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
284
285             /* Unique identifier of gains vector */
286             gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
287         }
288     }
289
290     /* Update input buffer */
291     silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
292         ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) );
293
294     /* Parameters needed for next frame */
295     psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
296     psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
297
298     /* Exit without entropy coding */
299     if( psEnc->sCmn.prefillFlag ) {
300         /* No payload */
301         *pnBytesOut = 0;
302         return ret;
303     }
304
305     /****************************************/
306     /* Finalize payload                     */
307     /****************************************/
308     psEnc->sCmn.first_frame_after_reset = 0;
309     /* Payload size */
310     *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
311
312     return ret;
313 }
314
315 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
316 static inline void silk_LBRR_encode_FIX(
317     silk_encoder_state_FIX          *psEnc,                                 /* I/O  Pointer to Silk FIX encoder state                                           */
318     silk_encoder_control_FIX        *psEncCtrl,                             /* I/O  Pointer to Silk FIX encoder control struct                                  */
319     const opus_int32                xfw_Q3[],                               /* I    Input signal                                                                */
320     opus_int                        condCoding                              /* I    The type of conditional coding used so far for this frame                   */
321 )
322 {
323     opus_int32   TempGains_Q16[ MAX_NB_SUBFR ];
324     SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
325     silk_nsq_state sNSQ_LBRR;
326
327     /*******************************************/
328     /* Control use of inband LBRR              */
329     /*******************************************/
330     if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
331         psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
332
333         /* Copy noise shaping quantizer state and quantization indices from regular encoding */
334         silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
335         silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
336
337         /* Save original gains */
338         silk_memcpy( TempGains_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
339
340         if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
341             /* First frame in packet or previous frame not LBRR coded */
342             psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
343
344             /* Increase Gains to get target LBRR rate */
345             psIndices_LBRR->GainsIndices[ 0 ] = psIndices_LBRR->GainsIndices[ 0 ] + psEnc->sCmn.LBRR_GainIncreases;
346             psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
347         }
348
349         /* Decode to get gains in sync with decoder         */
350         /* Overwrite unquantized gains with quantized gains */
351         silk_gains_dequant( psEncCtrl->Gains_Q16, psIndices_LBRR->GainsIndices,
352             &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
353
354         /*****************************************/
355         /* Noise shaping quantization            */
356         /*****************************************/
357         if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) {
358             silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
359                 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
360                 psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
361                 psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
362         } else {
363             silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3,
364                 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14,
365                 psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14,
366                 psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 );
367         }
368
369         /* Restore original gains */
370         silk_memcpy( psEncCtrl->Gains_Q16, TempGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
371     }
372 }