Makes the encoder more aggressive about meeting the rate target
[opus.git] / silk / float / encode_frame_FLP.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "main_FLP.h"
33 #include "tuning_parameters.h"
34
35 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate */
/* Forward declaration only: the definition appears further down in this file, after       */
/* silk_encode_frame_FLP(), which calls it once per non-prefill frame.                      */
36 static OPUS_INLINE void silk_LBRR_encode_FLP(
37     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
38     silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
39     const silk_float                xfw[],                              /* I    Input signal                                */
40     opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
41 );
42
43 void silk_encode_do_VAD_FLP(
44     silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
45 )
46 {
47     /****************************/
48     /* Voice Activity Detection */
49     /****************************/
50     silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
51
52     /**************************************************/
53     /* Convert speech activity into VAD and DTX flags */
54     /**************************************************/
55     if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
56         psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
57         psEnc->sCmn.noSpeechCounter++;
58         if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
59             psEnc->sCmn.inDTX = 0;
60         } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
61             psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
62             psEnc->sCmn.inDTX           = 0;
63         }
64         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
65     } else {
66         psEnc->sCmn.noSpeechCounter    = 0;
67         psEnc->sCmn.inDTX              = 0;
68         psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
69         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
70     }
71 }
72
73 /****************/
74 /* Encode frame */
75 /****************/
/*
 * Encodes the frame held in psEnc->sCmn.inputBuf into the range coder.
 *
 * Unless prefillFlag is set, the frame goes through pitch analysis, noise shape
 * analysis, prediction coefficient estimation, gain processing and prefiltering,
 * followed by the LBRR encoding step. A rate control loop then repeats noise
 * shaping quantization and entropy coding, rescaling the gains between passes,
 * until the bit count is acceptable relative to maxBits or maxIter is reached.
 *
 * With prefillFlag set the analysis and coding stages are skipped and
 * *pnBytesOut is set to 0. Otherwise *pnBytesOut is the range coder bit count
 * rounded up to whole bytes.
 *
 * Returns ret; no statement in this function changes it from its initial 0.
 */
76 opus_int silk_encode_frame_FLP(
77     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
78     opus_int32                      *pnBytesOut,                        /* O    Number of payload bytes (0 when prefilling) */
79     ec_enc                          *psRangeEnc,                        /* I/O  compressor data structure                   */
80     opus_int                        condCoding,                         /* I    The type of conditional coding to use       */
81     opus_int                        maxBits,                            /* I    If > 0: maximum number of output bits       */
82     opus_int                        useCBR                              /* I    Flag to force constant-bitrate operation    */
83 )
84 {
85     silk_encoder_control_FLP sEncCtrl;
86     opus_int     i, iter, maxIter, found_upper, found_lower, ret = 0;
87     silk_float   *x_frame, *res_pitch_frame;
88     silk_float   xfw[ MAX_FRAME_LENGTH ];
89     silk_float   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
90     ec_enc       sRangeEnc_copy, sRangeEnc_copy2;
91     silk_nsq_state sNSQ_copy, sNSQ_copy2;
92     opus_int32   seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
93     opus_int32   gainsID, gainsID_lower, gainsID_upper;
94     opus_int16   gainMult_Q8;
95     opus_int16   ec_prevLagIndex_copy;
96     opus_int     ec_prevSignalType_copy;
97     opus_int8    LastGainIndex_copy2;
98     opus_int32   pGains_Q16[ MAX_NB_SUBFR ];
99     opus_uint8   ec_buf_copy[ 1275 ];
100
101     /* Defensive initialisation: stated by the authors to be unnecessary for correctness, but compilers (including gcc) cannot prove that - presumably this avoids maybe-uninitialised warnings */
102     LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
103
        /* Seed takes the two low bits of the frame counter (0..3); the counter is advanced afterwards */
104     psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
105
106     /**************************************************************/
107     /* Set up Input Pointers, and insert frame in input buffer    */
108     /**************************************************************/
109     /* pointers aligned with start of frame to encode */
110     x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
111     res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */
112
113     /***************************************/
114     /* Ensure smooth bandwidth transitions */
115     /***************************************/
116     silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
117
118     /*******************************************/
119     /* Copy new frame to front of input buffer */
120     /*******************************************/
        /* The int16 input is converted to float and placed LA_SHAPE_MS * fs_kHz samples past x_frame */
121     silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
122
123     /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */
        /* Eight samples, spaced frame_length / 8 apart, are offset by +1e-6, +1e-6, -1e-6, -1e-6, ... */
124     for( i = 0; i < 8; i++ ) {
125         x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f;
126     }
127
        /* Prefill frames only update the buffers above; all analysis and coding below is skipped */
128     if( !psEnc->sCmn.prefillFlag ) {
129         /*****************************************/
130         /* Find pitch lags, initial LPC analysis */
131         /*****************************************/
132         silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
133
134         /************************/
135         /* Noise shape analysis */
136         /************************/
137         silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
138
139         /***************************************************/
140         /* Find linear prediction coefficients (LPC + LTP) */
141         /***************************************************/
142         silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
143
144         /****************************************/
145         /* Process gains                        */
146         /****************************************/
147         silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding );
148
149         /*****************************************/
150         /* Prefiltering for noise shaper         */
151         /*****************************************/
152         silk_prefilter_FLP( psEnc, &sEncCtrl, xfw, x_frame );
153
154         /****************************************/
155         /* Low Bitrate Redundant Encoding       */
156         /****************************************/
157         silk_LBRR_encode_FLP( psEnc, &sEncCtrl, xfw, condCoding );
158
159         /* Loop over quantizer and entropy coding to control bitrate */
            /* "lower" tracks a result more than 5 bits under maxBits, "upper" a result over maxBits; -1 marks a gains ID as not yet recorded */
160         maxIter = 6;
161         gainMult_Q8 = SILK_FIX_CONST( 1, 8 );
162         found_lower = 0;
163         found_upper = 0;
164         gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
165         gainsID_lower = -1;
166         gainsID_upper = -1;
167         /* Copy part of the input state */
            /* These copies are what every later iteration is rewound to before re-encoding */
168         silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
169         silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
170         seed_copy = psEnc->sCmn.indices.Seed;
171         ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
172         ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
173         for( iter = 0; ; iter++ ) {
                /* If the gains vector matches one whose bit count is already recorded, reuse that count instead of re-running quantization and coding */
174             if( gainsID == gainsID_lower ) {
175                 nBits = nBits_lower;
176             } else if( gainsID == gainsID_upper ) {
177                 nBits = nBits_upper;
178             } else {
179                 /* Restore part of the input state */
180                 if( iter > 0 ) {
181                     silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
182                     silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
183                     psEnc->sCmn.indices.Seed = seed_copy;
184                     psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
185                     psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
186                 }
187
188                 /*****************************************/
189                 /* Noise shaping quantization            */
190                 /*****************************************/
191                 silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, xfw );
192
193                 /****************************************/
194                 /* Encode Parameters                    */
195                 /****************************************/
196                 silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
197
198                 /****************************************/
199                 /* Encode Excitation Signal             */
200                 /****************************************/
201                 silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
202                       psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
203
                    /* Bit position of the range coder after coding this frame */
204                 nBits = ec_tell( psRangeEnc );
205
                    /* Without CBR, a first pass that already fits the budget is accepted as is */
206                 if( useCBR == 0 && iter == 0 && nBits <= maxBits ) {
207                     break;
208                 }
209             }
210
                /* Iteration limit reached: fall back to the saved "lower" output if the current result is that same gains vector or is over budget */
211             if( iter == maxIter ) {
212                 if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
213                     /* Restore output state from earlier iteration that did meet the bitrate budget */
214                     silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
215                     silk_assert( sRangeEnc_copy2.offs <= 1275 );
216                     silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
217                     silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
218                     psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
219                 }
220                 break;
221             }
222
                /* Classify the result: over budget, more than 5 bits under budget, or close enough */
223             if( nBits > maxBits ) {
224                 if( found_lower == 0 && iter >= 2 ) {
225                     /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
226                     sEncCtrl.Lambda = silk_max_float(sEncCtrl.Lambda*1.5f, 1.5f);
227                     /* Reducing dithering can help us hit the target. */
228                     psEnc->sCmn.indices.quantOffsetType = 0;
229                     found_upper = 0;
230                     gainsID_upper = -1;
231                 } else {
232                     found_upper = 1;
233                     nBits_upper = nBits;
234                     gainMult_upper = gainMult_Q8;
235                     gainsID_upper = gainsID;
236                 }
237             } else if( nBits < maxBits - 5 ) {
238                 found_lower = 1;
239                 nBits_lower = nBits;
240                 gainMult_lower = gainMult_Q8;
                    /* Snapshot the output only when this gains vector differs from the one already saved */
241                 if( gainsID != gainsID_lower ) {
242                     gainsID_lower = gainsID;
243                     /* Copy part of the output state */
244                     silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
245                     silk_assert( psRangeEnc->offs <= 1275 );
246                     silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
247                     silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
248                     LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
249                 }
250             } else {
251                 /* Within 5 bits of budget: close enough */
252                 break;
253             }
254
                /* Choose the next gain multiplier: extrapolate while only one side is known, interpolate once both are */
255             if( ( found_lower & found_upper ) == 0 ) {
256                 /* Adjust gain according to high-rate rate/distortion curve */
257                 if( nBits > maxBits ) {
                        /* Over budget: double the multiplier, capping at 32767 so the opus_int16 cannot overflow */
258                     if (gainMult_Q8 < 16384) {
259                         gainMult_Q8 *= 2;
260                     } else {
261                         gainMult_Q8 = 32767;
262                     }
263                 } else {
                        /* Under budget: the factor is derived from the bit deficit per sample ( nBits - maxBits ) / frame_length; */
                        /* NOTE(review): exact Q-format behaviour of silk_log2lin / silk_SMULWB is defined outside this file - confirm there */
264                     opus_int32 gain_factor_Q16;
265                     gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
266                     gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
267                 }
268             } else {
269                 /* Adjust gain by interpolating */
                    /* Linear interpolation between the two recorded ( nBits, gainMult ) points, evaluated at maxBits */
270                 gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower );
271                 /* New gain multiplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
272                 if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
273                     gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
274                 } else
275                 if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
276                     gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
277                 }
278             }
279
                /* Apply the new multiplier to the unquantized gains of every subframe */
280             for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
281                 pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
282             }
283
284             /* Quantize gains */
                /* LastGainIndex is reset from sEncCtrl.lastGainIndexPrev before each requantization, so every pass starts from the same index */
285             psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
286             silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16,
287                   &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
288
289             /* Unique identifier of gains vector */
290             gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
291
292             /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
293             for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
294                 sEncCtrl.Gains[ i ] = pGains_Q16[ i ] / 65536.0f;
295             }
296         }
297     }
298
299     /* Update input buffer */
        /* Shift x_buf down by one frame, keeping ltp_mem_length + LA_SHAPE_MS * fs_kHz samples; memmove because the regions overlap */
300     silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
301         ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) );
302
303     /* Exit without entropy coding */
304     if( psEnc->sCmn.prefillFlag ) {
305         /* No payload */
306         *pnBytesOut = 0;
307         return ret;
308     }
309
310     /* Parameters needed for next frame */
311     psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
312     psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
313
314     /****************************************/
315     /* Finalize payload                     */
316     /****************************************/
317     psEnc->sCmn.first_frame_after_reset = 0;
318     /* Payload size: bit count rounded up to whole bytes */
319     *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
320
321     return ret;
322 }
323
324 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
325 static OPUS_INLINE void silk_LBRR_encode_FLP(
326     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
327     silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
328     const silk_float                xfw[],                              /* I    Input signal                                */
329     opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
330 )
331 {
332     opus_int     k;
333     opus_int32   Gains_Q16[ MAX_NB_SUBFR ];
334     silk_float   TempGains[ MAX_NB_SUBFR ];
335     SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
336     silk_nsq_state sNSQ_LBRR;
337
338     /*******************************************/
339     /* Control use of inband LBRR              */
340     /*******************************************/
341     if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
342         psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
343
344         /* Copy noise shaping quantizer state and quantization indices from regular encoding */
345         silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
346         silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
347
348         /* Save original gains */
349         silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
350
351         if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
352             /* First frame in packet or previous frame not LBRR coded */
353             psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
354
355             /* Increase Gains to get target LBRR rate */
356             psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases;
357             psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
358         }
359
360         /* Decode to get gains in sync with decoder */
361         silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices,
362             &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
363
364         /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
365         for( k = 0; k <  psEnc->sCmn.nb_subfr; k++ ) {
366             psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f );
367         }
368
369         /*****************************************/
370         /* Noise shaping quantization            */
371         /*****************************************/
372         silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR,
373             psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw );
374
375         /* Restore original gains */
376         silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
377     }
378 }