CBR: lock the gain on a subframe when the number of pulses stops going down
[opus.git] / silk / float / encode_frame_FLP.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>
#include "main_FLP.h"
#include "tuning_parameters.h"
34
35 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */
36 static OPUS_INLINE void silk_LBRR_encode_FLP(
37     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
38     silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
39     const silk_float                xfw[],                              /* I    Input signal                                */
40     opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
41 );
42
43 void silk_encode_do_VAD_FLP(
44     silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
45 )
46 {
47     /****************************/
48     /* Voice Activity Detection */
49     /****************************/
50     silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
51
52     /**************************************************/
53     /* Convert speech activity into VAD and DTX flags */
54     /**************************************************/
55     if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
56         psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
57         psEnc->sCmn.noSpeechCounter++;
58         if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
59             psEnc->sCmn.inDTX = 0;
60         } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
61             psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
62             psEnc->sCmn.inDTX           = 0;
63         }
64         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
65     } else {
66         psEnc->sCmn.noSpeechCounter    = 0;
67         psEnc->sCmn.inDTX              = 0;
68         psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
69         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
70     }
71 }
72
73 /****************/
74 /* Encode frame */
75 /****************/
76 opus_int silk_encode_frame_FLP(
77     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
78     opus_int32                      *pnBytesOut,                        /* O    Number of payload bytes;                    */
79     ec_enc                          *psRangeEnc,                        /* I/O  compressor data structure                   */
80     opus_int                        condCoding,                         /* I    The type of conditional coding to use       */
81     opus_int                        maxBits,                            /* I    If > 0: maximum number of output bits       */
82     opus_int                        useCBR                              /* I    Flag to force constant-bitrate operation    */
83 )
84 {
85     silk_encoder_control_FLP sEncCtrl;
86     opus_int     i, iter, maxIter, found_upper, found_lower, ret = 0;
87     silk_float   *x_frame, *res_pitch_frame;
88     silk_float   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
89     ec_enc       sRangeEnc_copy, sRangeEnc_copy2;
90     silk_nsq_state sNSQ_copy, sNSQ_copy2;
91     opus_int32   seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
92     opus_int32   gainsID, gainsID_lower, gainsID_upper;
93     opus_int16   gainMult_Q8;
94     opus_int16   ec_prevLagIndex_copy;
95     opus_int     ec_prevSignalType_copy;
96     opus_int8    LastGainIndex_copy2;
97     opus_int32   pGains_Q16[ MAX_NB_SUBFR ];
98     opus_uint8   ec_buf_copy[ 1275 ];
99     opus_int     gain_lock[ MAX_NB_SUBFR ] = {0};
100     opus_int16   best_gain_mult[ MAX_NB_SUBFR ];
101     opus_int     best_sum[ MAX_NB_SUBFR ];
102
103     /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
104     LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
105
106     psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
107
108     /**************************************************************/
109     /* Set up Input Pointers, and insert frame in input buffer    */
110     /**************************************************************/
111     /* pointers aligned with start of frame to encode */
112     x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
113     res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */
114
115     /***************************************/
116     /* Ensure smooth bandwidth transitions */
117     /***************************************/
118     silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
119
120     /*******************************************/
121     /* Copy new frame to front of input buffer */
122     /*******************************************/
123     silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
124
125     /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */
126     for( i = 0; i < 8; i++ ) {
127         x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f;
128     }
129
130     if( !psEnc->sCmn.prefillFlag ) {
131         /*****************************************/
132         /* Find pitch lags, initial LPC analysis */
133         /*****************************************/
134         silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
135
136         /************************/
137         /* Noise shape analysis */
138         /************************/
139         silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
140
141         /***************************************************/
142         /* Find linear prediction coefficients (LPC + LTP) */
143         /***************************************************/
144         silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame, condCoding );
145
146         /****************************************/
147         /* Process gains                        */
148         /****************************************/
149         silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding );
150
151         /****************************************/
152         /* Low Bitrate Redundant Encoding       */
153         /****************************************/
154         silk_LBRR_encode_FLP( psEnc, &sEncCtrl, x_frame, condCoding );
155
156         /* Loop over quantizer and entroy coding to control bitrate */
157         maxIter = 6;
158         gainMult_Q8 = SILK_FIX_CONST( 1, 8 );
159         found_lower = 0;
160         found_upper = 0;
161         gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
162         gainsID_lower = -1;
163         gainsID_upper = -1;
164         /* Copy part of the input state */
165         silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
166         silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
167         seed_copy = psEnc->sCmn.indices.Seed;
168         ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
169         ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
170         for( iter = 0; ; iter++ ) {
171             if( gainsID == gainsID_lower ) {
172                 nBits = nBits_lower;
173             } else if( gainsID == gainsID_upper ) {
174                 nBits = nBits_upper;
175             } else {
176                 /* Restore part of the input state */
177                 if( iter > 0 ) {
178                     silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
179                     silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
180                     psEnc->sCmn.indices.Seed = seed_copy;
181                     psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
182                     psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
183                 }
184
185                 /*****************************************/
186                 /* Noise shaping quantization            */
187                 /*****************************************/
188                 silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, x_frame );
189
190                 /****************************************/
191                 /* Encode Parameters                    */
192                 /****************************************/
193                 silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
194
195                 /****************************************/
196                 /* Encode Excitation Signal             */
197                 /****************************************/
198                 silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
199                       psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
200
201                 nBits = ec_tell( psRangeEnc );
202
203                 if( useCBR == 0 && iter == 0 && nBits <= maxBits ) {
204                     break;
205                 }
206             }
207
208             if( iter == maxIter ) {
209                 if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
210                     /* Restore output state from earlier iteration that did meet the bitrate budget */
211                     silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
212                     silk_assert( sRangeEnc_copy2.offs <= 1275 );
213                     silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
214                     silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
215                     psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
216                 }
217                 break;
218             }
219
220             if( nBits > maxBits ) {
221                 if( found_lower == 0 && iter >= 2 ) {
222                     /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
223                     sEncCtrl.Lambda = silk_max_float(sEncCtrl.Lambda*1.5f, 1.5f);
224                     /* Reducing dithering can help us hit the target. */
225                     psEnc->sCmn.indices.quantOffsetType = 0;
226                     found_upper = 0;
227                     gainsID_upper = -1;
228                 } else {
229                     found_upper = 1;
230                     nBits_upper = nBits;
231                     gainMult_upper = gainMult_Q8;
232                     gainsID_upper = gainsID;
233                 }
234             } else if( nBits < maxBits - 5 ) {
235                 found_lower = 1;
236                 nBits_lower = nBits;
237                 gainMult_lower = gainMult_Q8;
238                 if( gainsID != gainsID_lower ) {
239                     gainsID_lower = gainsID;
240                     /* Copy part of the output state */
241                     silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
242                     silk_assert( psRangeEnc->offs <= 1275 );
243                     silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
244                     silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
245                     LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
246                 }
247             } else {
248                 /* Within 5 bits of budget: close enough */
249                 break;
250             }
251
252             if ( !found_lower && nBits > maxBits ) {
253                 int j;
254                 for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
255                     int sum=0;
256                     for ( j = i*psEnc->sCmn.subfr_length; j < (i+1)*psEnc->sCmn.subfr_length; j++ ) {
257                         sum += abs( psEnc->sCmn.pulses[j] );
258                     }
259                     if ( iter == 0 || (sum < best_sum[i] && !gain_lock[i]) ) {
260                         best_sum[i] = sum;
261                         best_gain_mult[i] = gainMult_Q8;
262                     } else {
263                         gain_lock[i] = 1;
264                     }
265                 }
266             }
267             if( ( found_lower & found_upper ) == 0 ) {
268                 /* Adjust gain according to high-rate rate/distortion curve */
269                 if( nBits > maxBits ) {
270                     if (gainMult_Q8 < 16384) {
271                         gainMult_Q8 *= 2;
272                     } else {
273                         gainMult_Q8 = 32767;
274                     }
275                 } else {
276                     opus_int32 gain_factor_Q16;
277                     gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
278                     gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
279                 }
280             } else {
281                 /* Adjust gain by interpolating */
282                 gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower );
283                 /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
284                 if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
285                     gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
286                 } else
287                 if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
288                     gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
289                 }
290             }
291
292             for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
293                 opus_int16 tmp;
294                 if ( gain_lock[i] ) {
295                     tmp = best_gain_mult[i];
296                 } else {
297                     tmp = gainMult_Q8;
298                 }
299                 pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], tmp ), 8 );
300             }
301
302             /* Quantize gains */
303             psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
304             silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16,
305                   &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
306
307             /* Unique identifier of gains vector */
308             gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
309
310             /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
311             for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
312                 sEncCtrl.Gains[ i ] = pGains_Q16[ i ] / 65536.0f;
313             }
314         }
315     }
316
317     /* Update input buffer */
318     silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
319         ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) );
320
321     /* Exit without entropy coding */
322     if( psEnc->sCmn.prefillFlag ) {
323         /* No payload */
324         *pnBytesOut = 0;
325         return ret;
326     }
327
328     /* Parameters needed for next frame */
329     psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
330     psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
331
332     /****************************************/
333     /* Finalize payload                     */
334     /****************************************/
335     psEnc->sCmn.first_frame_after_reset = 0;
336     /* Payload size */
337     *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
338
339     return ret;
340 }
341
342 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
343 static OPUS_INLINE void silk_LBRR_encode_FLP(
344     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
345     silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
346     const silk_float                xfw[],                              /* I    Input signal                                */
347     opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
348 )
349 {
350     opus_int     k;
351     opus_int32   Gains_Q16[ MAX_NB_SUBFR ];
352     silk_float   TempGains[ MAX_NB_SUBFR ];
353     SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
354     silk_nsq_state sNSQ_LBRR;
355
356     /*******************************************/
357     /* Control use of inband LBRR              */
358     /*******************************************/
359     if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
360         psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
361
362         /* Copy noise shaping quantizer state and quantization indices from regular encoding */
363         silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
364         silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
365
366         /* Save original gains */
367         silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
368
369         if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
370             /* First frame in packet or previous frame not LBRR coded */
371             psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
372
373             /* Increase Gains to get target LBRR rate */
374             psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases;
375             psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
376         }
377
378         /* Decode to get gains in sync with decoder */
379         silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices,
380             &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
381
382         /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
383         for( k = 0; k <  psEnc->sCmn.nb_subfr; k++ ) {
384             psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f );
385         }
386
387         /*****************************************/
388         /* Noise shaping quantization            */
389         /*****************************************/
390         silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR,
391             psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw );
392
393         /* Restore original gains */
394         silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
395     }
396 }