Fix the side frame conditional coding rules.
[opus.git] / silk / float / encode_frame_FLP.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, (subject to the limitations in the disclaimer below)
5 are permitted provided that the following conditions are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Skype Limited, nor the names of specific
12 contributors, may be used to endorse or promote products derived from
13 this software without specific prior written permission.
14 NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED
15 BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
16 CONTRIBUTORS ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
17 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
18 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
22 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "main_FLP.h"
33 #include "tuning_parameters.h"
34
35 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate           */
36 static inline void silk_LBRR_encode_FLP(
37     silk_encoder_state_FLP          *psEnc,             /* I/O  Encoder state FLP                       */
38     silk_encoder_control_FLP        *psEncCtrl,         /* I/O  Encoder control FLP                     */
39     const silk_float                 xfw[],              /* I    Input signal                            */
40     opus_int                         condCoding         /* I    The type of conditional coding used so far for this frame */
41 );
42
43 /****************/
44 /* Encode frame */
45 /****************/
46 opus_int silk_encode_frame_FLP(
47     silk_encoder_state_FLP          *psEnc,             /* I/O  Encoder state FLP                       */
48     opus_int32                       *pnBytesOut,        /*   O  Number of payload bytes                 */
49     ec_enc                          *psRangeEnc,        /* I/O  compressor data structure               */
50     opus_int                         condCoding         /* I    The type of conditional coding to use   */
51 )
52 {
53     silk_encoder_control_FLP sEncCtrl;
54     opus_int     i, ret = 0;
55     silk_float   *x_frame, *res_pitch_frame;
56     silk_float   xfw[ MAX_FRAME_LENGTH ];
57     silk_float   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
58
59 TIC(ENCODE_FRAME)
60
61     psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
62
63     /**************************************************************/
64     /* Setup Input Pointers, and insert frame in input buffer    */
65     /*************************************************************/
66     /* pointers aligned with start of frame to encode */
67     x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
68     res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */
69
70     /****************************/
71     /* Voice Activity Detection */
72     /****************************/
73 TIC(VAD)
74     ret = silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
75 TOC(VAD)
76
77     /**************************************************/
78     /* Convert speech activity into VAD and DTX flags */
79     /**************************************************/
80     if( psEnc->sCmn.nFramesEncoded == 0 ) {
81         psEnc->sCmn.inDTX = psEnc->sCmn.useDTX;
82     }
83     if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
84         psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
85         psEnc->sCmn.noSpeechCounter++;
86         if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
87             psEnc->sCmn.inDTX = 0;
88         } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
89             psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
90             psEnc->sCmn.inDTX           = 0;
91         }
92         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
93     } else {
94         psEnc->sCmn.noSpeechCounter    = 0;
95         psEnc->sCmn.inDTX              = 0;
96         psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
97         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
98     }
99
100     /***************************************/
101     /* Ensure smooth bandwidth transitions */
102     /***************************************/
103     silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
104
105     /*******************************************/
106     /* Copy new frame to front of input buffer */
107     /*******************************************/
108     silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
109
110     /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */
111     for( i = 0; i < 8; i++ ) {
112         x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f;
113     }
114
115     /*****************************************/
116     /* Find pitch lags, initial LPC analysis */
117     /*****************************************/
118 TIC(FIND_PITCH)
119     silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame );
120 TOC(FIND_PITCH)
121
122     /************************/
123     /* Noise shape analysis */
124     /************************/
125 TIC(NOISE_SHAPE_ANALYSIS)
126     silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
127 TOC(NOISE_SHAPE_ANALYSIS)
128
129     /***************************************************/
130     /* Find linear prediction coefficients (LPC + LTP) */
131     /***************************************************/
132 TIC(FIND_PRED_COEF)
133     silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );
134 TOC(FIND_PRED_COEF)
135
136     /****************************************/
137     /* Process gains                        */
138     /****************************************/
139 TIC(PROCESS_GAINS)
140     silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding );
141 TOC(PROCESS_GAINS)
142
143     /*****************************************/
144     /* Prefiltering for noise shaper         */
145     /*****************************************/
146 TIC(PREFILTER)
147     silk_prefilter_FLP( psEnc, &sEncCtrl, xfw, x_frame );
148 TOC(PREFILTER)
149
150     /****************************************/
151     /* Low Bitrate Redundant Encoding       */
152     /****************************************/
153 TIC(LBRR)
154     silk_LBRR_encode_FLP( psEnc, &sEncCtrl, xfw, condCoding );
155 TOC(LBRR)
156
157     /*****************************************/
158     /* Noise shaping quantization            */
159     /*****************************************/
160 TIC(NSQ)
161     silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, xfw );
162 TOC(NSQ)
163
164     /* Update input buffer */
165     silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
166         ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) );
167
168     /* Parameters needed for next frame */
169     psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
170     psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
171
172     /* Exit without entropy coding */
173     if( psEnc->sCmn.prefillFlag ) {
174         /* No payload */
175         *pnBytesOut = 0;
176         return ret;
177     }
178
179     /****************************************/
180     /* Encode Parameters                    */
181     /****************************************/
182 TIC(ENCODE_PARAMS)
183     silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
184 TOC(ENCODE_PARAMS)
185
186     /****************************************/
187     /* Encode Excitation Signal             */
188     /****************************************/
189 TIC(ENCODE_PULSES)
190     silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
191         psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
192 TOC(ENCODE_PULSES)
193
194     /****************************************/
195     /* Finalize payload                     */
196     /****************************************/
197     psEnc->sCmn.first_frame_after_reset = 0;
198     /* Payload size */
199     *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
200 TOC(ENCODE_FRAME)
201
202 #ifdef SAVE_ALL_INTERNAL_DATA
203     /*DEBUG_STORE_DATA( xf.dat,                   pIn_HP_LP,                           psEnc->sCmn.frame_length * sizeof( opus_int16 ) );*/
204     /*DEBUG_STORE_DATA( xfw.dat,                  xfw,                                 psEnc->sCmn.frame_length * sizeof( silk_float ) );*/
205     DEBUG_STORE_DATA( pitchL.dat,               sEncCtrl.pitchL,                                 MAX_NB_SUBFR * sizeof( opus_int   ) );
206     DEBUG_STORE_DATA( pitchG_quantized.dat,     sEncCtrl.LTPCoef,            psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) );
207     DEBUG_STORE_DATA( LTPcorr.dat,              &psEnc->LTPCorr,                                                sizeof( silk_float ) );
208     DEBUG_STORE_DATA( gains.dat,                sEncCtrl.Gains,                          psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
209     DEBUG_STORE_DATA( gains_indices.dat,        &psEnc->sCmn.indices.GainsIndices,       psEnc->sCmn.nb_subfr * sizeof( opus_int8  ) );
210     DEBUG_STORE_DATA( quantOffsetType.dat,      &psEnc->sCmn.indices.quantOffsetType,                           sizeof( opus_int8  ) );
211     DEBUG_STORE_DATA( speech_activity_q8.dat,   &psEnc->sCmn.speech_activity_Q8,                                sizeof( opus_int   ) );
212     DEBUG_STORE_DATA( signalType.dat,           &psEnc->sCmn.indices.signalType,                                sizeof( opus_int8  ) );
213     DEBUG_STORE_DATA( lag_index.dat,            &psEnc->sCmn.indices.lagIndex,                                  sizeof( opus_int16 ) );
214     DEBUG_STORE_DATA( contour_index.dat,        &psEnc->sCmn.indices.contourIndex,                              sizeof( opus_int8  ) );
215     DEBUG_STORE_DATA( per_index.dat,            &psEnc->sCmn.indices.PERIndex,                                  sizeof( opus_int8  ) );
216     DEBUG_STORE_DATA( PredCoef.dat,             &sEncCtrl.PredCoef[ 1 ],          psEnc->sCmn.predictLPCOrder * sizeof( silk_float ) );
217     DEBUG_STORE_DATA( ltp_scale_idx.dat,        &psEnc->sCmn.indices.LTP_scaleIndex,                            sizeof( opus_int8   ) );
218     /*DEBUG_STORE_DATA( xq.dat,                   psEnc->sCmn.sNSQ.xqBuf,                psEnc->sCmn.frame_length * sizeof( silk_float ) );*/
219 #endif
220     return ret;
221 }
222
223 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
224 static inline void silk_LBRR_encode_FLP(
225     silk_encoder_state_FLP          *psEnc,             /* I/O  Encoder state FLP                       */
226     silk_encoder_control_FLP        *psEncCtrl,         /* I/O  Encoder control FLP                     */
227     const silk_float                 xfw[],              /* I    Input signal                            */
228     opus_int                         condCoding         /* I    The type of conditional coding used so far for this frame */
229 )
230 {
231     opus_int     k;
232     opus_int32   Gains_Q16[ MAX_NB_SUBFR ];
233     silk_float   TempGains[ MAX_NB_SUBFR ];
234     SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
235     silk_nsq_state sNSQ_LBRR;
236
237     /*******************************************/
238     /* Control use of inband LBRR              */
239     /*******************************************/
240     if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
241         psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
242
243         /* Copy noise shaping quantizer state and quantization indices from regular encoding */
244         silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
245         silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
246
247         /* Save original gains */
248         silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
249
250         if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
251             /* First frame in packet or previous frame not LBRR coded */
252             psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
253
254             /* Increase Gains to get target LBRR rate */
255             psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases;
256             psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
257         }
258
259         /* Decode to get gains in sync with decoder */
260         silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices,
261             &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
262
263         /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
264         for( k = 0; k <  psEnc->sCmn.nb_subfr; k++ ) {
265             psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f );
266         }
267
268         /*****************************************/
269         /* Noise shaping quantization            */
270         /*****************************************/
271         silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR,
272             psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw );
273
274         /* Restore original gains */
275         silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
276     }
277 }