4d156a03520aa94d4e35bdcacae67fb725a8cc79
[opus.git] / silk / float / encode_frame_FLP.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, (subject to the limitations in the disclaimer below)
5 are permitted provided that the following conditions are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Skype Limited, nor the names of specific
12 contributors, may be used to endorse or promote products derived from
13 this software without specific prior written permission.
14 NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED
15 BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
16 CONTRIBUTORS ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
17 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
18 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
22 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "main_FLP.h"
33 #include "tuning_parameters.h"
34
35 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate           */
36 static inline void silk_LBRR_encode_FLP(
37     silk_encoder_state_FLP          *psEnc,             /* I/O  Encoder state FLP                       */
38     silk_encoder_control_FLP        *psEncCtrl,         /* I/O  Encoder control FLP                     */
39     const silk_float                 xfw[]               /* I    Input signal                            */
40 );
41
42 /****************/
43 /* Encode frame */
44 /****************/
45 opus_int silk_encode_frame_FLP(
46     silk_encoder_state_FLP          *psEnc,             /* I/O  Encoder state FLP                       */
47     opus_int32                       *pnBytesOut,        /*   O  Number of payload bytes                 */
48     ec_enc                          *psRangeEnc         /* I/O  compressor data structure               */
49 )
50 {
51     silk_encoder_control_FLP sEncCtrl;
52     opus_int     i, ret = 0;
53     silk_float   *x_frame, *res_pitch_frame;
54     silk_float   xfw[ MAX_FRAME_LENGTH ];
55     silk_float   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
56
57 TIC(ENCODE_FRAME)
58
59     psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
60
61     /**************************************************************/
62     /* Setup Input Pointers, and insert frame in input buffer    */
63     /*************************************************************/
64     /* pointers aligned with start of frame to encode */
65     x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
66     res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */
67
68     /****************************/
69     /* Voice Activity Detection */
70     /****************************/
71 TIC(VAD)
72     ret = silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );
73 TOC(VAD)
74
75     /**************************************************/
76     /* Convert speech activity into VAD and DTX flags */
77     /**************************************************/
78     if( psEnc->sCmn.nFramesEncoded == 0 ) {
79         psEnc->sCmn.inDTX = psEnc->sCmn.useDTX;
80     }
81     if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
82         psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
83         psEnc->sCmn.noSpeechCounter++;
84         if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
85             psEnc->sCmn.inDTX = 0;
86         } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
87             psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
88             psEnc->sCmn.inDTX           = 0;
89         }
90         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
91     } else {
92         psEnc->sCmn.noSpeechCounter    = 0;
93         psEnc->sCmn.inDTX              = 0;
94         psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
95         psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
96     }
97
98     /***************************************/
99     /* Ensure smooth bandwidth transitions */
100     /***************************************/
101     silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
102
103     /*******************************************/
104     /* Copy new frame to front of input buffer */
105     /*******************************************/
106     silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
107
108     /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */
109     for( i = 0; i < 8; i++ ) {
110         x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f;
111     }
112
113     /*****************************************/
114     /* Find pitch lags, initial LPC analysis */
115     /*****************************************/
116 TIC(FIND_PITCH)
117     silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame );
118 TOC(FIND_PITCH)
119
120     /************************/
121     /* Noise shape analysis */
122     /************************/
123 TIC(NOISE_SHAPE_ANALYSIS)
124     silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
125 TOC(NOISE_SHAPE_ANALYSIS)
126
127     /***************************************************/
128     /* Find linear prediction coefficients (LPC + LTP) */
129     /***************************************************/
130 TIC(FIND_PRED_COEF)
131     silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch, x_frame );
132 TOC(FIND_PRED_COEF)
133
134     /****************************************/
135     /* Process gains                        */
136     /****************************************/
137 TIC(PROCESS_GAINS)
138     silk_process_gains_FLP( psEnc, &sEncCtrl );
139 TOC(PROCESS_GAINS)
140
141     /*****************************************/
142     /* Prefiltering for noise shaper         */
143     /*****************************************/
144 TIC(PREFILTER)
145     silk_prefilter_FLP( psEnc, &sEncCtrl, xfw, x_frame );
146 TOC(PREFILTER)
147
148     /****************************************/
149     /* Low Bitrate Redundant Encoding       */
150     /****************************************/
151 TIC(LBRR)
152     silk_LBRR_encode_FLP( psEnc, &sEncCtrl, xfw );
153 TOC(LBRR)
154
155     /*****************************************/
156     /* Noise shaping quantization            */
157     /*****************************************/
158 TIC(NSQ)
159     silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, xfw );
160 TOC(NSQ)
161
162     /* Update input buffer */
163     silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
164         ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) );
165
166     /* Parameters needed for next frame */
167     psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
168     psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
169
170     /* Exit without entropy coding */
171     if( psEnc->sCmn.prefillFlag ) {
172         /* No payload */
173         *pnBytesOut = 0;
174         return ret;
175     }
176
177     /****************************************/
178     /* Encode Parameters                    */
179     /****************************************/
180 TIC(ENCODE_PARAMS)
181     silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0 );
182 TOC(ENCODE_PARAMS)
183
184     /****************************************/
185     /* Encode Excitation Signal             */
186     /****************************************/
187 TIC(ENCODE_PULSES)
188     silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
189         psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
190 TOC(ENCODE_PULSES)
191
192     /****************************************/
193     /* Finalize payload                     */
194     /****************************************/
195     psEnc->sCmn.first_frame_after_reset = 0;
196     /* Payload size */
197     *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
198 TOC(ENCODE_FRAME)
199
200 #ifdef SAVE_ALL_INTERNAL_DATA
201     /*DEBUG_STORE_DATA( xf.dat,                   pIn_HP_LP,                           psEnc->sCmn.frame_length * sizeof( opus_int16 ) );*/
202     /*DEBUG_STORE_DATA( xfw.dat,                  xfw,                                 psEnc->sCmn.frame_length * sizeof( silk_float ) );*/
203     DEBUG_STORE_DATA( pitchL.dat,               sEncCtrl.pitchL,                                 MAX_NB_SUBFR * sizeof( opus_int   ) );
204     DEBUG_STORE_DATA( pitchG_quantized.dat,     sEncCtrl.LTPCoef,            psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) );
205     DEBUG_STORE_DATA( LTPcorr.dat,              &psEnc->LTPCorr,                                                sizeof( silk_float ) );
206     DEBUG_STORE_DATA( gains.dat,                sEncCtrl.Gains,                          psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
207     DEBUG_STORE_DATA( gains_indices.dat,        &psEnc->sCmn.indices.GainsIndices,       psEnc->sCmn.nb_subfr * sizeof( opus_int8  ) );
208     DEBUG_STORE_DATA( quantOffsetType.dat,      &psEnc->sCmn.indices.quantOffsetType,                           sizeof( opus_int8  ) );
209     DEBUG_STORE_DATA( speech_activity_q8.dat,   &psEnc->sCmn.speech_activity_Q8,                                sizeof( opus_int   ) );
210     DEBUG_STORE_DATA( signalType.dat,           &psEnc->sCmn.indices.signalType,                                sizeof( opus_int8  ) );
211     DEBUG_STORE_DATA( lag_index.dat,            &psEnc->sCmn.indices.lagIndex,                                  sizeof( opus_int16 ) );
212     DEBUG_STORE_DATA( contour_index.dat,        &psEnc->sCmn.indices.contourIndex,                              sizeof( opus_int8  ) );
213     DEBUG_STORE_DATA( per_index.dat,            &psEnc->sCmn.indices.PERIndex,                                  sizeof( opus_int8  ) );
214     DEBUG_STORE_DATA( PredCoef.dat,             &sEncCtrl.PredCoef[ 1 ],          psEnc->sCmn.predictLPCOrder * sizeof( silk_float ) );
215     DEBUG_STORE_DATA( ltp_scale_idx.dat,        &psEnc->sCmn.indices.LTP_scaleIndex,                            sizeof( opus_int8   ) );
216     /*DEBUG_STORE_DATA( xq.dat,                   psEnc->sCmn.sNSQ.xqBuf,                psEnc->sCmn.frame_length * sizeof( silk_float ) );*/
217 #endif
218     return ret;
219 }
220
221 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate  */
222 static inline void silk_LBRR_encode_FLP(
223     silk_encoder_state_FLP          *psEnc,             /* I/O  Encoder state FLP                       */
224     silk_encoder_control_FLP        *psEncCtrl,         /* I/O  Encoder control FLP                     */
225     const silk_float                 xfw[]               /* I    Input signal                            */
226 )
227 {
228     opus_int     k;
229     opus_int32   Gains_Q16[ MAX_NB_SUBFR ];
230     silk_float   TempGains[ MAX_NB_SUBFR ];
231     SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
232     silk_nsq_state sNSQ_LBRR;
233
234     /*******************************************/
235     /* Control use of inband LBRR              */
236     /*******************************************/
237     if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
238         psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
239
240         /* Copy noise shaping quantizer state and quantization indices from regular encoding */
241         silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
242         silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
243
244         /* Save original gains */
245         silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
246
247         if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
248             /* First frame in packet or previous frame not LBRR coded */
249             psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
250
251             /* Increase Gains to get target LBRR rate */
252             psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases;
253             psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
254         }
255
256         /* Decode to get gains in sync with decoder */
257         silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices,
258             &psEnc->sCmn.LBRRprevLastGainIndex, psEnc->sCmn.nFramesEncoded, psEnc->sCmn.nb_subfr );
259
260         /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
261         for( k = 0; k <  psEnc->sCmn.nb_subfr; k++ ) {
262             psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f );
263         }
264
265         /*****************************************/
266         /* Noise shaping quantization            */
267         /*****************************************/
268         silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR,
269             psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw );
270
271         /* Restore original gains */
272         silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
273     }
274 }