Silk makes use of Opus VAD
[opus.git] / silk / float / encode_frame_FLP.c
index 1d4d8a7..b029c3f 100644 (file)
@@ -42,18 +42,25 @@ static OPUS_INLINE void silk_LBRR_encode_FLP(
 );
 
 void silk_encode_do_VAD_FLP(
-    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
+    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
+    opus_int                        activity                            /* I    Decision of Opus voice activity detector    */
 )
 {
+    const opus_int activity_threshold = SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 );
+
     /****************************/
     /* Voice Activity Detection */
     /****************************/
     silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );
+    /* If Opus VAD is inactive and Silk VAD is active: lower Silk VAD to just under the threshold */
+    if( activity == VAD_NO_ACTIVITY && psEnc->sCmn.speech_activity_Q8 >= activity_threshold ) {
+        psEnc->sCmn.speech_activity_Q8 = activity_threshold - 1;
+    }
 
     /**************************************************/
     /* Convert speech activity into VAD and DTX flags */
     /**************************************************/
-    if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
+    if( psEnc->sCmn.speech_activity_Q8 < activity_threshold ) {
         psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
         psEnc->sCmn.noSpeechCounter++;
         if( psEnc->sCmn.noSpeechCounter <= NB_SPEECH_FRAMES_BEFORE_DTX ) {