Update SILK code using the CELT range coder
[opus.git] / src_common / SKP_Silk_VAD.c
similarity index 83%
rename from src/SKP_Silk_VAD.c
rename to src_common/SKP_Silk_VAD.c
index 30a5aa6..40091cc 100644 (file)
@@ -33,6 +33,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <stdlib.h>\r
 #include "SKP_Silk_main.h"\r
 \r
+#define SKP_SILK_VAD_HANDLE_10MS_FRAMES     1\r
+\r
 /**********************************/\r
 /* Initialization of the Silk VAD */\r
 /**********************************/\r
@@ -72,32 +74,37 @@ const static SKP_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -1200
 /***************************************/\r
 /* Get the speech activity level in Q8 */\r
 /***************************************/\r
-SKP_int SKP_Silk_VAD_GetSA_Q8(                                      /* O    Return value, 0 if success      */\r
-    SKP_Silk_VAD_state              *psSilk_VAD,                    /* I/O  Silk VAD state                  */\r
-    SKP_int                         *pSA_Q8,                        /* O    Speech activity level in Q8     */\r
-    SKP_int                         *pSNR_dB_Q7,                    /* O    SNR for current frame in Q7     */\r
-    SKP_int                         pQuality_Q15[ VAD_N_BANDS ],    /* O    Smoothed SNR for each band      */\r
-    SKP_int                         *pTilt_Q15,                     /* O    current frame's frequency tilt  */\r
-    const SKP_int16                 pIn[],                          /* I    PCM input       [framelength]   */\r
-    const SKP_int                   framelength                     /* I    Input frame length              */\r
+SKP_int SKP_Silk_VAD_GetSA_Q8(                                  /* O    Return value, 0 if success      */\r
+    SKP_Silk_VAD_state          *psSilk_VAD,                    /* I/O  Silk VAD state                  */\r
+    SKP_int                     *pSA_Q8,                        /* O    Speech activity level in Q8     */\r
+    SKP_int                     pQuality_Q15[ VAD_N_BANDS ],    /* O    Smoothed SNR for each band      */\r
+    SKP_int                     *pTilt_Q15,                     /* O    current frame's frequency tilt  */\r
+    const SKP_int16             pIn[],                          /* I    PCM input       [framelength]   */\r
+    const SKP_int               framelength,                    /* I    Input frame length              */\r
+    const SKP_int               fs_kHz                          /* I    Input sampling frequency [kHz]  */\r
 )\r
 {\r
     SKP_int   SA_Q15, input_tilt;\r
     SKP_int32 scratch[ 3 * MAX_FRAME_LENGTH / 2 ];\r
     SKP_int   decimated_framelength, dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;\r
     SKP_int32 sumSquared, smooth_coef_Q16;\r
-    SKP_int16 HPstateTmp;\r
+    SKP_int16 HPstateTmp, SNR_dB_Q7;\r
 \r
     SKP_int16 X[ VAD_N_BANDS ][ MAX_FRAME_LENGTH / 2 ];\r
     SKP_int32 Xnrg[ VAD_N_BANDS ];\r
     SKP_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];\r
     SKP_int32 speech_nrg, x_tmp;\r
     SKP_int   ret = 0;\r
+    \r
+#if SKP_SILK_VAD_HANDLE_10MS_FRAMES\r
+    SKP_int   normalizer;\r
+#endif\r
 \r
     /* Safety checks */\r
     SKP_assert( VAD_N_BANDS == 4 );\r
     SKP_assert( MAX_FRAME_LENGTH >= framelength );\r
     SKP_assert( framelength <= 512 );\r
+    SKP_assert( framelength == 8 * SKP_RSHIFT( framelength, 3 ) );\r
 \r
     /***********************/\r
     /* Filter and Decimate */\r
@@ -114,6 +121,7 @@ SKP_int SKP_Silk_VAD_GetSA_Q8(                                      /* O    Retu
     /*********************************************/\r
     /* HP filter on lowest band (differentiator) */\r
     /*********************************************/\r
+    /* y( n ) = 0.5 * x( n ) - 0.5 * x( n - 1 ) */\r
     decimated_framelength = SKP_RSHIFT( framelength, 3 );\r
     X[ 0 ][ decimated_framelength - 1 ] = SKP_RSHIFT( X[ 0 ][ decimated_framelength - 1 ], 1 );\r
     HPstateTmp = X[ 0 ][ decimated_framelength - 1 ];\r
@@ -141,7 +149,7 @@ SKP_int SKP_Silk_VAD_GetSA_Q8(                                      /* O    Retu
         for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) {\r
             sumSquared = 0;\r
             for( i = 0; i < dec_subframe_length; i++ ) {\r
-                /* The energy will be less than dec_subframe_length * ( SKP_int16_MIN / 8 )^2.              */\r
+                /* The energy will be less than dec_subframe_length * ( SKP_int16_MIN / 8 ) ^ 2.            */\r
                 /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */\r
                 x_tmp = SKP_RSHIFT( X[ b ][ i + dec_subframe_offset ], 3 );\r
                 sumSquared = SKP_SMLABB( sumSquared, x_tmp, x_tmp );\r
@@ -150,11 +158,11 @@ SKP_int SKP_Silk_VAD_GetSA_Q8(                                      /* O    Retu
                 SKP_assert( sumSquared >= 0 );\r
             }\r
 \r
-            /* add/saturate summed energy of current subframe */\r
+            /* Add/saturate summed energy of current subframe */\r
             if( s < VAD_INTERNAL_SUBFRAMES - 1 ) {\r
                 Xnrg[ b ] = SKP_ADD_POS_SAT32( Xnrg[ b ], sumSquared );\r
             } else {\r
-                /* look-ahead subframe */\r
+                /* Look-ahead subframe */\r
                 Xnrg[ b ] = SKP_ADD_POS_SAT32( Xnrg[ b ], SKP_RSHIFT( sumSquared, 1 ) );\r
             }\r
 \r
@@ -201,15 +209,15 @@ SKP_int SKP_Silk_VAD_GetSA_Q8(                                      /* O    Retu
     }\r
 \r
     /* Mean-of-squares */\r
-    sumSquared = SKP_DIV32_16( sumSquared, VAD_N_BANDS );           /* Q14 */\r
+    sumSquared = SKP_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */\r
 \r
-    /* Root-mean-square approximation, scale to dBs, and write to output pointer */\r
+    /* Root-mean-square approximation, scale to dBs */\r
-    *pSNR_dB_Q7 = ( SKP_int16 )( 3 * SKP_Silk_SQRT_APPROX( sumSquared ) );  /* Q7 */\r
+    SNR_dB_Q7 = ( SKP_int16 )( 3 * SKP_Silk_SQRT_APPROX( sumSquared ) ); /* Q7 */\r
 \r
     /*********************************/\r
     /* Speech Probability Estimation */\r
     /*********************************/\r
-    SA_Q15 = SKP_Silk_sigm_Q15( SKP_SMULWB( VAD_SNR_FACTOR_Q16, *pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 );\r
+    SA_Q15 = SKP_Silk_sigm_Q15( SKP_SMULWB( VAD_SNR_FACTOR_Q16, SNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 );\r
 \r
     /**************************/\r
     /* Frequency Tilt Measure */\r
@@ -229,8 +237,19 @@ SKP_int SKP_Silk_VAD_GetSA_Q8(                                      /* O    Retu
     if( speech_nrg <= 0 ) {\r
         SA_Q15 = SKP_RSHIFT( SA_Q15, 1 ); \r
     } else if( speech_nrg < 32768 ) {\r
+        \r
+#if SKP_SILK_VAD_HANDLE_10MS_FRAMES\r
+        /* Energy normalization of frames shorter than 320 samples */\r
+        normalizer = 0;\r
+        while( SKP_LSHIFT( framelength, normalizer ) < 320 ) {\r
+            normalizer++;\r
+        }\r
+        speech_nrg = SKP_LSHIFT_SAT32( speech_nrg, 15 + normalizer );\r
+#else\r
+        speech_nrg = SKP_LSHIFT_SAT32( speech_nrg, 15 );\r
+#endif\r
         /* square-root */\r
-        speech_nrg = SKP_Silk_SQRT_APPROX( SKP_LSHIFT( speech_nrg, 15 ) );\r
+        speech_nrg = SKP_Silk_SQRT_APPROX( speech_nrg );\r
         SA_Q15 = SKP_SMULWB( 32768 + speech_nrg, SA_Q15 ); \r
     }\r
 \r
@@ -240,8 +259,17 @@ SKP_int SKP_Silk_VAD_GetSA_Q8(                                      /* O    Retu
     /***********************************/\r
     /* Energy Level and SNR estimation */\r
     /***********************************/\r
-    /* smoothing coefficient */\r
+    /* Smoothing coefficient */\r
     smooth_coef_Q16 = SKP_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, SKP_SMULWB( SA_Q15, SA_Q15 ) );\r
+    \r
+#if SKP_SILK_VAD_HANDLE_10MS_FRAMES\r
+    if( framelength == 10 * fs_kHz ) {\r
+        smooth_coef_Q16 >>= 1;\r
+    } else {\r
+       SKP_assert( framelength == 20 * fs_kHz );\r
+    }\r
+#endif\r
+\r
     for( b = 0; b < VAD_N_BANDS; b++ ) {\r
         /* compute smoothed energy-to-noise ratio per band */\r
         psSilk_VAD->NrgRatioSmth_Q8[ b ] = SKP_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ], \r