Store decoded SILK pulses as 16-bit vector
[opus.git] / silk / float / pitch_analysis_core_FLP.c
index fbff90c..e58f041 100644 (file)
@@ -8,11 +8,11 @@ this list of conditions and the following disclaimer.
 - Redistributions in binary form must reproduce the above copyright
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.
-- Neither the name of Internet Society, IETF or IETF Trust, nor the 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
 names of specific contributors, may be used to endorse or promote
 products derived from this software without specific prior written
 permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
@@ -35,9 +35,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "SigProc_FLP.h"
 #include "SigProc_FIX.h"
 #include "pitch_est_defines.h"
+#include "pitch.h"
 
 #define SCRATCH_SIZE        22
-#define eps                 1.192092896e-07f
 
 /************************************************************/
 /* Internally used functions                                */
@@ -48,7 +48,8 @@ static void silk_P_Ana_calc_corr_st3(
     opus_int            start_lag,          /* I start lag                                                      */
     opus_int            sf_length,          /* I sub frame length                                               */
     opus_int            nb_subfr,           /* I number of subframes                                            */
-    opus_int            complexity          /* I Complexity setting                                             */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
 );
 
 static void silk_P_Ana_calc_energy_st3(
@@ -74,7 +75,8 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
     const silk_float    search_thres2,      /* I    Final threshold for lag candidates 0 - 1                    */
     const opus_int      Fs_kHz,             /* I    sample frequency (kHz)                                      */
     const opus_int      complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
-    const opus_int      nb_subfr            /* I    Number of 5 ms subframes                                    */
+    const opus_int      nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                 arch                /* I    Run-time architecture                                       */
 )
 {
     opus_int   i, k, d, j;
@@ -85,6 +87,7 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
     opus_int32 filt_state[ 6 ];
     silk_float threshold, contour_bias;
     silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ];
+    opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ];
     silk_float CC[ PE_NB_CBKS_STAGE2_EXT ];
     const silk_float *target_ptr, *basis_ptr;
     double    cross_corr, normalizer, energy, energy_tmp;
@@ -129,8 +132,6 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
     max_lag_4kHz      = PE_MAX_LAG_MS * 4;
     max_lag_8kHz      = PE_MAX_LAG_MS * 8 - 1;
 
-    silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));
-
     /* Resample from input sampled at Fs_kHz to 8 kHz */
     if( Fs_kHz == 16 ) {
         /* Resample to 16 -> 8 khz */
@@ -164,6 +165,7 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
     /******************************************************************************
     * FIRST STAGE, operating in 4 khz
     ******************************************************************************/
+    silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));
     target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];
     for( k = 0; k < nb_subfr >> 1; k++ ) {
         /* Check that we are within range of the array */
@@ -176,27 +178,31 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
         silk_assert( basis_ptr >= frame_4kHz );
         silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
 
+        celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch );
+
         /* Calculate first vector products before loop */
-        cross_corr = silk_inner_product_FLP( target_ptr, basis_ptr, sf_length_8kHz );
-        normalizer = silk_energy_FLP( basis_ptr, sf_length_8kHz ) + sf_length_8kHz * 4000.0f;
+        cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];
+        normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 
+                     silk_energy_FLP( basis_ptr,  sf_length_8kHz ) + 
+                     sf_length_8kHz * 4000.0f;
 
-        C[ 0 ][ min_lag_4kHz ] += (silk_float)(cross_corr / sqrt(normalizer));
+        C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer );
 
         /* From now on normalizer is computed recursively */
-        for(d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++) {
+        for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {
             basis_ptr--;
 
             /* Check that we are within range of the array */
             silk_assert( basis_ptr >= frame_4kHz );
             silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
 
-            cross_corr = silk_inner_product_FLP(target_ptr, basis_ptr, sf_length_8kHz);
+            cross_corr = xcorr[ max_lag_4kHz - d ];
 
             /* Add contribution of new sample and remove contribution from oldest sample */
             normalizer +=
                 basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] -
                 basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ];
-            C[ 0 ][ d ] += (silk_float)(cross_corr / sqrt( normalizer ));
+            C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer );
         }
         /* Update target pointer */
         target_ptr += sf_length_8kHz;
@@ -214,13 +220,7 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
 
     /* Escape if correlation is very low already here */
     Cmax = C[ 0 ][ min_lag_4kHz ];
-    target_ptr = &frame_4kHz[ silk_SMULBB( sf_length_4kHz, nb_subfr ) ];
-    energy = 1000.0f;
-    for( i = 0; i < silk_LSHIFT( sf_length_4kHz, 2 ); i++ ) {
-        energy += target_ptr[i] * (double)target_ptr[i];
-    }
-    threshold = Cmax * Cmax;
-    if( energy / 16.0f > threshold ) {
+    if( Cmax < 0.2f ) {
         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
         *LTPCorr      = 0.0f;
         *lagIndex     = 0;
@@ -287,14 +287,14 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
         target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
     }
     for( k = 0; k < nb_subfr; k++ ) {
-        energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz );
+        energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0;
         for( j = 0; j < length_d_comp; j++ ) {
             d = d_comp[ j ];
             basis_ptr = target_ptr - d;
             cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz );
-            energy     = silk_energy_FLP( basis_ptr, sf_length_8kHz );
             if( cross_corr > 0.0f ) {
-                C[ k ][ d ] = (silk_float)(cross_corr * cross_corr / (energy * energy_tmp + eps));
+                energy = silk_energy_FLP( basis_ptr, sf_length_8kHz );
+                C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) );
             } else {
                 C[ k ][ d ] = 0.0f;
             }
@@ -317,7 +317,7 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
         } else if( Fs_kHz == 16 ) {
             prevLag = silk_RSHIFT( prevLag, 1 );
         }
-        prevLag_log2 = silk_log2((silk_float)prevLag);
+        prevLag_log2 = silk_log2( (silk_float)prevLag );
     } else {
         prevLag_log2 = 0;
     }
@@ -356,23 +356,20 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
                 CBimax_new = i;
             }
         }
-        CCmax_new = silk_max_float(CCmax_new, 0.0f); /* To avoid taking square root of negative number later */
-        CCmax_new_b = CCmax_new;
 
         /* Bias towards shorter lags */
-        lag_log2 = silk_log2((silk_float)d);
-        CCmax_new_b -= PE_SHORTLAG_BIAS * nb_subfr * lag_log2;
+        lag_log2 = silk_log2( (silk_float)d );
+        CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2;
 
         /* Bias towards previous lag */
         if( prevLag > 0 ) {
             delta_lag_log2_sqr = lag_log2 - prevLag_log2;
             delta_lag_log2_sqr *= delta_lag_log2_sqr;
-            CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / (delta_lag_log2_sqr + 0.5f);
+            CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f );
         }
 
-        if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */
-            CCmax_new > nb_subfr * search_thres2 * search_thres2    &&  /* Correlation needs to be high enough to be voiced */
-            silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= min_lag_8kHz      /* Lag must be in range                             */
+        if( CCmax_new_b > CCmax_b &&                /* Find maximum biased correlation                  */
+            CCmax_new > nb_subfr * search_thres2    /* Correlation needs to be high enough to be voiced */
         ) {
             CCmax_b = CCmax_new_b;
             CCmax   = CCmax_new;
@@ -390,6 +387,10 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
         return 1;
     }
 
+    /* Output normalized correlation */
+    *LTPCorr = (silk_float)( CCmax / nb_subfr );
+    silk_assert( *LTPCorr >= 0.0f );
+
     if( Fs_kHz > 8 ) {
         /* Search in original signal */
 
@@ -406,13 +407,11 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
         end_lag   = silk_min_int( lag + 2, max_lag );
         lag_new   = lag;                                    /* to avoid undefined lag */
         CBimax    = 0;                                      /* to avoid undefined lag */
-        silk_assert( CCmax >= 0.0f );
-        *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr );    /* Output normalized correlation */
 
         CCmax = -1000.0f;
 
         /* Calculate the correlations and energies needed in stage 3 */
-        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity );
+        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
         silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity );
 
         lag_counter = 0;
@@ -430,25 +429,25 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
             Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
         }
 
+        target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
+        energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0;
         for( d = start_lag; d <= end_lag; d++ ) {
             for( j = 0; j < nb_cbk_search; j++ ) {
                 cross_corr = 0.0;
-                energy = eps;
+                energy = energy_tmp;
                 for( k = 0; k < nb_subfr; k++ ) {
-                    energy     +=   energies_st3[ k ][ j ][ lag_counter ];
                     cross_corr += cross_corr_st3[ k ][ j ][ lag_counter ];
+                    energy     +=   energies_st3[ k ][ j ][ lag_counter ];
                 }
                 if( cross_corr > 0.0 ) {
-                    CCmax_new = (silk_float)(cross_corr * cross_corr / energy);
+                    CCmax_new = (silk_float)( 2 * cross_corr / energy );
                     /* Reduce depending on flatness of contour */
                     CCmax_new *= 1.0f - contour_bias * j;
                 } else {
                     CCmax_new = 0.0f;
                 }
 
-                if( CCmax_new > CCmax &&
-                   ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag
-                   ) {
+                if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
                     CCmax   = CCmax_new;
                     lag_new = d;
                     CBimax  = j;
@@ -464,12 +463,10 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
         *lagIndex = (opus_int16)( lag_new - min_lag );
         *contourIndex = (opus_int8)CBimax;
     } else {        /* Fs_kHz == 8 */
-        /* Save Lags and correlation */
-        silk_assert( CCmax >= 0.0f );
-        *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */
+        /* Save Lags */
         for( k = 0; k < nb_subfr; k++ ) {
             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
-            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * Fs_kHz );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );
         }
         *lagIndex = (opus_int16)( lag - min_lag_8kHz );
         *contourIndex = (opus_int8)CBimax;
@@ -479,32 +476,34 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
     return 0;
 }
 
+/***********************************************************************
+ * Calculates the correlations used in stage 3 search. In order to cover
+ * the whole lag codebook for all the searched offset lags (lag +- 2),
+ * the following correlations are needed in each sub frame:
+ *
+ * sf1: lag range [-8,...,7] total 16 correlations
+ * sf2: lag range [-4,...,4] total 9 correlations
+ * sf3: lag range [-3,...,4] total 8 correlations
+ * sf4: lag range [-6,...,8] total 15 correlations
+ *
+ * In total 48 correlations. The direct implementation computed in worst
+ * case 4*12*5 = 240 correlations, but more likely around 120.
+ ***********************************************************************/
 static void silk_P_Ana_calc_corr_st3(
     silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
     const silk_float    frame[],            /* I vector to correlate                                            */
     opus_int            start_lag,          /* I start lag                                                      */
     opus_int            sf_length,          /* I sub frame length                                               */
     opus_int            nb_subfr,           /* I number of subframes                                            */
-    opus_int            complexity          /* I Complexity setting                                             */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
 )
-    /***********************************************************************
-     Calculates the correlations used in stage 3 search. In order to cover
-     the whole lag codebook for all the searched offset lags (lag +- 2),
-     the following correlations are needed in each sub frame:
-
-     sf1: lag range [-8,...,7] total 16 correlations
-     sf2: lag range [-4,...,4] total 9 correlations
-     sf3: lag range [-3,....4] total 8 correltions
-     sf4: lag range [-6,....8] total 15 correlations
-
-     In total 48 correlations. The direct implementation computed in worst case
-     4*12*5 = 240 correlations, but more likely around 120.
-     **********************************************************************/
 {
-    const silk_float *target_ptr, *basis_ptr;
+    const silk_float *target_ptr;
     opus_int   i, j, k, lag_counter, lag_low, lag_high;
     opus_int   nb_cbk_search, delta, idx, cbk_size;
     silk_float scratch_mem[ SCRATCH_SIZE ];
+    opus_val32 xcorr[ SCRATCH_SIZE ];
     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
 
     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
@@ -530,10 +529,11 @@ static void silk_P_Ana_calc_corr_st3(
         /* Calculate the correlations for each subframe */
         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
+        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch );
         for( j = lag_low; j <= lag_high; j++ ) {
-            basis_ptr = target_ptr - ( start_lag + j );
             silk_assert( lag_counter < SCRATCH_SIZE );
-            scratch_mem[ lag_counter ] = (silk_float)silk_inner_product_FLP( target_ptr, basis_ptr, sf_length );
+            scratch_mem[ lag_counter ] = xcorr[ lag_high - j ];
             lag_counter++;
         }
 
@@ -552,6 +552,10 @@ static void silk_P_Ana_calc_corr_st3(
     }
 }
 
+/********************************************************************/
+/* Calculate the energies for first two subframes. The energies are */
+/* calculated recursively.                                          */
+/********************************************************************/
 static void silk_P_Ana_calc_energy_st3(
     silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
     const silk_float    frame[],            /* I vector to correlate                                            */
@@ -560,10 +564,6 @@ static void silk_P_Ana_calc_energy_st3(
     opus_int            nb_subfr,           /* I number of subframes                                            */
     opus_int            complexity          /* I Complexity setting                                             */
 )
-/****************************************************************
-Calculate the energies for first two subframes. The energies are
-calculated recursively.
-****************************************************************/
 {
     const silk_float *target_ptr, *basis_ptr;
     double    energy;