Store decoded SILK pulses as 16-bit vector
[opus.git] / silk / float / pitch_analysis_core_FLP.c
index 94b0fa4..e58f041 100644 (file)
@@ -8,11 +8,11 @@ this list of conditions and the following disclaimer.
 - Redistributions in binary form must reproduce the above copyright
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.
-- Neither the name of Internet Society, IETF or IETF Trust, nor the 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
 names of specific contributors, may be used to endorse or promote
 products derived from this software without specific prior written
 permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
@@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "SigProc_FLP.h"
 #include "SigProc_FIX.h"
 #include "pitch_est_defines.h"
+#include "pitch.h"
 
 #define SCRATCH_SIZE        22
 
@@ -47,7 +48,8 @@ static void silk_P_Ana_calc_corr_st3(
     opus_int            start_lag,          /* I start lag                                                      */
     opus_int            sf_length,          /* I sub frame length                                               */
     opus_int            nb_subfr,           /* I number of subframes                                            */
-    opus_int            complexity          /* I Complexity setting                                             */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
 );
 
 static void silk_P_Ana_calc_energy_st3(
@@ -73,7 +75,8 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
     const silk_float    search_thres2,      /* I    Final threshold for lag candidates 0 - 1                    */
     const opus_int      Fs_kHz,             /* I    sample frequency (kHz)                                      */
     const opus_int      complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
-    const opus_int      nb_subfr            /* I    Number of 5 ms subframes                                    */
+    const opus_int      nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                 arch                /* I    Run-time architecture                                       */
 )
 {
     opus_int   i, k, d, j;
@@ -84,6 +87,7 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
     opus_int32 filt_state[ 6 ];
     silk_float threshold, contour_bias;
     silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ];
+    opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ];
     silk_float CC[ PE_NB_CBKS_STAGE2_EXT ];
     const silk_float *target_ptr, *basis_ptr;
     double    cross_corr, normalizer, energy, energy_tmp;
@@ -174,8 +178,10 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
         silk_assert( basis_ptr >= frame_4kHz );
         silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
 
+        celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch );
+
         /* Calculate first vector products before loop */
-        cross_corr = silk_inner_product_FLP( target_ptr, basis_ptr, sf_length_8kHz );
+        cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];
         normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 
                      silk_energy_FLP( basis_ptr,  sf_length_8kHz ) + 
                      sf_length_8kHz * 4000.0f;
@@ -190,7 +196,7 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
             silk_assert( basis_ptr >= frame_4kHz );
             silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
 
-            cross_corr = silk_inner_product_FLP(target_ptr, basis_ptr, sf_length_8kHz);
+            cross_corr = xcorr[ max_lag_4kHz - d ];
 
             /* Add contribution of new sample and remove contribution from oldest sample */
             normalizer +=
@@ -405,7 +411,7 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
         CCmax = -1000.0f;
 
         /* Calculate the correlations and energies needed in stage 3 */
-        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity );
+        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
         silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity );
 
         lag_counter = 0;
@@ -471,31 +477,33 @@ opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced,
 }
 
 /***********************************************************************
-/* Calculates the correlations used in stage 3 search. In order to cover
-/* the whole lag codebook for all the searched offset lags (lag +- 2),
-/* the following correlations are needed in each sub frame:
-/* 
-/* sf1: lag range [-8,...,7] total 16 correlations
-/* sf2: lag range [-4,...,4] total 9 correlations
-/* sf3: lag range [-3,....4] total 8 correltions
-/* sf4: lag range [-6,....8] total 15 correlations
-/* 
-/* In total 48 correlations. The direct implementation computed in worst 
-/* case 4*12*5 = 240 correlations, but more likely around 120.
-/***********************************************************************/
+ * Calculates the correlations used in stage 3 search. In order to cover
+ * the whole lag codebook for all the searched offset lags (lag +- 2),
+ * the following correlations are needed in each sub frame:
+ *
+ * sf1: lag range [-8,...,7] total 16 correlations
+ * sf2: lag range [-4,...,4] total 9 correlations
+ * sf3: lag range [-3,....4] total 8 correltions
+ * sf4: lag range [-6,....8] total 15 correlations
+ *
+ * In total 48 correlations. The direct implementation computed in worst
+ * case 4*12*5 = 240 correlations, but more likely around 120.
+ ***********************************************************************/
 static void silk_P_Ana_calc_corr_st3(
     silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
     const silk_float    frame[],            /* I vector to correlate                                            */
     opus_int            start_lag,          /* I start lag                                                      */
     opus_int            sf_length,          /* I sub frame length                                               */
     opus_int            nb_subfr,           /* I number of subframes                                            */
-    opus_int            complexity          /* I Complexity setting                                             */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
 )
 {
-    const silk_float *target_ptr, *basis_ptr;
+    const silk_float *target_ptr;
     opus_int   i, j, k, lag_counter, lag_low, lag_high;
     opus_int   nb_cbk_search, delta, idx, cbk_size;
     silk_float scratch_mem[ SCRATCH_SIZE ];
+    opus_val32 xcorr[ SCRATCH_SIZE ];
     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
 
     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
@@ -521,10 +529,11 @@ static void silk_P_Ana_calc_corr_st3(
         /* Calculate the correlations for each subframe */
         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
+        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch );
         for( j = lag_low; j <= lag_high; j++ ) {
-            basis_ptr = target_ptr - ( start_lag + j );
             silk_assert( lag_counter < SCRATCH_SIZE );
-            scratch_mem[ lag_counter ] = (silk_float)silk_inner_product_FLP( target_ptr, basis_ptr, sf_length );
+            scratch_mem[ lag_counter ] = xcorr[ lag_high - j ];
             lag_counter++;
         }