Store decoded SILK pulses as 16-bit vector
[opus.git] / silk / float / pitch_analysis_core_FLP.c
index f2615b5..e58f041 100644 (file)
@@ -1,28 +1,28 @@
 /***********************************************************************
 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
 Redistribution and use in source and binary forms, with or without
-modification, (subject to the limitations in the disclaimer below)
-are permitted provided that the following conditions are met:
+modification, are permitted provided that the following conditions
+are met:
 - Redistributions of source code must retain the above copyright notice,
 this list of conditions and the following disclaimer.
 - Redistributions in binary form must reproduce the above copyright
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.
-- Neither the name of Skype Limited, nor the names of specific
-contributors, may be used to endorse or promote products derived from
-this software without specific prior written permission.
-NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED
-BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
-CONTRIBUTORS ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
-BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
-FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
-NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifdef HAVE_CONFIG_H
@@ -30,59 +30,53 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 
 /*****************************************************************************
-*
 * Pitch analyser function
-*
 ******************************************************************************/
 #include "SigProc_FLP.h"
 #include "SigProc_FIX.h"
 #include "pitch_est_defines.h"
+#include "pitch.h"
 
-#define SCRATCH_SIZE    22
-
-/************************************************************/
-/* Definitions                                              */
-/************************************************************/
-#define eps                     1.192092896e-07f
+#define SCRATCH_SIZE        22
 
 /************************************************************/
 /* Internally used functions                                */
 /************************************************************/
 static void silk_P_Ana_calc_corr_st3(
     silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
-    const silk_float  frame[],            /* I vector to correlate                                            */
-    opus_int         start_lag,          /* I start lag                                                      */
-    opus_int         sf_length,          /* I sub frame length                                               */
-    opus_int         nb_subfr,           /* I number of subframes                                            */
-    opus_int         complexity          /* I Complexity setting                                             */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
 );
 
 static void silk_P_Ana_calc_energy_st3(
     silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
-    const silk_float  frame[],            /* I vector to correlate                                            */
-    opus_int         start_lag,          /* I start lag                                                      */
-    opus_int         sf_length,          /* I sub frame length                                               */
-    opus_int         nb_subfr,           /* I number of subframes                                            */
-    opus_int         complexity          /* I Complexity setting                                             */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity          /* I Complexity setting                                             */
 );
 
-/*
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%             CORE PITCH ANALYSIS FUNCTION                %
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-*/
-opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoiced                       */
-    const silk_float  *frame,             /* I signal of length PE_FRAME_LENGTH_MS*Fs_kHz                     */
-    opus_int         *pitch_out,         /* O 4 pitch lag values                                             */
-    opus_int16        *lagIndex,         /* O lag Index                                                      */
-    opus_int8        *contourIndex,      /* O pitch contour Index                                            */
-    silk_float       *LTPCorr,           /* I/O normalized correlation; input: value from previous frame     */
-    opus_int         prevLag,            /* I last lag of previous frame; set to zero is unvoiced            */
-    const silk_float search_thres1,      /* I first stage threshold for lag candidates 0 - 1                 */
-    const silk_float search_thres2,      /* I final threshold for lag candidates 0 - 1                       */
-    const opus_int   Fs_kHz,             /* I sample frequency (kHz)                                         */
-    const opus_int   complexity,         /* I Complexity setting, 0-2, where 2 is highest                    */
-    const opus_int   nb_subfr            /* I    number of 5 ms subframes                                    */
+/************************************************************/
+/* CORE PITCH ANALYSIS FUNCTION                             */
+/************************************************************/
+opus_int silk_pitch_analysis_core_FLP(      /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
+    const silk_float    *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
+    opus_int            *pitch_out,         /* O    Pitch lag values [nb_subfr]                                 */
+    opus_int16          *lagIndex,          /* O    Lag Index                                                   */
+    opus_int8           *contourIndex,      /* O    Pitch contour Index                                         */
+    silk_float          *LTPCorr,           /* I/O  Normalized correlation; input: value from previous frame    */
+    opus_int            prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */
+    const silk_float    search_thres1,      /* I    First stage threshold for lag candidates 0 - 1              */
+    const silk_float    search_thres2,      /* I    Final threshold for lag candidates 0 - 1                    */
+    const opus_int      Fs_kHz,             /* I    sample frequency (kHz)                                      */
+    const opus_int      complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
+    const opus_int      nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                 arch                /* I    Run-time architecture                                       */
 )
 {
     opus_int   i, k, d, j;
@@ -93,6 +87,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
     opus_int32 filt_state[ 6 ];
     silk_float threshold, contour_bias;
     silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ];
+    opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ];
     silk_float CC[ PE_NB_CBKS_STAGE2_EXT ];
     const silk_float *target_ptr, *basis_ptr;
     double    cross_corr, normalizer, energy, energy_tmp;
@@ -117,13 +112,13 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
     silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
 
     /* Check for valid complexity setting */
-    silk_assert( complexity >= SigProc_PE_MIN_COMPLEX );
-    silk_assert( complexity <= SigProc_PE_MAX_COMPLEX );
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
 
     silk_assert( search_thres1 >= 0.0f && search_thres1 <= 1.0f );
     silk_assert( search_thres2 >= 0.0f && search_thres2 <= 1.0f );
 
-    /* Setup frame lengths max / min lag for the sampling frequency */
+    /* Set up frame lengths max / min lag for the sampling frequency */
     frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
     frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
     frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
@@ -137,8 +132,6 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
     max_lag_4kHz      = PE_MAX_LAG_MS * 4;
     max_lag_8kHz      = PE_MAX_LAG_MS * 8 - 1;
 
-    silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));
-
     /* Resample from input sampled at Fs_kHz to 8 kHz */
     if( Fs_kHz == 16 ) {
         /* Resample to 16 -> 8 khz */
@@ -172,6 +165,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
     /******************************************************************************
     * FIRST STAGE, operating in 4 khz
     ******************************************************************************/
+    silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));
     target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];
     for( k = 0; k < nb_subfr >> 1; k++ ) {
         /* Check that we are within range of the array */
@@ -184,27 +178,31 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
         silk_assert( basis_ptr >= frame_4kHz );
         silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
 
+        celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch );
+
         /* Calculate first vector products before loop */
-        cross_corr = silk_inner_product_FLP( target_ptr, basis_ptr, sf_length_8kHz );
-        normalizer = silk_energy_FLP( basis_ptr, sf_length_8kHz ) + sf_length_8kHz * 4000.0f;
+        cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];
+        normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 
+                     silk_energy_FLP( basis_ptr,  sf_length_8kHz ) + 
+                     sf_length_8kHz * 4000.0f;
 
-        C[ 0 ][ min_lag_4kHz ] += (silk_float)(cross_corr / sqrt(normalizer));
+        C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer );
 
         /* From now on normalizer is computed recursively */
-        for(d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++) {
+        for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {
             basis_ptr--;
 
             /* Check that we are within range of the array */
             silk_assert( basis_ptr >= frame_4kHz );
             silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
 
-            cross_corr = silk_inner_product_FLP(target_ptr, basis_ptr, sf_length_8kHz);
+            cross_corr = xcorr[ max_lag_4kHz - d ];
 
             /* Add contribution of new sample and remove contribution from oldest sample */
             normalizer +=
-                basis_ptr[ 0 ] * basis_ptr[ 0 ] -
-                basis_ptr[ sf_length_8kHz ] * basis_ptr[ sf_length_8kHz ];
-            C[ 0 ][ d ] += (silk_float)(cross_corr / sqrt( normalizer ));
+                basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] -
+                basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ];
+            C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer );
         }
         /* Update target pointer */
         target_ptr += sf_length_8kHz;
@@ -222,13 +220,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
 
     /* Escape if correlation is very low already here */
     Cmax = C[ 0 ][ min_lag_4kHz ];
-    target_ptr = &frame_4kHz[ silk_SMULBB( sf_length_4kHz, nb_subfr ) ];
-    energy = 1000.0f;
-    for( i = 0; i < silk_LSHIFT( sf_length_4kHz, 2 ); i++ ) {
-        energy += target_ptr[i] * target_ptr[i];
-    }
-    threshold = Cmax * Cmax;
-    if( energy / 16.0f > threshold ) {
+    if( Cmax < 0.2f ) {
         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
         *LTPCorr      = 0.0f;
         *lagIndex     = 0;
@@ -287,7 +279,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
     /*********************************************************************************
     * Find energy of each subframe projected onto its history, for a range of delays
     *********************************************************************************/
-    silk_memset( C, 0, PE_MAX_NB_SUBFR*((PE_MAX_LAG >> 1) + 5) * sizeof(silk_float)); /* Is this needed?*/
+    silk_memset( C, 0, PE_MAX_NB_SUBFR*((PE_MAX_LAG >> 1) + 5) * sizeof(silk_float));
 
     if( Fs_kHz == 8 ) {
         target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * 8 ];
@@ -295,14 +287,14 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
         target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
     }
     for( k = 0; k < nb_subfr; k++ ) {
-        energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz );
+        energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0;
         for( j = 0; j < length_d_comp; j++ ) {
             d = d_comp[ j ];
             basis_ptr = target_ptr - d;
             cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz );
-            energy     = silk_energy_FLP( basis_ptr, sf_length_8kHz );
-            if (cross_corr > 0.0f) {
-                C[ k ][ d ] = (silk_float)(cross_corr * cross_corr / (energy * energy_tmp + eps));
+            if( cross_corr > 0.0f ) {
+                energy = silk_energy_FLP( basis_ptr, sf_length_8kHz );
+                C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) );
             } else {
                 C[ k ][ d ] = 0.0f;
             }
@@ -325,16 +317,16 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
         } else if( Fs_kHz == 16 ) {
             prevLag = silk_RSHIFT( prevLag, 1 );
         }
-        prevLag_log2 = silk_log2((silk_float)prevLag);
+        prevLag_log2 = silk_log2( (silk_float)prevLag );
     } else {
         prevLag_log2 = 0;
     }
 
-    /* Setup stage 2 codebook based on number of subframes */
+    /* Set up stage 2 codebook based on number of subframes */
     if( nb_subfr == PE_MAX_NB_SUBFR ) {
         cbk_size   = PE_NB_CBKS_STAGE2_EXT;
         Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
-        if( Fs_kHz == 8 && complexity > SigProc_PE_MIN_COMPLEX ) {
+        if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
             /* If input is 8 khz use a larger codebook here because it is last stage */
             nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
         } else {
@@ -364,24 +356,21 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
                 CBimax_new = i;
             }
         }
-        CCmax_new = silk_max_float(CCmax_new, 0.0f); /* To avoid taking square root of negative number later */
-        CCmax_new_b = CCmax_new;
 
         /* Bias towards shorter lags */
-        lag_log2 = silk_log2((silk_float)d);
-        CCmax_new_b -= PE_SHORTLAG_BIAS * nb_subfr * lag_log2;
+        lag_log2 = silk_log2( (silk_float)d );
+        CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2;
 
         /* Bias towards previous lag */
-        if ( prevLag > 0 ) {
+        if( prevLag > 0 ) {
             delta_lag_log2_sqr = lag_log2 - prevLag_log2;
             delta_lag_log2_sqr *= delta_lag_log2_sqr;
-            CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / (delta_lag_log2_sqr + 0.5f);
+            CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f );
         }
 
-        if ( CCmax_new_b > CCmax_b                                      && /* Find maximum biased correlation                  */
-             CCmax_new > nb_subfr * search_thres2 * search_thres2       && /* Correlation needs to be high enough to be voiced */
-             silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= min_lag_8kHz    /* Lag must be in range                             */
-            ) {
+        if( CCmax_new_b > CCmax_b &&                /* Find maximum biased correlation                  */
+            CCmax_new > nb_subfr * search_thres2    /* Correlation needs to be high enough to be voiced */
+        ) {
             CCmax_b = CCmax_new_b;
             CCmax   = CCmax_new;
             lag     = d;
@@ -398,6 +387,10 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
         return 1;
     }
 
+    /* Output normalized correlation */
+    *LTPCorr = (silk_float)( CCmax / nb_subfr );
+    silk_assert( *LTPCorr >= 0.0f );
+
     if( Fs_kHz > 8 ) {
         /* Search in original signal */
 
@@ -414,20 +407,18 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
         end_lag   = silk_min_int( lag + 2, max_lag );
         lag_new   = lag;                                    /* to avoid undefined lag */
         CBimax    = 0;                                      /* to avoid undefined lag */
-        silk_assert( CCmax >= 0.0f );
-        *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr );     /* Output normalized correlation */
 
         CCmax = -1000.0f;
 
         /* Calculate the correlations and energies needed in stage 3 */
-        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity );
+        silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
         silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity );
 
         lag_counter = 0;
         silk_assert( lag == silk_SAT16( lag ) );
         contour_bias = PE_FLATCONTOUR_BIAS / lag;
 
-        /* Setup cbk parameters acording to complexity setting and frame length */
+        /* Set up cbk parameters according to complexity setting and frame length */
         if( nb_subfr == PE_MAX_NB_SUBFR ) {
             nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
             cbk_size      = PE_NB_CBKS_STAGE3_MAX;
@@ -438,25 +429,25 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
             Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
         }
 
+        target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
+        energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0;
         for( d = start_lag; d <= end_lag; d++ ) {
             for( j = 0; j < nb_cbk_search; j++ ) {
                 cross_corr = 0.0;
-                energy = eps;
+                energy = energy_tmp;
                 for( k = 0; k < nb_subfr; k++ ) {
-                    energy     +=   energies_st3[ k ][ j ][ lag_counter ];
                     cross_corr += cross_corr_st3[ k ][ j ][ lag_counter ];
+                    energy     +=   energies_st3[ k ][ j ][ lag_counter ];
                 }
                 if( cross_corr > 0.0 ) {
-                    CCmax_new = (silk_float)(cross_corr * cross_corr / energy);
+                    CCmax_new = (silk_float)( 2 * cross_corr / energy );
                     /* Reduce depending on flatness of contour */
                     CCmax_new *= 1.0f - contour_bias * j;
                 } else {
                     CCmax_new = 0.0f;
                 }
 
-                if( CCmax_new > CCmax &&
-                   ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag
-                   ) {
+                if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
                     CCmax   = CCmax_new;
                     lag_new = d;
                     CBimax  = j;
@@ -467,17 +458,17 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
 
         for( k = 0; k < nb_subfr; k++ ) {
             pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
         }
         *lagIndex = (opus_int16)( lag_new - min_lag );
         *contourIndex = (opus_int8)CBimax;
-    } else {
-        /* Save Lags and correlation */
-        silk_assert( CCmax >= 0.0f );
-        *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */
+    } else {        /* Fs_kHz == 8 */
+        /* Save Lags */
         for( k = 0; k < nb_subfr; k++ ) {
             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );
         }
-        *lagIndex = (opus_int16)( lag - min_lag );
+        *lagIndex = (opus_int16)( lag - min_lag_8kHz );
         *contourIndex = (opus_int8)CBimax;
     }
     silk_assert( *lagIndex >= 0 );
@@ -485,38 +476,40 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
     return 0;
 }
 
+/***********************************************************************
+ * Calculates the correlations used in stage 3 search. In order to cover
+ * the whole lag codebook for all the searched offset lags (lag +- 2),
+ * the following correlations are needed in each sub frame:
+ *
+ * sf1: lag range [-8,...,7] total 16 correlations
+ * sf2: lag range [-4,...,4] total 9 correlations
+ * sf3: lag range [-3,....4] total 8 correltions
+ * sf4: lag range [-6,....8] total 15 correlations
+ *
+ * In total 48 correlations. The direct implementation computed in worst
+ * case 4*12*5 = 240 correlations, but more likely around 120.
+ ***********************************************************************/
 static void silk_P_Ana_calc_corr_st3(
     silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
-    const silk_float  frame[],            /* I vector to correlate                                            */
-    opus_int         start_lag,          /* I start lag                                                      */
-    opus_int         sf_length,          /* I sub frame length                                               */
-    opus_int         nb_subfr,           /* I number of subframes                                            */
-    opus_int         complexity          /* I Complexity setting                                             */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity,         /* I Complexity setting                                             */
+    int                 arch                /* I Run-time architecture                                          */
 )
-    /***********************************************************************
-     Calculates the correlations used in stage 3 search. In order to cover
-     the whole lag codebook for all the searched offset lags (lag +- 2),
-     the following correlations are needed in each sub frame:
-
-     sf1: lag range [-8,...,7] total 16 correlations
-     sf2: lag range [-4,...,4] total 9 correlations
-     sf3: lag range [-3,....4] total 8 correltions
-     sf4: lag range [-6,....8] total 15 correlations
-
-     In total 48 correlations. The direct implementation computed in worst case
-     4*12*5 = 240 correlations, but more likely around 120.
-     **********************************************************************/
 {
-    const silk_float *target_ptr, *basis_ptr;
+    const silk_float *target_ptr;
     opus_int   i, j, k, lag_counter, lag_low, lag_high;
     opus_int   nb_cbk_search, delta, idx, cbk_size;
     silk_float scratch_mem[ SCRATCH_SIZE ];
+    opus_val32 xcorr[ SCRATCH_SIZE ];
     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
 
-    silk_assert( complexity >= SigProc_PE_MIN_COMPLEX );
-    silk_assert( complexity <= SigProc_PE_MAX_COMPLEX );
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
 
-    if( nb_subfr == PE_MAX_NB_SUBFR ){
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
@@ -536,10 +529,11 @@ static void silk_P_Ana_calc_corr_st3(
         /* Calculate the correlations for each subframe */
         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );
         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
+        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch );
         for( j = lag_low; j <= lag_high; j++ ) {
-            basis_ptr = target_ptr - ( start_lag + j );
             silk_assert( lag_counter < SCRATCH_SIZE );
-            scratch_mem[ lag_counter ] = (silk_float)silk_inner_product_FLP( target_ptr, basis_ptr, sf_length );
+            scratch_mem[ lag_counter ] = xcorr[ lag_high - j ];
             lag_counter++;
         }
 
@@ -558,18 +552,18 @@ static void silk_P_Ana_calc_corr_st3(
     }
 }
 
+/********************************************************************/
+/* Calculate the energies for first two subframes. The energies are */
+/* calculated recursively.                                          */
+/********************************************************************/
 static void silk_P_Ana_calc_energy_st3(
     silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */
-    const silk_float  frame[],            /* I vector to correlate                                            */
-    opus_int         start_lag,          /* I start lag                                                      */
-    opus_int         sf_length,          /* I sub frame length                                               */
-    opus_int         nb_subfr,           /* I number of subframes                                            */
-    opus_int         complexity          /* I Complexity setting                                             */
+    const silk_float    frame[],            /* I vector to correlate                                            */
+    opus_int            start_lag,          /* I start lag                                                      */
+    opus_int            sf_length,          /* I sub frame length                                               */
+    opus_int            nb_subfr,           /* I number of subframes                                            */
+    opus_int            complexity          /* I Complexity setting                                             */
 )
-/****************************************************************
-Calculate the energies for first two subframes. The energies are
-calculated recursively.
-****************************************************************/
 {
     const silk_float *target_ptr, *basis_ptr;
     double    energy;
@@ -578,10 +572,10 @@ calculated recursively.
     silk_float scratch_mem[ SCRATCH_SIZE ];
     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
 
-    silk_assert( complexity >= SigProc_PE_MIN_COMPLEX );
-    silk_assert( complexity <= SigProc_PE_MAX_COMPLEX );
+    silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
+    silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
 
-    if( nb_subfr == PE_MAX_NB_SUBFR ){
+    if( nb_subfr == PE_MAX_NB_SUBFR ) {
         Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
         Lag_CB_ptr    = &silk_CB_lags_stage3[ 0 ][ 0 ];
         nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
@@ -608,11 +602,11 @@ calculated recursively.
         lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) -  matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
         for( i = 1; i < lag_diff; i++ ) {
             /* remove part outside new window */
-            energy -= basis_ptr[sf_length - i] * basis_ptr[sf_length - i];
+            energy -= basis_ptr[sf_length - i] * (double)basis_ptr[sf_length - i];
             silk_assert( energy >= 0.0 );
 
             /* add part that comes into window */
-            energy += basis_ptr[ -i ] * basis_ptr[ -i ];
+            energy += basis_ptr[ -i ] * (double)basis_ptr[ -i ];
             silk_assert( energy >= 0.0 );
             silk_assert( lag_counter < SCRATCH_SIZE );
             scratch_mem[lag_counter] = (silk_float)energy;