Avoid processing LPC coeffs beyond the given order in NEON optimizations
[opus.git] / silk / fixed / burg_modified_FIX.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "SigProc_FIX.h"
33 #include "define.h"
34 #include "tuning_parameters.h"
35 #include "pitch.h"
36
37 #define MAX_FRAME_SIZE              384             /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
38
39 #define QA                          25
40 #define N_BITS_HEAD_ROOM            3
41 #define MIN_RSHIFTS                 -16
42 #define MAX_RSHIFTS                 (32 - QA)
43
44 /* Compute reflection coefficients from input signal */
45 void silk_burg_modified_c(
46     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
47     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
48     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
49     const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
50     const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
51     const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
52     const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
53     const opus_int              D,                  /* I    Order                                                       */
54     int                         arch                /* I    Run-time architecture                                       */
55 )
56 {
57     opus_int         k, n, s, lz, rshifts, reached_max_gain;
58     opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
59     const opus_int16 *x_ptr;
60     opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
61     opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
62     opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
63     opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
64     opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
65     opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];
66     opus_int64       C0_64;
67
68     celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
69
70     /* Compute autocorrelations, added over subframes */
71     C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch );
72     lz = silk_CLZ64(C0_64);
73     rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz;
74     if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS;
75     if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS;
76
77     if (rshifts > 0) {
78         C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts );
79     } else {
80         C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts );
81     }
82
83     CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
84     silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
85     if( rshifts > 0 ) {
86         for( s = 0; s < nb_subfr; s++ ) {
87             x_ptr = x + s * subfr_length;
88             for( n = 1; n < D + 1; n++ ) {
89                 C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
90                     silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts );
91             }
92         }
93     } else {
94         for( s = 0; s < nb_subfr; s++ ) {
95             int i;
96             opus_int32 d;
97             x_ptr = x + s * subfr_length;
98             celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
99             for( n = 1; n < D + 1; n++ ) {
100                for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
101                   d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
102                xcorr[ n - 1 ] += d;
103             }
104             for( n = 1; n < D + 1; n++ ) {
105                 C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
106             }
107         }
108     }
109     silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
110
111     /* Initialize */
112     CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
113
114     invGain_Q30 = (opus_int32)1 << 30;
115     reached_max_gain = 0;
116     for( n = 0; n < D; n++ ) {
117         /* Update first row of correlation matrix (without first element) */
118         /* Update last row of correlation matrix (without last element, stored in reversed order) */
119         /* Update C * Af */
120         /* Update C * flipud(Af) (stored in reversed order) */
121         if( rshifts > -2 ) {
122             for( s = 0; s < nb_subfr; s++ ) {
123                 x_ptr = x + s * subfr_length;
124                 x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );        /* Q(16-rshifts) */
125                 x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );        /* Q(16-rshifts) */
126                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );             /* Q(QA-16) */
127                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );             /* Q(QA-16) */
128                 for( k = 0; k < n; k++ ) {
129                     C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
130                     C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
131                     Atmp_QA = Af_QA[ k ];
132                     tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );                 /* Q(QA-16) */
133                     tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                 /* Q(QA-16) */
134                 }
135                 tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
136                 tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
137                 for( k = 0; k <= n; k++ ) {
138                     CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );        /* Q( -rshift ) */
139                     CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );        /* Q( -rshift ) */
140                 }
141             }
142         } else {
143             for( s = 0; s < nb_subfr; s++ ) {
144                 x_ptr = x + s * subfr_length;
145                 x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
146                 x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
147                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
148                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
149                 for( k = 0; k < n; k++ ) {
150                     C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
151                     C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
152                     Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
153                     /* We sometimes have get overflows in the multiplications (even beyond +/- 2^32),
154                        but they cancel each other and the real result seems to always fit in a 32-bit
155                        signed integer. This was determined experimentally, not theoretically (unfortunately). */
156                     tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
157                     tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
158                 }
159                 tmp1 = -tmp1;                                                                           /* Q17 */
160                 tmp2 = -tmp2;                                                                           /* Q17 */
161                 for( k = 0; k <= n; k++ ) {
162                     CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
163                         silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
164                     CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
165                         silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
166                 }
167             }
168         }
169
170         /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
171         tmp1 = C_first_row[ n ];                                                                        /* Q( -rshifts ) */
172         tmp2 = C_last_row[ n ];                                                                         /* Q( -rshifts ) */
173         num  = 0;                                                                                       /* Q( -rshifts ) */
174         nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                        /* Q( 1-rshifts ) */
175         for( k = 0; k < n; k++ ) {
176             Atmp_QA = Af_QA[ k ];
177             lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
178             lz = silk_min( 32 - QA, lz );
179             Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                       /* Q( QA + lz ) */
180
181             tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
182             tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
183             num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
184             nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
185                                                                                 Atmp1 ), 32 - QA - lz );    /* Q( 1-rshifts ) */
186         }
187         CAf[ n + 1 ] = tmp1;                                                                            /* Q( -rshifts ) */
188         CAb[ n + 1 ] = tmp2;                                                                            /* Q( -rshifts ) */
189         num = silk_ADD32( num, tmp2 );                                                                  /* Q( -rshifts ) */
190         num = silk_LSHIFT32( -num, 1 );                                                                 /* Q( 1-rshifts ) */
191
192         /* Calculate the next order reflection (parcor) coefficient */
193         if( silk_abs( num ) < nrg ) {
194             rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
195         } else {
196             rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
197         }
198
199         /* Update inverse prediction gain */
200         tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
201         tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
202         if( tmp1 <= minInvGain_Q30 ) {
203             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
204             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
205             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
206             if( rc_Q31 > 0 ) {
207                 /* Newton-Raphson iteration */
208                 rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                       /* Q15 */
209                 rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                                   /* Q31 */
210                 if( num < 0 ) {
211                     /* Ensure adjusted reflection coefficients has the original sign */
212                     rc_Q31 = -rc_Q31;
213                 }
214             }
215             invGain_Q30 = minInvGain_Q30;
216             reached_max_gain = 1;
217         } else {
218             invGain_Q30 = tmp1;
219         }
220
221         /* Update the AR coefficients */
222         for( k = 0; k < (n + 1) >> 1; k++ ) {
223             tmp1 = Af_QA[ k ];                                                                  /* QA */
224             tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA */
225             Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );      /* QA */
226             Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );      /* QA */
227         }
228         Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                          /* QA */
229
230         if( reached_max_gain ) {
231             /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
232             for( k = n + 1; k < D; k++ ) {
233                 Af_QA[ k ] = 0;
234             }
235             break;
236         }
237
238         /* Update C * Af and C * Ab */
239         for( k = 0; k <= n + 1; k++ ) {
240             tmp1 = CAf[ k ];                                                                    /* Q( -rshifts ) */
241             tmp2 = CAb[ n - k + 1 ];                                                            /* Q( -rshifts ) */
242             CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );        /* Q( -rshifts ) */
243             CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );        /* Q( -rshifts ) */
244         }
245     }
246
247     if( reached_max_gain ) {
248         for( k = 0; k < D; k++ ) {
249             /* Scale coefficients */
250             A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
251         }
252         /* Subtract energy of preceding samples from C0 */
253         if( rshifts > 0 ) {
254             for( s = 0; s < nb_subfr; s++ ) {
255                 x_ptr = x + s * subfr_length;
256                 C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
257             }
258         } else {
259             for( s = 0; s < nb_subfr; s++ ) {
260                 x_ptr = x + s * subfr_length;
261                 C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch), -rshifts);
262             }
263         }
264         /* Approximate residual energy */
265         *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
266         *res_nrg_Q = -rshifts;
267     } else {
268         /* Return residual energy */
269         nrg  = CAf[ 0 ];                                                                            /* Q( -rshifts ) */
270         tmp1 = (opus_int32)1 << 16;                                                                             /* Q16 */
271         for( k = 0; k < D; k++ ) {
272             Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                       /* Q16 */
273             nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                         /* Q( -rshifts ) */
274             tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                               /* Q16 */
275             A_Q16[ k ] = -Atmp1;
276         }
277         *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
278         *res_nrg_Q = -rshifts;
279     }
280 }