Adds Neon assembly for correlation/convolution
[opus.git] / silk / fixed / burg_modified_FIX.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "SigProc_FIX.h"
33 #include "define.h"
34 #include "tuning_parameters.h"
35 #include "pitch.h"
36
37 #define MAX_FRAME_SIZE              384             /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
38
39 #define QA                          25
40 #define N_BITS_HEAD_ROOM            2
41 #define MIN_RSHIFTS                 -16
42 #define MAX_RSHIFTS                 (32 - QA)
43
44 /* Compute reflection coefficients from input signal */
45 void silk_burg_modified(
46     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
47     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
48     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
49     const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
50     const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
51     const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
52     const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
53     const opus_int              D,                  /* I    Order                                                       */
54     int                         arch                /* I    Run-time architecture                                       */
55 )
56 {
57     opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
58     opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
59     const opus_int16 *x_ptr;
60     opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
61     opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
62     opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
63     opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
64     opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
65     opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];
66
67     silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
68
69     /* Compute autocorrelations, added over subframes */
70     silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
71     if( rshifts > MAX_RSHIFTS ) {
72         C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
73         silk_assert( C0 > 0 );
74         rshifts = MAX_RSHIFTS;
75     } else {
76         lz = silk_CLZ32( C0 ) - 1;
77         rshifts_extra = N_BITS_HEAD_ROOM - lz;
78         if( rshifts_extra > 0 ) {
79             rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
80             C0 = silk_RSHIFT32( C0, rshifts_extra );
81         } else {
82             rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
83             C0 = silk_LSHIFT32( C0, -rshifts_extra );
84         }
85         rshifts += rshifts_extra;
86     }
87     CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
88     silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
89     if( rshifts > 0 ) {
90         for( s = 0; s < nb_subfr; s++ ) {
91             x_ptr = x + s * subfr_length;
92             for( n = 1; n < D + 1; n++ ) {
93                 C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
94                     silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
95             }
96         }
97     } else {
98         for( s = 0; s < nb_subfr; s++ ) {
99             int i;
100             opus_int32 d;
101             x_ptr = x + s * subfr_length;
102             celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
103             for( n = 1; n < D + 1; n++ ) {
104                for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
105                   d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
106                xcorr[ n - 1 ] += d;
107             }
108             for( n = 1; n < D + 1; n++ ) {
109                 C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
110             }
111         }
112     }
113     silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
114
115     /* Initialize */
116     CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
117
118     invGain_Q30 = (opus_int32)1 << 30;
119     reached_max_gain = 0;
120     for( n = 0; n < D; n++ ) {
121         /* Update first row of correlation matrix (without first element) */
122         /* Update last row of correlation matrix (without last element, stored in reversed order) */
123         /* Update C * Af */
124         /* Update C * flipud(Af) (stored in reversed order) */
125         if( rshifts > -2 ) {
126             for( s = 0; s < nb_subfr; s++ ) {
127                 x_ptr = x + s * subfr_length;
128                 x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );        /* Q(16-rshifts) */
129                 x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );        /* Q(16-rshifts) */
130                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );             /* Q(QA-16) */
131                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );             /* Q(QA-16) */
132                 for( k = 0; k < n; k++ ) {
133                     C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
134                     C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
135                     Atmp_QA = Af_QA[ k ];
136                     tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );                 /* Q(QA-16) */
137                     tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                 /* Q(QA-16) */
138                 }
139                 tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
140                 tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
141                 for( k = 0; k <= n; k++ ) {
142                     CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );        /* Q( -rshift ) */
143                     CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );        /* Q( -rshift ) */
144                 }
145             }
146         } else {
147             for( s = 0; s < nb_subfr; s++ ) {
148                 x_ptr = x + s * subfr_length;
149                 x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
150                 x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
151                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
152                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
153                 for( k = 0; k < n; k++ ) {
154                     C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
155                     C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
156                     Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
157                     tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
158                     tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
159                 }
160                 tmp1 = -tmp1;                                                                           /* Q17 */
161                 tmp2 = -tmp2;                                                                           /* Q17 */
162                 for( k = 0; k <= n; k++ ) {
163                     CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
164                         silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
165                     CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
166                         silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
167                 }
168             }
169         }
170
171         /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
172         tmp1 = C_first_row[ n ];                                                                        /* Q( -rshifts ) */
173         tmp2 = C_last_row[ n ];                                                                         /* Q( -rshifts ) */
174         num  = 0;                                                                                       /* Q( -rshifts ) */
175         nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                        /* Q( 1-rshifts ) */
176         for( k = 0; k < n; k++ ) {
177             Atmp_QA = Af_QA[ k ];
178             lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
179             lz = silk_min( 32 - QA, lz );
180             Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                       /* Q( QA + lz ) */
181
182             tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
183             tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
184             num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
185             nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
186                                                                                 Atmp1 ), 32 - QA - lz );    /* Q( 1-rshifts ) */
187         }
188         CAf[ n + 1 ] = tmp1;                                                                            /* Q( -rshifts ) */
189         CAb[ n + 1 ] = tmp2;                                                                            /* Q( -rshifts ) */
190         num = silk_ADD32( num, tmp2 );                                                                  /* Q( -rshifts ) */
191         num = silk_LSHIFT32( -num, 1 );                                                                 /* Q( 1-rshifts ) */
192
193         /* Calculate the next order reflection (parcor) coefficient */
194         if( silk_abs( num ) < nrg ) {
195             rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
196         } else {
197             rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
198         }
199
200         /* Update inverse prediction gain */
201         tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
202         tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
203         if( tmp1 <= minInvGain_Q30 ) {
204             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
205             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
206             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
207             /* Newton-Raphson iteration */
208             rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
209             rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
210             if( num < 0 ) {
211                 /* Ensure adjusted reflection coefficients has the original sign */
212                 rc_Q31 = -rc_Q31;
213             }
214             invGain_Q30 = minInvGain_Q30;
215             reached_max_gain = 1;
216         } else {
217             invGain_Q30 = tmp1;
218         }
219
220         /* Update the AR coefficients */
221         for( k = 0; k < (n + 1) >> 1; k++ ) {
222             tmp1 = Af_QA[ k ];                                                                  /* QA */
223             tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA */
224             Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );      /* QA */
225             Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );      /* QA */
226         }
227         Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                          /* QA */
228
229         if( reached_max_gain ) {
230             /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
231             for( k = n + 1; k < D; k++ ) {
232                 Af_QA[ k ] = 0;
233             }
234             break;
235         }
236
237         /* Update C * Af and C * Ab */
238         for( k = 0; k <= n + 1; k++ ) {
239             tmp1 = CAf[ k ];                                                                    /* Q( -rshifts ) */
240             tmp2 = CAb[ n - k + 1 ];                                                            /* Q( -rshifts ) */
241             CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );        /* Q( -rshifts ) */
242             CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );        /* Q( -rshifts ) */
243         }
244     }
245
246     if( reached_max_gain ) {
247         for( k = 0; k < D; k++ ) {
248             /* Scale coefficients */
249             A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
250         }
251         /* Subtract energy of preceding samples from C0 */
252         if( rshifts > 0 ) {
253             for( s = 0; s < nb_subfr; s++ ) {
254                 x_ptr = x + s * subfr_length;
255                 C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
256             }
257         } else {
258             for( s = 0; s < nb_subfr; s++ ) {
259                 x_ptr = x + s * subfr_length;
260                 C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
261             }
262         }
263         /* Approximate residual energy */
264         *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
265         *res_nrg_Q = -rshifts;
266     } else {
267         /* Return residual energy */
268         nrg  = CAf[ 0 ];                                                                            /* Q( -rshifts ) */
269         tmp1 = (opus_int32)1 << 16;                                                                             /* Q16 */
270         for( k = 0; k < D; k++ ) {
271             Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                       /* Q16 */
272             nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                         /* Q( -rshifts ) */
273             tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                               /* Q16 */
274             A_Q16[ k ] = -Atmp1;
275         }
276         *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
277         *res_nrg_Q = -rshifts;
278     }
279 }