/* Cisco optimization for x86 & fixed point
 * [opus.git] / silk / fixed / burg_modified_FIX.c */
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "SigProc_FIX.h"
33 #include "define.h"
34 #include "tuning_parameters.h"
35 #include "pitch.h"
36
37 #define MAX_FRAME_SIZE              384             /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
38
39 #define QA                          25
40 #define N_BITS_HEAD_ROOM            2
41 #define MIN_RSHIFTS                 -16
42 #define MAX_RSHIFTS                 (32 - QA)
43
44 /* Compute reflection coefficients from input signal */
45 void silk_burg_modified_c(
46     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
47     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
48     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
49     const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
50     const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
51     const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
52     const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
53     const opus_int              D,                  /* I    Order                                                       */
54     int                         arch                /* I    Run-time architecture                                       */
55 )
56 {
57     opus_int         k, n, s, lz, rshifts, reached_max_gain;
58     opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
59     const opus_int16 *x_ptr;
60     opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
61     opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
62     opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
63     opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
64     opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
65     opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];
66     opus_int64       C0_64;
67
68     silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
69
70     /* Compute autocorrelations, added over subframes */
71     C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch );
72     lz = silk_CLZ64(C0_64);
73     rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz;
74     if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS;
75     if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS;
76     
77     if (rshifts > 0) {
78         C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts );        
79     } else {
80         C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts );
81     }
82     
83     CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
84     silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
85     if( rshifts > 0 ) {
86         for( s = 0; s < nb_subfr; s++ ) {
87             x_ptr = x + s * subfr_length;
88             for( n = 1; n < D + 1; n++ ) {
89                 C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
90                     silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts );
91             }
92         }
93     } else {
94         for( s = 0; s < nb_subfr; s++ ) {
95             int i;
96             opus_int32 d;
97             x_ptr = x + s * subfr_length;
98             celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
99             for( n = 1; n < D + 1; n++ ) {
100                for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ )
101                   d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] );
102                xcorr[ n - 1 ] += d;
103             }
104             for( n = 1; n < D + 1; n++ ) {
105                 C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
106             }
107         }
108     }
109     silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
110
111     /* Initialize */
112     CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
113
114     invGain_Q30 = (opus_int32)1 << 30;
115     reached_max_gain = 0;
116     for( n = 0; n < D; n++ ) {
117         /* Update first row of correlation matrix (without first element) */
118         /* Update last row of correlation matrix (without last element, stored in reversed order) */
119         /* Update C * Af */
120         /* Update C * flipud(Af) (stored in reversed order) */
121         if( rshifts > -2 ) {
122             for( s = 0; s < nb_subfr; s++ ) {
123                 x_ptr = x + s * subfr_length;
124                 x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );        /* Q(16-rshifts) */
125                 x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );        /* Q(16-rshifts) */
126                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );             /* Q(QA-16) */
127                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );             /* Q(QA-16) */
128                 for( k = 0; k < n; k++ ) {
129                     C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
130                     C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
131                     Atmp_QA = Af_QA[ k ];
132                     tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );                 /* Q(QA-16) */
133                     tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                 /* Q(QA-16) */
134                 }
135                 tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
136                 tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
137                 for( k = 0; k <= n; k++ ) {
138                     CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );        /* Q( -rshift ) */
139                     CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );        /* Q( -rshift ) */
140                 }
141             }
142         } else {
143             for( s = 0; s < nb_subfr; s++ ) {
144                 x_ptr = x + s * subfr_length;
145                 x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
146                 x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
147                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
148                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
149                 for( k = 0; k < n; k++ ) {
150                     C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
151                     C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
152                     Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
153                     tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
154                     tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
155                 }
156                 tmp1 = -tmp1;                                                                           /* Q17 */
157                 tmp2 = -tmp2;                                                                           /* Q17 */
158                 for( k = 0; k <= n; k++ ) {
159                     CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
160                         silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
161                     CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
162                         silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
163                 }
164             }
165         }
166
167         /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
168         tmp1 = C_first_row[ n ];                                                                        /* Q( -rshifts ) */
169         tmp2 = C_last_row[ n ];                                                                         /* Q( -rshifts ) */
170         num  = 0;                                                                                       /* Q( -rshifts ) */
171         nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                        /* Q( 1-rshifts ) */
172         for( k = 0; k < n; k++ ) {
173             Atmp_QA = Af_QA[ k ];
174             lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
175             lz = silk_min( 32 - QA, lz );
176             Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                       /* Q( QA + lz ) */
177
178             tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
179             tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
180             num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
181             nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
182                                                                                 Atmp1 ), 32 - QA - lz );    /* Q( 1-rshifts ) */
183         }
184         CAf[ n + 1 ] = tmp1;                                                                            /* Q( -rshifts ) */
185         CAb[ n + 1 ] = tmp2;                                                                            /* Q( -rshifts ) */
186         num = silk_ADD32( num, tmp2 );                                                                  /* Q( -rshifts ) */
187         num = silk_LSHIFT32( -num, 1 );                                                                 /* Q( 1-rshifts ) */
188
189         /* Calculate the next order reflection (parcor) coefficient */
190         if( silk_abs( num ) < nrg ) {
191             rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
192         } else {
193             rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
194         }
195
196         /* Update inverse prediction gain */
197         tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
198         tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
199         if( tmp1 <= minInvGain_Q30 ) {
200             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
201             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
202             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
203             /* Newton-Raphson iteration */
204             rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
205             rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
206             if( num < 0 ) {
207                 /* Ensure adjusted reflection coefficients has the original sign */
208                 rc_Q31 = -rc_Q31;
209             }
210             invGain_Q30 = minInvGain_Q30;
211             reached_max_gain = 1;
212         } else {
213             invGain_Q30 = tmp1;
214         }
215
216         /* Update the AR coefficients */
217         for( k = 0; k < (n + 1) >> 1; k++ ) {
218             tmp1 = Af_QA[ k ];                                                                  /* QA */
219             tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA */
220             Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );      /* QA */
221             Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );      /* QA */
222         }
223         Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                          /* QA */
224
225         if( reached_max_gain ) {
226             /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
227             for( k = n + 1; k < D; k++ ) {
228                 Af_QA[ k ] = 0;
229             }
230             break;
231         }
232
233         /* Update C * Af and C * Ab */
234         for( k = 0; k <= n + 1; k++ ) {
235             tmp1 = CAf[ k ];                                                                    /* Q( -rshifts ) */
236             tmp2 = CAb[ n - k + 1 ];                                                            /* Q( -rshifts ) */
237             CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );        /* Q( -rshifts ) */
238             CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );        /* Q( -rshifts ) */
239         }
240     }
241
242     if( reached_max_gain ) {
243         for( k = 0; k < D; k++ ) {
244             /* Scale coefficients */
245             A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
246         }
247         /* Subtract energy of preceding samples from C0 */
248         if( rshifts > 0 ) {
249             for( s = 0; s < nb_subfr; s++ ) {
250                 x_ptr = x + s * subfr_length;
251                 C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
252             }
253         } else {
254             for( s = 0; s < nb_subfr; s++ ) {
255                 x_ptr = x + s * subfr_length;
256                 C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch), -rshifts);
257             }
258         }
259         /* Approximate residual energy */
260         *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
261         *res_nrg_Q = -rshifts;
262     } else {
263         /* Return residual energy */
264         nrg  = CAf[ 0 ];                                                                            /* Q( -rshifts ) */
265         tmp1 = (opus_int32)1 << 16;                                                                             /* Q16 */
266         for( k = 0; k < D; k++ ) {
267             Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                       /* Q16 */
268             nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                         /* Q( -rshifts ) */
269             tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                               /* Q16 */
270             A_Q16[ k ] = -Atmp1;
271         }
272         *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
273         *res_nrg_Q = -rshifts;
274     }
275 }