Use dynamic stack allocation in the SILK encoder.
[opus.git] / silk / fixed / burg_modified_FIX.c
1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the 
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "SigProc_FIX.h"
33 #include "define.h"
34 #include "tuning_parameters.h"
35
36 #define MAX_FRAME_SIZE              384             /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
37
38 #define QA                          25
39 #define N_BITS_HEAD_ROOM            2
40 #define MIN_RSHIFTS                 -16
41 #define MAX_RSHIFTS                 (32 - QA)
42
43 /* Compute reflection coefficients from input signal */
44 void silk_burg_modified(
45     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
46     opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
47     opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
48     const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
49     const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
50     const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
51     const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
52     const opus_int              D                   /* I    Order                                                       */
53 )
54 {
55     opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
56     opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
57     const opus_int16 *x_ptr;
58     opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
59     opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
60     opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
61     opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
62     opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
63
64     silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
65
66     /* Compute autocorrelations, added over subframes */
67     silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
68     if( rshifts > MAX_RSHIFTS ) {
69         C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
70         silk_assert( C0 > 0 );
71         rshifts = MAX_RSHIFTS;
72     } else {
73         lz = silk_CLZ32( C0 ) - 1;
74         rshifts_extra = N_BITS_HEAD_ROOM - lz;
75         if( rshifts_extra > 0 ) {
76             rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
77             C0 = silk_RSHIFT32( C0, rshifts_extra );
78         } else {
79             rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
80             C0 = silk_LSHIFT32( C0, -rshifts_extra );
81         }
82         rshifts += rshifts_extra;
83     }
84     CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
85     silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
86     if( rshifts > 0 ) {
87         for( s = 0; s < nb_subfr; s++ ) {
88             x_ptr = x + s * subfr_length;
89             for( n = 1; n < D + 1; n++ ) {
90                 C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
91                     silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
92             }
93         }
94     } else {
95         for( s = 0; s < nb_subfr; s++ ) {
96             x_ptr = x + s * subfr_length;
97             for( n = 1; n < D + 1; n++ ) {
98                 C_first_row[ n - 1 ] += silk_LSHIFT32(
99                     silk_inner_prod_aligned( x_ptr, x_ptr + n, subfr_length - n ), -rshifts );
100             }
101         }
102     }
103     silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
104
105     /* Initialize */
106     CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
107
108     invGain_Q30 = (opus_int32)1 << 30;
109     reached_max_gain = 0;
110     for( n = 0; n < D; n++ ) {
111         /* Update first row of correlation matrix (without first element) */
112         /* Update last row of correlation matrix (without last element, stored in reversed order) */
113         /* Update C * Af */
114         /* Update C * flipud(Af) (stored in reversed order) */
115         if( rshifts > -2 ) {
116             for( s = 0; s < nb_subfr; s++ ) {
117                 x_ptr = x + s * subfr_length;
118                 x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );        /* Q(16-rshifts) */
119                 x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );        /* Q(16-rshifts) */
120                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );             /* Q(QA-16) */
121                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );             /* Q(QA-16) */
122                 for( k = 0; k < n; k++ ) {
123                     C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
124                     C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
125                     Atmp_QA = Af_QA[ k ];
126                     tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );                 /* Q(QA-16) */
127                     tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                 /* Q(QA-16) */
128                 }
129                 tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
130                 tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
131                 for( k = 0; k <= n; k++ ) {
132                     CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );        /* Q( -rshift ) */
133                     CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );        /* Q( -rshift ) */
134                 }
135             }
136         } else {
137             for( s = 0; s < nb_subfr; s++ ) {
138                 x_ptr = x + s * subfr_length;
139                 x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
140                 x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
141                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
142                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
143                 for( k = 0; k < n; k++ ) {
144                     C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
145                     C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
146                     Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
147                     tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
148                     tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
149                 }
150                 tmp1 = -tmp1;                                                                           /* Q17 */
151                 tmp2 = -tmp2;                                                                           /* Q17 */
152                 for( k = 0; k <= n; k++ ) {
153                     CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
154                         silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
155                     CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
156                         silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
157                 }
158             }
159         }
160
161         /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
162         tmp1 = C_first_row[ n ];                                                                        /* Q( -rshifts ) */
163         tmp2 = C_last_row[ n ];                                                                         /* Q( -rshifts ) */
164         num  = 0;                                                                                       /* Q( -rshifts ) */
165         nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                        /* Q( 1-rshifts ) */
166         for( k = 0; k < n; k++ ) {
167             Atmp_QA = Af_QA[ k ];
168             lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
169             lz = silk_min( 32 - QA, lz );
170             Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                       /* Q( QA + lz ) */
171
172             tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
173             tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
174             num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
175             nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
176                                                                                 Atmp1 ), 32 - QA - lz );    /* Q( 1-rshifts ) */
177         }
178         CAf[ n + 1 ] = tmp1;                                                                            /* Q( -rshifts ) */
179         CAb[ n + 1 ] = tmp2;                                                                            /* Q( -rshifts ) */
180         num = silk_ADD32( num, tmp2 );                                                                  /* Q( -rshifts ) */
181         num = silk_LSHIFT32( -num, 1 );                                                                 /* Q( 1-rshifts ) */
182
183         /* Calculate the next order reflection (parcor) coefficient */
184         if( silk_abs( num ) < nrg ) {
185             rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
186         } else {
187             rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
188         }
189
190         /* Update inverse prediction gain */
191         tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
192         tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
193         if( tmp1 <= minInvGain_Q30 ) {
194             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
195             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
196             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
197             /* Newton-Raphson iteration */
198             rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
199             rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
200             if( num < 0 ) {
201                 /* Ensure adjusted reflection coefficients has the original sign */
202                 rc_Q31 = -rc_Q31;
203             }
204             invGain_Q30 = minInvGain_Q30;
205             reached_max_gain = 1;
206         } else {
207             invGain_Q30 = tmp1;
208         }
209
210         /* Update the AR coefficients */
211         for( k = 0; k < (n + 1) >> 1; k++ ) {
212             tmp1 = Af_QA[ k ];                                                                  /* QA */
213             tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA */
214             Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );      /* QA */
215             Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );      /* QA */
216         }
217         Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                          /* QA */
218
219         if( reached_max_gain ) {
220             /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
221             for( k = n + 1; k < D; k++ ) {
222                 Af_QA[ k ] = 0;
223             }
224             break;
225         }
226
227         /* Update C * Af and C * Ab */
228         for( k = 0; k <= n + 1; k++ ) {
229             tmp1 = CAf[ k ];                                                                    /* Q( -rshifts ) */
230             tmp2 = CAb[ n - k + 1 ];                                                            /* Q( -rshifts ) */
231             CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );        /* Q( -rshifts ) */
232             CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );        /* Q( -rshifts ) */
233         }
234     }
235
236     if( reached_max_gain ) {
237         for( k = 0; k < D; k++ ) {
238             /* Scale coefficients */
239             A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
240         }
241         /* Subtract energy of preceding samples from C0 */
242         if( rshifts > 0 ) {
243             for( s = 0; s < nb_subfr; s++ ) {
244                 x_ptr = x + s * subfr_length;
245                 C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
246             }
247         } else {
248             for( s = 0; s < nb_subfr; s++ ) {
249                 x_ptr = x + s * subfr_length;
250                 C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
251             }
252         }
253         /* Approximate residual energy */
254         *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
255         *res_nrg_Q = -rshifts;
256     } else {
257         /* Return residual energy */
258         nrg  = CAf[ 0 ];                                                                            /* Q( -rshifts ) */
259         tmp1 = (opus_int32)1 << 16;                                                                             /* Q16 */
260         for( k = 0; k < D; k++ ) {
261             Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                       /* Q16 */
262             nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                         /* Q( -rshifts ) */
263             tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                               /* Q16 */
264             A_Q16[ k ] = -Atmp1;
265         }
266         *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
267         *res_nrg_Q = -rshifts;
268     }
269 }