Accuracy improvements to help float implementations
[opus.git] / silk / fixed / burg_modified_FIX.c
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, (subject to the limitations in the disclaimer below)
are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Skype Limited, nor the names of specific
contributors, may be used to endorse or promote products derived from
this software without specific prior written permission.
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED
BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "SigProc_FIX.h"
#include "define.h"
#include "tuning_parameters.h"

#define MAX_FRAME_SIZE              384             /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */

#define QA                          25
#define N_BITS_HEAD_ROOM            2
#define MIN_RSHIFTS                 -16
#define MAX_RSHIFTS                 (32 - QA)

/* Compute reflection coefficients from input signal */
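/* Uses a modified Burg recursion: for each order it updates the forward/backward correlation
   vectors CAf and CAb, derives the next reflection (parcor) coefficient, limits it so the
   inverse prediction gain stays above minInvGain_Q30, and applies the Levinson-style order
   update to the AR coefficients held in Af_QA. */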
void silk_burg_modified(
    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
    const opus_int              D                   /* I    Order                                                       */
)
{
    opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
    opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
    const opus_int16 *x_ptr;
    opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
    opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
    opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
    opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
    opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];

    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );

    /* Compute autocorrelations, added over subframes */
    silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
    if( rshifts > MAX_RSHIFTS ) {
        C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
        silk_assert( C0 > 0 );
        rshifts = MAX_RSHIFTS;
    } else {
        lz = silk_CLZ32( C0 ) - 1;
        rshifts_extra = N_BITS_HEAD_ROOM - lz;
        if( rshifts_extra > 0 ) {
            rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
            C0 = silk_RSHIFT32( C0, rshifts_extra );
        } else {
            rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
            C0 = silk_LSHIFT32( C0, -rshifts_extra );
        }
        rshifts += rshifts_extra;
    }
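    /* rshifts now lies in [ MIN_RSHIFTS, MAX_RSHIFTS ] and C0 has about N_BITS_HEAD_ROOM bits
       of headroom, so the correlations accumulated below stay within 32-bit range at Q(-rshifts). */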
    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
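    /* The FIND_LPC_COND_FAC term above adds a small fraction of C0 (plus 1 to keep the value
       strictly positive) to the zero-lag correlation, acting as white-noise regularization
       that conditions the problem. */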
    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
    if( rshifts > 0 ) {
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            for( n = 1; n < D + 1; n++ ) {
                C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
                    silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts );
            }
        }
    } else {
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            for( n = 1; n < D + 1; n++ ) {
                C_first_row[ n - 1 ] += silk_LSHIFT32(
                    silk_inner_prod_aligned( x_ptr, x_ptr + n, subfr_length - n ), -rshifts );
            }
        }
    }
    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
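    /* C_first_row[ n - 1 ] now holds the lag-n autocorrelation summed over subframes; C_last_row
       starts out identical, and the two rows diverge in the order loop below as the contributions
       of each subframe's leading and trailing samples are removed per order. */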

    /* Initialize */
    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */

    invGain_Q30 = 1 << 30;
    reached_max_gain = 0;
    for( n = 0; n < D; n++ ) {
        /* Update first row of correlation matrix (without first element) */
        /* Update last row of correlation matrix (without last element, stored in reversed order) */
        /* Update C * Af */
        /* Update C * flipud(Af) (stored in reversed order) */
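        /* In both branches below, x1/x2 remove the contribution of the leading and trailing edge
           samples, while tmp1/tmp2 accumulate the forward and backward prediction errors at those
           samples so that CAf and CAb can be updated in place. */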
        if( rshifts > -2 ) {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    16 - rshifts );        /* Q(16-rshifts) */
                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts );        /* Q(16-rshifts) */
                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    QA - 16 );             /* Q(QA-16) */
                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );             /* Q(QA-16) */
                for( k = 0; k < n; k++ ) {
                    C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
                    C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
                    Atmp_QA = Af_QA[ k ];
                    tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ]            );                 /* Q(QA-16) */
                    tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );                 /* Q(QA-16) */
                }
                tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
                tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                       /* Q(16-rshifts) */
                for( k = 0; k <= n; k++ ) {
                    CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ]                    );        /* Q( -rshift ) */
                    CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );        /* Q( -rshift ) */
                }
            }
        } else {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                x1  = -silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    -rshifts );            /* Q( -rshifts ) */
                x2  = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );            /* Q( -rshifts ) */
                tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ],                    17 );                  /* Q17 */
                tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );                  /* Q17 */
                for( k = 0; k < n; k++ ) {
                    C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
                    C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
                    Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
                    tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
                    tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
                }
                tmp1 = -tmp1;                                                                           /* Q17 */
                tmp2 = -tmp2;                                                                           /* Q17 */
                for( k = 0; k <= n; k++ ) {
                    CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
                        silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                    /* Q( -rshift ) */
                    CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
                        silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
                }
            }
        }

        /* Calculate numerator and denominator for the next order reflection (parcor) coefficient */
        tmp1 = C_first_row[ n ];                                                                        /* Q( -rshifts ) */
        tmp2 = C_last_row[ n ];                                                                         /* Q( -rshifts ) */
        num  = 0;                                                                                       /* Q( -rshifts ) */
        nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                        /* Q( 1-rshifts ) */
        for( k = 0; k < n; k++ ) {
            Atmp_QA = Af_QA[ k ];
            lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
            lz = silk_min( 32 - QA, lz );
            Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                       /* Q( QA + lz ) */

            tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[  n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
            tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
            num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ],             Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
            nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
                                                                                Atmp1 ), 32 - QA - lz );    /* Q( 1-rshifts ) */
        }
        CAf[ n + 1 ] = tmp1;                                                                            /* Q( -rshifts ) */
        CAb[ n + 1 ] = tmp2;                                                                            /* Q( -rshifts ) */
        num = silk_ADD32( num, tmp2 );                                                                  /* Q( -rshifts ) */
        num = silk_LSHIFT32( -num, 1 );                                                                 /* Q( 1-rshifts ) */

        /* Calculate the next order reflection (parcor) coefficient */
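        /* rc = num / nrg in Q31; when |num| >= nrg the Q31 division would overflow, so the
           coefficient is saturated to +/-1 instead. */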
        if( silk_abs( num ) < nrg ) {
            rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
        } else {
            rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
        }

        /* Update inverse prediction gain */
        tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
        tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
        if( tmp1 <= minInvGain_Q30 ) {
            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
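            /* From invGain' = invGain * ( 1 - rc^2 ) and the requirement invGain' = minInvGain,
               rc^2 = 1 - minInvGain / invGain; take the square root (refined by one
               Newton-Raphson step) and restore the sign of the numerator. */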
            tmp2 = ( 1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
            rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
            /* Newton-Raphson iteration */
            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
            if( num < 0 ) {
                /* Ensure the adjusted reflection coefficient has the original sign */
                rc_Q31 = -rc_Q31;
            }
            invGain_Q30 = minInvGain_Q30;
            reached_max_gain = 1;
        } else {
            invGain_Q30 = tmp1;
        }

        /* Update the AR coefficients */
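        /* Standard Burg/Levinson order update: Af'[ k ] = Af[ k ] + rc * Af[ n - 1 - k ],
           applied pairwise from both ends, with the new coefficient Af[ n ] = rc. */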
        for( k = 0; k < (n + 1) >> 1; k++ ) {
            tmp1 = Af_QA[ k ];                                                                  /* QA */
            tmp2 = Af_QA[ n - k - 1 ];                                                          /* QA */
            Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );      /* QA */
            Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );      /* QA */
        }
        Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                          /* QA */

        if( reached_max_gain ) {
            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
            for( k = n + 1; k < D; k++ ) {
                Af_QA[ k ] = 0;
            }
            break;
        }

        /* Update C * Af and C * Ab */
        for( k = 0; k <= n + 1; k++ ) {
            tmp1 = CAf[ k ];                                                                    /* Q( -rshifts ) */
            tmp2 = CAb[ n - k + 1 ];                                                            /* Q( -rshifts ) */
            CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );        /* Q( -rshifts ) */
            CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );        /* Q( -rshifts ) */
        }
    }

    if( reached_max_gain ) {
        for( k = 0; k < D; k++ ) {
            /* Scale coefficients */
            A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
        }
        /* Subtract energy of preceding samples from C0 */
        if( rshifts > 0 ) {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts );
            }
        } else {
            for( s = 0; s < nb_subfr; s++ ) {
                x_ptr = x + s * subfr_length;
                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts );
            }
        }
        /* Approximate residual energy */
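        /* res_nrg ~= invGain * C0 in Q( -rshifts ), with C0 reduced to the energy of the samples
           that are actually predicted (the D priming samples of each subframe removed above). */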
        *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
        *res_nrg_Q = -rshifts;
    } else {
        /* Return residual energy */
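        /* nrg accumulates [ 1 Af ]' * C * [ 1 Af ] from CAf; the conditioning bias added to
           CAf[ 0 ] above is then (approximately) removed by subtracting
           FIND_LPC_COND_FAC * C0 * ( 1 + |Af|^2 ). */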
        nrg  = CAf[ 0 ];                                                                            /* Q( -rshifts ) */
        tmp1 = 1 << 16;                                                                             /* Q16 */
        for( k = 0; k < D; k++ ) {
            Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                       /* Q16 */
            nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                         /* Q( -rshifts ) */
            tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                               /* Q16 */
            A_Q16[ k ] = -Atmp1;
        }
        *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );   /* Q( -rshifts ) */
        *res_nrg_Q = -rshifts;
    }
}
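
/* Illustrative usage sketch (not part of the library; the subframe length, number of subframes,
   order, and minimum inverse gain below are hypothetical values chosen only to show how the
   arguments fit together):

       opus_int32 res_nrg, A_Q16[ SILK_MAX_ORDER_LPC ];
       opus_int   res_nrg_Q;
       opus_int16 x[ MAX_FRAME_SIZE ];       // nb_subfr * subfr_length samples, incl. priming samples

       // ... fill x with the windowed input signal ...

       silk_burg_modified( &res_nrg, &res_nrg_Q, A_Q16, x,
                           SILK_FIX_CONST( 0.25, 30 ),       // minInvGain_Q30 (hypothetical)
                           96, 4, 16 );                      // subfr_length, nb_subfr, D (order)
*/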