speed up FLAC__lpc_compute_autocorrelation()
[flac.git] / src / libFLAC / lpc.c
1 /* libFLAC - Free Lossless Audio Codec library
2  * Copyright (C) 2000,2001  Josh Coalson
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA  02111-1307, USA.
18  */
19
20 #include <assert.h>
21 #include <math.h>
22 #include <stdio.h>
23 #include "FLAC/format.h"
24 #include "private/lpc.h"
25
26 #ifndef M_LN2
27 /* math.h in VC++ doesn't seem to have this (how Microsoft is that?) */
28 #define M_LN2 0.69314718055994530942
29 #endif
30
31 void FLAC__lpc_compute_autocorrelation(const real data[], unsigned data_len, unsigned lag, real autoc[])
32 {
33         /* a readable, but slower, version */
34 #if 0
35         real d;
36         unsigned i;
37
38         assert(lag > 0);
39         assert(lag <= data_len);
40
41         while(lag--) {
42                 for(i = lag, d = 0.0; i < data_len; i++)
43                         d += data[i] * data[i - lag];
44                 autoc[lag] = d;
45         }
46 #endif
47
48         /*
49          * this version tends to run faster because of better data locality
50          * ('data_len' is usually much larger than 'lag')
51          */
52         real d;
53         unsigned sample, coeff;
54         const unsigned limit = data_len - lag;
55
56         assert(lag > 0);
57         assert(lag <= data_len);
58
59         for(coeff = 0; coeff < lag; coeff++)
60                 autoc[coeff] = 0.0;
61         for(sample = 0; sample <= limit; sample++){
62                 d = data[sample];
63                 for(coeff = 0; coeff < lag; coeff++)
64                         autoc[coeff] += d * data[sample+coeff];
65         }
66         for(; sample < data_len; sample++){
67                 d = data[sample];
68                 for(coeff = 0; coeff < data_len - sample; coeff++)
69                         autoc[coeff] += d * data[sample+coeff];
70         }
71 }
72
73 void FLAC__lpc_compute_lp_coefficients(const real autoc[], unsigned max_order, real lp_coeff[][FLAC__MAX_LPC_ORDER], real error[])
74 {
75         unsigned i, j;
76         real r, err, ref[FLAC__MAX_LPC_ORDER], lpc[FLAC__MAX_LPC_ORDER];
77
78         assert(0 < max_order);
79         assert(max_order <= FLAC__MAX_LPC_ORDER);
80         assert(autoc[0] != 0.0);
81
82         err = autoc[0];
83
84         for(i = 0; i < max_order; i++) {
85                 /* Sum up this iteration's reflection coefficient. */
86                 r =- autoc[i+1];
87                 for(j = 0; j < i; j++)
88                         r -= lpc[j] * autoc[i-j];
89                 ref[i] = (r/=err);
90
91                 /* Update LPC coefficients and total error. */
92                 lpc[i]=r;
93                 for(j = 0; j < (i>>1); j++) {
94                         real tmp = lpc[j];
95                         lpc[j] += r * lpc[i-1-j];
96                         lpc[i-1-j] += r * tmp;
97                 }
98                 if(i & 1)
99                         lpc[j] += lpc[j] * r;
100
101                 err *= (1.0 - r * r);
102
103                 /* save this order */
104                 for(j = 0; j <= i; j++)
105                         lp_coeff[i][j] = -lpc[j]; /* negate to get FIR filter coeffs */
106                 error[i] = err;
107         }
108 }
109
110 int FLAC__lpc_quantize_coefficients(const real lp_coeff[], unsigned order, unsigned precision, unsigned bits_per_sample, int32 qlp_coeff[], int *shift)
111 {
112         unsigned i;
113         real d, cmax = -1e99;
114
115         assert(bits_per_sample > 0);
116         assert(bits_per_sample <= sizeof(int32)*8);
117         assert(precision > 0);
118         assert(precision >= FLAC__MIN_QLP_COEFF_PRECISION);
119         assert(precision + bits_per_sample < sizeof(int32)*8);
120 #ifdef NDEBUG
121         (void)bits_per_sample; /* silence compiler warning about unused parameter */
122 #endif
123
124         /* drop one bit for the sign; from here on out we consider only |lp_coeff[i]| */
125         precision--;
126
127         for(i = 0; i < order; i++) {
128                 if(lp_coeff[i] == 0.0)
129                         continue;
130                 d = fabs(lp_coeff[i]);
131                 if(d > cmax)
132                         cmax = d;
133         }
134         if(cmax < 0.0) {
135                 /* => coefficients are all 0, which means our constant-detect didn't work */
136                 return 2;
137         }
138         else {
139                 const int maxshift = (int)precision - (int)floor(log(cmax) / M_LN2) - 1;
140                 const int max_shiftlimit = (1 << (FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN-1)) - 1;
141                 const int min_shiftlimit = -max_shiftlimit - 1;
142
143                 *shift = maxshift;
144
145                 if(*shift < min_shiftlimit || *shift > max_shiftlimit) {
146                         return 1;
147                 }
148         }
149
150         if(*shift != 0) { /* just to avoid wasting time... */
151                 for(i = 0; i < order; i++)
152                         qlp_coeff[i] = (int32)floor(lp_coeff[i] * (real)(1 << *shift));
153         }
154         return 0;
155 }
156
157 void FLAC__lpc_compute_residual_from_qlp_coefficients(const int32 data[], unsigned data_len, const int32 qlp_coeff[], unsigned order, int lp_quantization, int32 residual[])
158 {
159 #ifdef FLAC__OVERFLOW_DETECT
160         int64 sumo;
161 #endif
162         unsigned i, j;
163         int32 sum;
164         const int32 *history;
165
166 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
167         fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
168         for(i=0;i<order;i++)
169                 fprintf(stderr,", q[%u]=%d",i,qlp_coeff[i]);
170         fprintf(stderr,"\n");
171 #endif
172         assert(order > 0);
173
174         for(i = 0; i < data_len; i++) {
175 #ifdef FLAC__OVERFLOW_DETECT
176                 sumo = 0;
177 #endif
178                 sum = 0;
179                 history = data;
180                 for(j = 0; j < order; j++) {
181                         sum += qlp_coeff[j] * (*(--history));
182 #ifdef FLAC__OVERFLOW_DETECT
183                         sumo += (int64)qlp_coeff[j] * (int64)(*history);
184                         if(sumo > 2147483647ll || sumo < -2147483648ll) {
185                                 fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%lld\n",i,j,qlp_coeff[j],*history,sumo);
186                         }
187 #endif
188                 }
189                 *(residual++) = *(data++) - (sum >> lp_quantization);
190         }
191
192         /* Here's a slower but clearer version:
193         for(i = 0; i < data_len; i++) {
194                 sum = 0;
195                 for(j = 0; j < order; j++)
196                         sum += qlp_coeff[j] * data[i-j-1];
197                 residual[i] = data[i] - (sum >> lp_quantization);
198         }
199         */
200 }
201
202 void FLAC__lpc_restore_signal(const int32 residual[], unsigned data_len, const int32 qlp_coeff[], unsigned order, int lp_quantization, int32 data[])
203 {
204 #ifdef FLAC__OVERFLOW_DETECT
205         int64 sumo;
206 #endif
207         unsigned i, j;
208         int32 sum;
209         const int32 *history;
210
211 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
212         fprintf(stderr,"FLAC__lpc_restore_signal: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
213         for(i=0;i<order;i++)
214                 fprintf(stderr,", q[%u]=%d",i,qlp_coeff[i]);
215         fprintf(stderr,"\n");
216 #endif
217         assert(order > 0);
218
219         for(i = 0; i < data_len; i++) {
220 #ifdef FLAC__OVERFLOW_DETECT
221                 sumo = 0;
222 #endif
223                 sum = 0;
224                 history = data;
225                 for(j = 0; j < order; j++) {
226                         sum += qlp_coeff[j] * (*(--history));
227 #ifdef FLAC__OVERFLOW_DETECT
228                         sumo += (int64)qlp_coeff[j] * (int64)(*history);
229                         if(sumo > 2147483647ll || sumo < -2147483648ll) {
230                                 fprintf(stderr,"FLAC__lpc_restore_signal: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%lld\n",i,j,qlp_coeff[j],*history,sumo);
231                         }
232 #endif
233                 }
234                 *(data++) = *(residual++) + (sum >> lp_quantization);
235         }
236
237         /* Here's a slower but clearer version:
238         for(i = 0; i < data_len; i++) {
239                 sum = 0;
240                 for(j = 0; j < order; j++)
241                         sum += qlp_coeff[j] * data[i-j-1];
242                 data[i] = residual[i] + (sum >> lp_quantization);
243         }
244         */
245 }
246
247 real FLAC__lpc_compute_expected_bits_per_residual_sample(real lpc_error, unsigned total_samples)
248 {
249         real escale;
250
251         assert(lpc_error >= 0.0); /* the error can never be negative */
252         assert(total_samples > 0);
253
254         escale = 0.5 * M_LN2 * M_LN2 / (real)total_samples;
255
256         if(lpc_error > 0.0) {
257                 real bps = 0.5 * log(escale * lpc_error) / M_LN2;
258                 if(bps >= 0.0)
259                         return bps;
260                 else
261                         return 0.0;
262         }
263         else {
264                 return 0.0;
265         }
266 }
267
268 unsigned FLAC__lpc_compute_best_order(const real lpc_error[], unsigned max_order, unsigned total_samples, unsigned bits_per_signal_sample)
269 {
270         unsigned order, best_order;
271         real best_bits, tmp_bits;
272
273         assert(max_order > 0);
274
275         best_order = 0;
276         best_bits = FLAC__lpc_compute_expected_bits_per_residual_sample(lpc_error[0], total_samples) * (real)total_samples;
277
278         for(order = 1; order < max_order; order++) {
279                 tmp_bits = FLAC__lpc_compute_expected_bits_per_residual_sample(lpc_error[order], total_samples) * (real)(total_samples - order) + (real)(order * bits_per_signal_sample);
280                 if(tmp_bits < best_bits) {
281                         best_order = order;
282                         best_bits = tmp_bits;
283                 }
284         }
285
286         return best_order+1; /* +1 since index of lpc_error[] is order-1 */
287 }