Adds support for delayed decision
[opus.git] / src / analysis.c
1 /* Copyright (c) 2011 Xiph.Org Foundation
2    Written by Jean-Marc Valin */
3 /*
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7
8    - Redistributions of source code must retain the above copyright
9    notice, this list of conditions and the following disclaimer.
10
11    - Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14
15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
19    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "kiss_fft.h"
33 #include "celt.h"
34 #include "modes.h"
35 #include "arch.h"
36 #include "quant_bands.h"
37 #include <stdio.h>
38 #include "analysis.h"
39 #include "mlp.h"
40
41 extern const MLP net;
42
43 #ifndef M_PI
44 #define M_PI 3.141592653
45 #endif
46
47 static const float dct_table[128] = {
48         0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
49         0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
50         0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f,
51        -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f,
52         0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f,
53        -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f,
54         0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f,
55         0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f,
56         0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
57         0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
58         0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f,
59        -0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f,
60         0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f,
61        -0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f,
62         0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f,
63         0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f,
64 };
65
66 static const float analysis_window[240] = {
67       0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
68       0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
69       0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
70       0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
71       0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
72       0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
73       0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
74       0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
75       0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
76       0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
77       0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
78       0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
79       0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
80       0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
81       0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
82       0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
83       0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
84       0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
85       0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
86       0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
87       0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
88       0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
89       0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
90       0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
91       0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
92       0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
93       0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
94       0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
95       0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
96       0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
97 };
98
99 static const int tbands[NB_TBANDS+1] = {
100        2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
101 };
102
103 static const int extra_bands[NB_TOT_BANDS+1] = {
104       0, 2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
105 };
106
107 /*static const float tweight[NB_TBANDS+1] = {
108       .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
109 };*/
110
111 #define NB_TONAL_SKIP_BANDS 9
112
113 #define cA 0.43157974f
114 #define cB 0.67848403f
115 #define cC 0.08595542f
116 #define cE ((float)M_PI/2)
117 static inline float fast_atan2f(float y, float x) {
118    float x2, y2;
119    /* Should avoid underflow on the values we'll get */
120    if (ABS16(x)+ABS16(y)<1e-9f)
121    {
122       x*=1e12f;
123       y*=1e12f;
124    }
125    x2 = x*x;
126    y2 = y*y;
127    if(x2<y2){
128       float den = (y2 + cB*x2) * (y2 + cC*x2);
129       if (den!=0)
130          return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
131       else
132          return (y<0 ? -cE : cE);
133    }else{
134       float den = (x2 + cB*y2) * (x2 + cC*y2);
135       if (den!=0)
136          return  x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
137       else
138          return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
139    }
140 }
141
142 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
143 {
144 #if 1
145    int pos;
146    int curr_lookahead;
147    float psum;
148    int i;
149
150    pos = tonal->read_pos;
151    curr_lookahead = tonal->write_pos-tonal->read_pos;
152    if (curr_lookahead<0)
153       curr_lookahead += DETECT_SIZE;
154
155    if (len > 480 && pos != tonal->write_pos)
156    {
157       pos++;
158       if (pos==DETECT_SIZE)
159          pos=0;
160    }
161    if (pos == tonal->write_pos)
162       pos--;
163    if (pos<0)
164       pos = DETECT_SIZE-1;
165    OPUS_COPY(info_out, &tonal->info[pos], 1);
166    tonal->read_subframe += len/120;
167    while (tonal->read_subframe>=4)
168    {
169       tonal->read_subframe -= 4;
170       tonal->read_pos++;
171    }
172    if (tonal->read_pos>=DETECT_SIZE)
173       tonal->read_pos-=DETECT_SIZE;
174
175    /* Compensate for the delay in the features themselves.
176       FIXME: Need a better estimate the 10 I just made up */
177    curr_lookahead = IMAX(curr_lookahead-10, 0);
178
179    psum=0;
180    for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
181       psum += tonal->pmusic[i];
182    for (;i<DETECT_SIZE;i++)
183       psum += tonal->pspeech[i];
184    /*printf("%f %f\n", psum, info_out->music_prob);*/
185
186    info_out->music_prob = psum;
187 #else
188    /* If data not available, return invalid */
189    if (tonal->read_pos==tonal->write_pos)
190    {
191       info_out->valid=0;
192       return;
193    }
194
195    OPUS_COPY(info_out, &tonal->info[tonal->read_pos], 1);
196    tonal->read_subframe += len/480;
197    while (tonal->read_subframe>=4)
198    {
199       tonal->read_subframe -= 4;
200       tonal->read_pos++;
201    }
202    if (tonal->read_pos>=DETECT_SIZE)
203       tonal->read_pos-=DETECT_SIZE;
204    if (tonal->read_pos == tonal->write_pos)
205    {
206       tonal->read_pos = tonal->write_pos-1;
207       if (tonal->read_pos<0)
208          tonal->read_pos=DETECT_SIZE-1;
209       tonal->read_subframe = 3;
210    }
211 #endif
212 }
213
214 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
215 {
216     int i, b;
217     const kiss_fft_state *kfft;
218     kiss_fft_cpx in[480], out[480];
219     int N = 480, N2=240;
220     float * OPUS_RESTRICT A = tonal->angle;
221     float * OPUS_RESTRICT dA = tonal->d_angle;
222     float * OPUS_RESTRICT d2A = tonal->d2_angle;
223     float tonality[240];
224     float noisiness[240];
225     float band_tonality[NB_TBANDS];
226     float logE[NB_TBANDS];
227     float BFCC[8];
228     float features[100];
229     float frame_tonality;
230     float max_frame_tonality;
231     /*float tw_sum=0;*/
232     float frame_noisiness;
233     const float pi4 = (float)(M_PI*M_PI*M_PI*M_PI);
234     float slope=0;
235     float frame_stationarity;
236     float relativeE;
237     float frame_prob;
238     float alpha, alphaE, alphaE2;
239     float frame_loudness;
240     float bandwidth_mask;
241     int bandwidth=0;
242     float maxE = 0;
243     float noise_floor;
244     int remaining;
245     AnalysisInfo *info;
246
247     tonal->last_transition++;
248     alpha = 1.f/IMIN(20, 1+tonal->count);
249     alphaE = 1.f/IMIN(50, 1+tonal->count);
250     alphaE2 = 1.f/IMIN(6000, 1+tonal->count);
251
252     if (tonal->count<4)
253        tonal->music_prob = .5;
254     kfft = celt_mode->mdct.kfft[0];
255     if (tonal->count==0)
256        tonal->mem_fill = 240;
257     downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
258     if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
259     {
260        tonal->mem_fill += len;
261        /* Don't have enough to update the analysis */
262        return;
263     }
264     info = &tonal->info[tonal->write_pos++];
265     if (tonal->write_pos>=DETECT_SIZE)
266        tonal->write_pos-=DETECT_SIZE;
267
268     for (i=0;i<N2;i++)
269     {
270        float w = analysis_window[i];
271        in[i].r = MULT16_16(w, tonal->inmem[i]);
272        in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
273        in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
274        in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
275     }
276     OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
277     remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
278     downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
279     tonal->mem_fill = 240 + remaining;
280     opus_fft(kfft, in, out);
281
282     for (i=1;i<N2;i++)
283     {
284        float X1r, X2r, X1i, X2i;
285        float angle, d_angle, d2_angle;
286        float angle2, d_angle2, d2_angle2;
287        float mod1, mod2, avg_mod;
288        X1r = out[i].r+out[N-i].r;
289        X1i = out[i].i-out[N-i].i;
290        X2r = out[i].i+out[N-i].i;
291        X2i = out[N-i].r-out[i].r;
292
293        angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
294        d_angle = angle - A[i];
295        d2_angle = d_angle - dA[i];
296
297        angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
298        d_angle2 = angle2 - angle;
299        d2_angle2 = d_angle2 - d_angle;
300
301        mod1 = d2_angle - (float)floor(.5+d2_angle);
302        noisiness[i] = ABS16(mod1);
303        mod1 *= mod1;
304        mod1 *= mod1;
305
306        mod2 = d2_angle2 - (float)floor(.5+d2_angle2);
307        noisiness[i] += ABS16(mod2);
308        mod2 *= mod2;
309        mod2 *= mod2;
310
311        avg_mod = .25f*(d2A[i]+2.f*mod1+mod2);
312        tonality[i] = 1.f/(1.f+40.f*16.f*pi4*avg_mod)-.015f;
313
314        A[i] = angle2;
315        dA[i] = d_angle2;
316        d2A[i] = mod2;
317     }
318
319     frame_tonality = 0;
320     max_frame_tonality = 0;
321     /*tw_sum = 0;*/
322     info->activity = 0;
323     frame_noisiness = 0;
324     frame_stationarity = 0;
325     if (!tonal->count)
326     {
327        for (b=0;b<NB_TBANDS;b++)
328        {
329           tonal->lowE[b] = 1e10;
330           tonal->highE[b] = -1e10;
331        }
332     }
333     relativeE = 0;
334     frame_loudness = 0;
335     bandwidth_mask = 0;
336     for (b=0;b<NB_TBANDS;b++)
337     {
338        float E=0, tE=0, nE=0;
339        float L1, L2;
340        float stationarity;
341        for (i=tbands[b];i<tbands[b+1];i++)
342        {
343           float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
344                      + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
345           E += binE;
346           tE += binE*tonality[i];
347           nE += binE*2.f*(.5f-noisiness[i]);
348        }
349        tonal->E[tonal->E_count][b] = E;
350        frame_noisiness += nE/(1e-15f+E);
351
352        frame_loudness += celt_sqrt(E+1e-10f);
353        logE[b] = (float)log(E+1e-10f);
354        tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
355        tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
356        if (tonal->highE[b] < tonal->lowE[b]+1.f)
357        {
358           tonal->highE[b]+=.5f;
359           tonal->lowE[b]-=.5f;
360        }
361        relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
362
363        L1=L2=0;
364        for (i=0;i<NB_FRAMES;i++)
365        {
366           L1 += celt_sqrt(tonal->E[i][b]);
367           L2 += tonal->E[i][b];
368        }
369
370        stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2));
371        stationarity *= stationarity;
372        stationarity *= stationarity;
373        frame_stationarity += stationarity;
374        /*band_tonality[b] = tE/(1e-15+E)*/;
375        band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
376 #if 0
377        if (b>=NB_TONAL_SKIP_BANDS)
378        {
379           frame_tonality += tweight[b]*band_tonality[b];
380           tw_sum += tweight[b];
381        }
382 #else
383        frame_tonality += band_tonality[b];
384        if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
385           frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
386 #endif
387        max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f*(b-NB_TBANDS))*frame_tonality);
388        slope += band_tonality[b]*(b-8);
389        /*printf("%f %f ", band_tonality[b], stationarity);*/
390        tonal->prev_band_tonality[b] = band_tonality[b];
391     }
392
393     bandwidth_mask = 0;
394     bandwidth = 0;
395     for (b=0;b<NB_TOT_BANDS;b++)
396        maxE = MAX32(maxE, tonal->meanE[b]);
397     noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
398     noise_floor *= noise_floor;
399     for (b=0;b<NB_TOT_BANDS;b++)
400     {
401        float E=0;
402        int band_start, band_end;
403        /* Keep a margin of 300 Hz for aliasing */
404        band_start = extra_bands[b]+3;
405        band_end = extra_bands[b+1]+3;
406        for (i=band_start;i<band_end;i++)
407        {
408           float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
409                      + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
410           E += binE;
411        }
412        E /= (band_end-band_start);
413        maxE = MAX32(maxE, E);
414        if (tonal->count>2)
415        {
416           tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;
417        } else {
418           tonal->meanE[b] = E;
419        }
420        E = MAX32(E, tonal->meanE[b]);
421        /* 13 dB slope for spreading function */
422        bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
423        /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */
424        if (E>.1*bandwidth_mask && E*1e10f > maxE && E > noise_floor)
425           bandwidth = b;
426     }
427     if (tonal->count<=2)
428        bandwidth = 20;
429     frame_loudness = 20*(float)log10(frame_loudness);
430     tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness);
431     tonal->lowECount *= (1-alphaE);
432     if (frame_loudness < tonal->Etracker-30)
433        tonal->lowECount += alphaE;
434
435     for (i=0;i<8;i++)
436     {
437        float sum=0;
438        for (b=0;b<16;b++)
439           sum += dct_table[i*16+b]*logE[b];
440        BFCC[i] = sum;
441     }
442
443     frame_stationarity /= NB_TBANDS;
444     relativeE /= NB_TBANDS;
445     if (tonal->count<10)
446        relativeE = .5;
447     frame_noisiness /= NB_TBANDS;
448 #if 1
449     info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
450 #else
451     info->activity = .5*(1+frame_noisiness-frame_stationarity);
452 #endif
453     frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
454     frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);
455     tonal->prev_tonality = frame_tonality;
456
457     slope /= 8*8;
458     info->tonality_slope = slope;
459
460     tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
461     tonal->count++;
462     info->tonality = frame_tonality;
463
464     for (i=0;i<4;i++)
465        features[i] = -0.12299f*(BFCC[i]+tonal->mem[i+24]) + 0.49195f*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693f*tonal->mem[i+8] - 1.4349f*tonal->cmean[i];
466
467     for (i=0;i<4;i++)
468        tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i];
469
470     for (i=0;i<4;i++)
471         features[4+i] = 0.63246f*(BFCC[i]-tonal->mem[i+24]) + 0.31623f*(tonal->mem[i]-tonal->mem[i+16]);
472     for (i=0;i<3;i++)
473         features[8+i] = 0.53452f*(BFCC[i]+tonal->mem[i+24]) - 0.26726f*(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8];
474
475     if (tonal->count > 5)
476     {
477        for (i=0;i<9;i++)
478           tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
479     }
480
481     for (i=0;i<8;i++)
482     {
483        tonal->mem[i+24] = tonal->mem[i+16];
484        tonal->mem[i+16] = tonal->mem[i+8];
485        tonal->mem[i+8] = tonal->mem[i];
486        tonal->mem[i] = BFCC[i];
487     }
488     for (i=0;i<9;i++)
489        features[11+i] = celt_sqrt(tonal->std[i]);
490     features[20] = info->tonality;
491     features[21] = info->activity;
492     features[22] = frame_stationarity;
493     features[23] = info->tonality_slope;
494     features[24] = tonal->lowECount;
495
496 #ifndef FIXED_POINT
497     mlp_process(&net, features, &frame_prob);
498     frame_prob = .5f*(frame_prob+1);
499     /* Curve fitting between the MLP probability and the actual probability */
500     frame_prob = .01f + 1.21f*frame_prob*frame_prob - .23f*(float)pow(frame_prob, 10);
501
502     /*printf("%f\n", frame_prob);*/
503     {
504        float tau, beta;
505        float p0, p1;
506        float max_certainty;
507        /* One transition every 3 minutes */
508        tau = .00005f;
509        beta = .1f;
510        max_certainty = .01f+1.f/(20.f+.5f*tonal->last_transition);
511        max_certainty = 0;
512        p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
513        p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
514        p0 *= (float)pow(1-frame_prob, beta);
515        p1 *= (float)pow(frame_prob, beta);
516        tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1)));
517        info->music_prob = tonal->music_prob;
518        info->music_prob = frame_prob;
519
520        float psum=1e-20;
521        float speech0 = (float)pow(1-frame_prob, beta);
522        float music0  = (float)pow(frame_prob, beta);
523        if (tonal->count==1)
524        {
525           tonal->pspeech[0]=.5;
526           tonal->pmusic [0]=.5;
527        }
528        float s0, m0;
529        s0 = tonal->pspeech[0] + tonal->pspeech[1];
530        m0 = tonal->pmusic [0] + tonal->pmusic [1];
531        tonal->pspeech[0] = s0*(1-tau)*speech0;
532        tonal->pmusic [0] = m0*(1-tau)*music0;
533        for (i=1;i<DETECT_SIZE-1;i++)
534        {
535           tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
536           tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
537        }
538        tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
539        tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
540
541        for (i=0;i<DETECT_SIZE;i++)
542           psum += tonal->pspeech[i] + tonal->pmusic[i];
543        psum = 1.f/psum;
544        for (i=0;i<DETECT_SIZE;i++)
545        {
546           tonal->pspeech[i] *= psum;
547           tonal->pmusic [i] *= psum;
548        }
549        psum = tonal->pmusic[0];
550        for (i=1;i<DETECT_SIZE;i++)
551           psum += tonal->pspeech[i];
552
553        /*printf("%f %f %f\n", frame_prob, info->music_prob, psum);*/
554     }
555     if (tonal->last_music != (tonal->music_prob>.5f))
556        tonal->last_transition=0;
557     tonal->last_music = tonal->music_prob>.5f;
558 #else
559     info->music_prob = 0;
560 #endif
561     /*for (i=0;i<25;i++)
562        printf("%f ", features[i]);
563     printf("\n");*/
564
565     if (bandwidth<=12 || (bandwidth==13 && tonal->opus_bandwidth == OPUS_BANDWIDTH_NARROWBAND))
566        tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
567     else if (bandwidth<=14 || (bandwidth==15 && tonal->opus_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND))
568        tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
569     else if (bandwidth<=16 || (bandwidth==17 && tonal->opus_bandwidth == OPUS_BANDWIDTH_WIDEBAND))
570        tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
571     else if (bandwidth<=18)
572        tonal->opus_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
573     else
574        tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND;
575
576     info->bandwidth = bandwidth;
577     info->opus_bandwidth = tonal->opus_bandwidth;
578     /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
579     info->noisiness = frame_noisiness;
580     info->valid = 1;
581     if (info_out!=NULL)
582        OPUS_COPY(info_out, info, 1);
583 }
584
585 int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
586                         const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
587                         int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
588 {
589    int offset;
590    int pcm_len;
591
592    /* Avoid overflow/wrap-around of the analysis buffer */
593    frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);
594
595    pcm_len = frame_size - analysis->analysis_offset;
596    offset = 0;
597    do {
598       tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
599       offset += 480;
600       pcm_len -= 480;
601    } while (pcm_len>0);
602    analysis->analysis_offset = frame_size;
603
604    if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
605    {
606       int LM = 3;
607       LM = optimize_framesize(pcm, frame_size, C, Fs, bitrate_bps,
608             analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
609       while ((Fs/400<<LM)>frame_size)
610          LM--;
611       frame_size = (Fs/400<<LM);
612    } else {
613       frame_size = frame_size_select(frame_size, variable_duration, Fs);
614    }
615    if (frame_size<0)
616       return -1;
617    analysis->analysis_offset -= frame_size;
618
619    /* Only perform analysis up to 20-ms frames. Longer ones will be split if
620       they're in CELT-only mode. */
621    analysis_info->valid = 0;
622    tonality_get_info(analysis, analysis_info, frame_size);
623
624    return frame_size;
625 }