Adds silence probability to speech/music detector
[opus.git] / src / analysis.c
1 /* Copyright (c) 2011 Xiph.Org Foundation
2    Written by Jean-Marc Valin */
3 /*
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7
8    - Redistributions of source code must retain the above copyright
9    notice, this list of conditions and the following disclaimer.
10
11    - Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14
15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
19    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "kiss_fft.h"
33 #include "celt.h"
34 #include "modes.h"
35 #include "arch.h"
36 #include "quant_bands.h"
37 #include <stdio.h>
38 #include "analysis.h"
39 #include "mlp.h"
40
41 extern const MLP net;
42
43 #ifndef M_PI
44 #define M_PI 3.141592653
45 #endif
46
47 static const float dct_table[128] = {
48         0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
49         0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
50         0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f,
51        -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f,
52         0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f,
53        -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f,
54         0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f,
55         0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f,
56         0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
57         0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
58         0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f,
59        -0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f,
60         0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f,
61        -0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f,
62         0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f,
63         0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f,
64 };
65
66 static const float analysis_window[240] = {
67       0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
68       0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
69       0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
70       0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
71       0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
72       0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
73       0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
74       0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
75       0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
76       0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
77       0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
78       0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
79       0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
80       0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
81       0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
82       0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
83       0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
84       0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
85       0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
86       0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
87       0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
88       0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
89       0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
90       0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
91       0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
92       0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
93       0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
94       0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
95       0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
96       0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
97 };
98
99 static const int tbands[NB_TBANDS+1] = {
100        2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
101 };
102
103 static const int extra_bands[NB_TOT_BANDS+1] = {
104       0, 2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
105 };
106
107 /*static const float tweight[NB_TBANDS+1] = {
108       .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
109 };*/
110
111 #define NB_TONAL_SKIP_BANDS 9
112
113 #define cA 0.43157974f
114 #define cB 0.67848403f
115 #define cC 0.08595542f
116 #define cE ((float)M_PI/2)
117 static inline float fast_atan2f(float y, float x) {
118    float x2, y2;
119    /* Should avoid underflow on the values we'll get */
120    if (ABS16(x)+ABS16(y)<1e-9f)
121    {
122       x*=1e12f;
123       y*=1e12f;
124    }
125    x2 = x*x;
126    y2 = y*y;
127    if(x2<y2){
128       float den = (y2 + cB*x2) * (y2 + cC*x2);
129       if (den!=0)
130          return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
131       else
132          return (y<0 ? -cE : cE);
133    }else{
134       float den = (x2 + cB*y2) * (x2 + cC*y2);
135       if (den!=0)
136          return  x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
137       else
138          return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
139    }
140 }
141
142 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
143 {
144    int pos;
145    int curr_lookahead;
146    float psum;
147    int i;
148
149    pos = tonal->read_pos;
150    curr_lookahead = tonal->write_pos-tonal->read_pos;
151    if (curr_lookahead<0)
152       curr_lookahead += DETECT_SIZE;
153
154    if (len > 480 && pos != tonal->write_pos)
155    {
156       pos++;
157       if (pos==DETECT_SIZE)
158          pos=0;
159    }
160    if (pos == tonal->write_pos)
161       pos--;
162    if (pos<0)
163       pos = DETECT_SIZE-1;
164    OPUS_COPY(info_out, &tonal->info[pos], 1);
165    tonal->read_subframe += len/120;
166    while (tonal->read_subframe>=4)
167    {
168       tonal->read_subframe -= 4;
169       tonal->read_pos++;
170    }
171    if (tonal->read_pos>=DETECT_SIZE)
172       tonal->read_pos-=DETECT_SIZE;
173
174    /* Compensate for the delay in the features themselves.
175       FIXME: Need a better estimate the 10 I just made up */
176    curr_lookahead = IMAX(curr_lookahead-10, 0);
177
178    psum=0;
179    for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
180       psum += tonal->pmusic[i];
181    for (;i<DETECT_SIZE;i++)
182       psum += tonal->pspeech[i];
183    /*printf("%f %f\n", psum, info_out->music_prob);*/
184
185    info_out->music_prob = psum;
186 }
187
188 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
189 {
190     int i, b;
191     const kiss_fft_state *kfft;
192     kiss_fft_cpx in[480], out[480];
193     int N = 480, N2=240;
194     float * OPUS_RESTRICT A = tonal->angle;
195     float * OPUS_RESTRICT dA = tonal->d_angle;
196     float * OPUS_RESTRICT d2A = tonal->d2_angle;
197     float tonality[240];
198     float noisiness[240];
199     float band_tonality[NB_TBANDS];
200     float logE[NB_TBANDS];
201     float BFCC[8];
202     float features[100];
203     float frame_tonality;
204     float max_frame_tonality;
205     /*float tw_sum=0;*/
206     float frame_noisiness;
207     const float pi4 = (float)(M_PI*M_PI*M_PI*M_PI);
208     float slope=0;
209     float frame_stationarity;
210     float relativeE;
211     float frame_probs[2];
212     float alpha, alphaE, alphaE2;
213     float frame_loudness;
214     float bandwidth_mask;
215     int bandwidth=0;
216     float maxE = 0;
217     float noise_floor;
218     int remaining;
219     AnalysisInfo *info;
220
221     tonal->last_transition++;
222     alpha = 1.f/IMIN(20, 1+tonal->count);
223     alphaE = 1.f/IMIN(50, 1+tonal->count);
224     alphaE2 = 1.f/IMIN(6000, 1+tonal->count);
225
226     if (tonal->count<4)
227        tonal->music_prob = .5;
228     kfft = celt_mode->mdct.kfft[0];
229     if (tonal->count==0)
230        tonal->mem_fill = 240;
231     downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
232     if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
233     {
234        tonal->mem_fill += len;
235        /* Don't have enough to update the analysis */
236        return;
237     }
238     info = &tonal->info[tonal->write_pos++];
239     if (tonal->write_pos>=DETECT_SIZE)
240        tonal->write_pos-=DETECT_SIZE;
241
242     for (i=0;i<N2;i++)
243     {
244        float w = analysis_window[i];
245        in[i].r = MULT16_16(w, tonal->inmem[i]);
246        in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
247        in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
248        in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
249     }
250     OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
251     remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
252     downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
253     tonal->mem_fill = 240 + remaining;
254     opus_fft(kfft, in, out);
255
256     for (i=1;i<N2;i++)
257     {
258        float X1r, X2r, X1i, X2i;
259        float angle, d_angle, d2_angle;
260        float angle2, d_angle2, d2_angle2;
261        float mod1, mod2, avg_mod;
262        X1r = out[i].r+out[N-i].r;
263        X1i = out[i].i-out[N-i].i;
264        X2r = out[i].i+out[N-i].i;
265        X2i = out[N-i].r-out[i].r;
266
267        angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
268        d_angle = angle - A[i];
269        d2_angle = d_angle - dA[i];
270
271        angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
272        d_angle2 = angle2 - angle;
273        d2_angle2 = d_angle2 - d_angle;
274
275        mod1 = d2_angle - (float)floor(.5+d2_angle);
276        noisiness[i] = ABS16(mod1);
277        mod1 *= mod1;
278        mod1 *= mod1;
279
280        mod2 = d2_angle2 - (float)floor(.5+d2_angle2);
281        noisiness[i] += ABS16(mod2);
282        mod2 *= mod2;
283        mod2 *= mod2;
284
285        avg_mod = .25f*(d2A[i]+2.f*mod1+mod2);
286        tonality[i] = 1.f/(1.f+40.f*16.f*pi4*avg_mod)-.015f;
287
288        A[i] = angle2;
289        dA[i] = d_angle2;
290        d2A[i] = mod2;
291     }
292
293     frame_tonality = 0;
294     max_frame_tonality = 0;
295     /*tw_sum = 0;*/
296     info->activity = 0;
297     frame_noisiness = 0;
298     frame_stationarity = 0;
299     if (!tonal->count)
300     {
301        for (b=0;b<NB_TBANDS;b++)
302        {
303           tonal->lowE[b] = 1e10;
304           tonal->highE[b] = -1e10;
305        }
306     }
307     relativeE = 0;
308     frame_loudness = 0;
309     bandwidth_mask = 0;
310     for (b=0;b<NB_TBANDS;b++)
311     {
312        float E=0, tE=0, nE=0;
313        float L1, L2;
314        float stationarity;
315        for (i=tbands[b];i<tbands[b+1];i++)
316        {
317           float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
318                      + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
319           E += binE;
320           tE += binE*tonality[i];
321           nE += binE*2.f*(.5f-noisiness[i]);
322        }
323        tonal->E[tonal->E_count][b] = E;
324        frame_noisiness += nE/(1e-15f+E);
325
326        frame_loudness += celt_sqrt(E+1e-10f);
327        logE[b] = (float)log(E+1e-10f);
328        tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
329        tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
330        if (tonal->highE[b] < tonal->lowE[b]+1.f)
331        {
332           tonal->highE[b]+=.5f;
333           tonal->lowE[b]-=.5f;
334        }
335        relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
336
337        L1=L2=0;
338        for (i=0;i<NB_FRAMES;i++)
339        {
340           L1 += celt_sqrt(tonal->E[i][b]);
341           L2 += tonal->E[i][b];
342        }
343
344        stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2));
345        stationarity *= stationarity;
346        stationarity *= stationarity;
347        frame_stationarity += stationarity;
348        /*band_tonality[b] = tE/(1e-15+E)*/;
349        band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
350 #if 0
351        if (b>=NB_TONAL_SKIP_BANDS)
352        {
353           frame_tonality += tweight[b]*band_tonality[b];
354           tw_sum += tweight[b];
355        }
356 #else
357        frame_tonality += band_tonality[b];
358        if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
359           frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
360 #endif
361        max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f*(b-NB_TBANDS))*frame_tonality);
362        slope += band_tonality[b]*(b-8);
363        /*printf("%f %f ", band_tonality[b], stationarity);*/
364        tonal->prev_band_tonality[b] = band_tonality[b];
365     }
366
367     bandwidth_mask = 0;
368     bandwidth = 0;
369     for (b=0;b<NB_TOT_BANDS;b++)
370        maxE = MAX32(maxE, tonal->meanE[b]);
371     noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
372     noise_floor *= noise_floor;
373     for (b=0;b<NB_TOT_BANDS;b++)
374     {
375        float E=0;
376        int band_start, band_end;
377        /* Keep a margin of 300 Hz for aliasing */
378        band_start = extra_bands[b]+3;
379        band_end = extra_bands[b+1]+3;
380        for (i=band_start;i<band_end;i++)
381        {
382           float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
383                      + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
384           E += binE;
385        }
386        E /= (band_end-band_start);
387        maxE = MAX32(maxE, E);
388        if (tonal->count>2)
389        {
390           tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;
391        } else {
392           tonal->meanE[b] = E;
393        }
394        E = MAX32(E, tonal->meanE[b]);
395        /* 13 dB slope for spreading function */
396        bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
397        /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */
398        if (E>.1*bandwidth_mask && E*1e10f > maxE && E > noise_floor)
399           bandwidth = b;
400     }
401     if (tonal->count<=2)
402        bandwidth = 20;
403     frame_loudness = 20*(float)log10(frame_loudness);
404     tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness);
405     tonal->lowECount *= (1-alphaE);
406     if (frame_loudness < tonal->Etracker-30)
407        tonal->lowECount += alphaE;
408
409     for (i=0;i<8;i++)
410     {
411        float sum=0;
412        for (b=0;b<16;b++)
413           sum += dct_table[i*16+b]*logE[b];
414        BFCC[i] = sum;
415     }
416
417     frame_stationarity /= NB_TBANDS;
418     relativeE /= NB_TBANDS;
419     if (tonal->count<10)
420        relativeE = .5;
421     frame_noisiness /= NB_TBANDS;
422 #if 1
423     info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
424 #else
425     info->activity = .5*(1+frame_noisiness-frame_stationarity);
426 #endif
427     frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
428     frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);
429     tonal->prev_tonality = frame_tonality;
430
431     slope /= 8*8;
432     info->tonality_slope = slope;
433
434     tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
435     tonal->count++;
436     info->tonality = frame_tonality;
437
438     for (i=0;i<4;i++)
439        features[i] = -0.12299f*(BFCC[i]+tonal->mem[i+24]) + 0.49195f*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693f*tonal->mem[i+8] - 1.4349f*tonal->cmean[i];
440
441     for (i=0;i<4;i++)
442        tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i];
443
444     for (i=0;i<4;i++)
445         features[4+i] = 0.63246f*(BFCC[i]-tonal->mem[i+24]) + 0.31623f*(tonal->mem[i]-tonal->mem[i+16]);
446     for (i=0;i<3;i++)
447         features[8+i] = 0.53452f*(BFCC[i]+tonal->mem[i+24]) - 0.26726f*(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8];
448
449     if (tonal->count > 5)
450     {
451        for (i=0;i<9;i++)
452           tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
453     }
454
455     for (i=0;i<8;i++)
456     {
457        tonal->mem[i+24] = tonal->mem[i+16];
458        tonal->mem[i+16] = tonal->mem[i+8];
459        tonal->mem[i+8] = tonal->mem[i];
460        tonal->mem[i] = BFCC[i];
461     }
462     for (i=0;i<9;i++)
463        features[11+i] = celt_sqrt(tonal->std[i]);
464     features[20] = info->tonality;
465     features[21] = info->activity;
466     features[22] = frame_stationarity;
467     features[23] = info->tonality_slope;
468     features[24] = tonal->lowECount;
469
470 #ifndef FIXED_POINT
471     mlp_process(&net, features, frame_probs);
472     frame_probs[0] = .5f*(frame_probs[0]+1);
473     /* Curve fitting between the MLP probability and the actual probability */
474     frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
475     frame_probs[1] = .5*frame_probs[1]+.5;
476     frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5;
477
478     /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
479     {
480        float tau, beta;
481        float p0, p1;
482        float max_certainty;
483        /* One transition every 3 minutes */
484        tau = .00005f*frame_probs[1];
485        beta = .05f;
486        max_certainty = .01f+1.f/(20.f+.5f*tonal->last_transition);
487        max_certainty = 0;
488        p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
489        p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
490        p0 *= (float)pow(1-frame_probs[0], beta);
491        p1 *= (float)pow(frame_probs[0], beta);
492        tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1)));
493        info->music_prob = tonal->music_prob;
494        info->music_prob = frame_probs[0];
495
496        float psum=1e-20;
497        float speech0 = (float)pow(1-frame_probs[0], beta);
498        float music0  = (float)pow(frame_probs[0], beta);
499        if (tonal->count==1)
500        {
501           tonal->pspeech[0]=.5;
502           tonal->pmusic [0]=.5;
503        }
504        float s0, m0;
505        s0 = tonal->pspeech[0] + tonal->pspeech[1];
506        m0 = tonal->pmusic [0] + tonal->pmusic [1];
507        tonal->pspeech[0] = s0*(1-tau)*speech0;
508        tonal->pmusic [0] = m0*(1-tau)*music0;
509        for (i=1;i<DETECT_SIZE-1;i++)
510        {
511           tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
512           tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
513        }
514        tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
515        tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
516
517        for (i=0;i<DETECT_SIZE;i++)
518           psum += tonal->pspeech[i] + tonal->pmusic[i];
519        psum = 1.f/psum;
520        for (i=0;i<DETECT_SIZE;i++)
521        {
522           tonal->pspeech[i] *= psum;
523           tonal->pmusic [i] *= psum;
524        }
525        psum = tonal->pmusic[0];
526        for (i=1;i<DETECT_SIZE;i++)
527           psum += tonal->pspeech[i];
528
529        /*printf("%f\n", psum);*/
530     }
531     if (tonal->last_music != (tonal->music_prob>.5f))
532        tonal->last_transition=0;
533     tonal->last_music = tonal->music_prob>.5f;
534 #else
535     info->music_prob = 0;
536 #endif
537     /*for (i=0;i<25;i++)
538        printf("%f ", features[i]);
539     printf("\n");*/
540
541     if (bandwidth<=12 || (bandwidth==13 && tonal->opus_bandwidth == OPUS_BANDWIDTH_NARROWBAND))
542        tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
543     else if (bandwidth<=14 || (bandwidth==15 && tonal->opus_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND))
544        tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
545     else if (bandwidth<=16 || (bandwidth==17 && tonal->opus_bandwidth == OPUS_BANDWIDTH_WIDEBAND))
546        tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
547     else if (bandwidth<=18)
548        tonal->opus_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
549     else
550        tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND;
551
552     info->bandwidth = bandwidth;
553     info->opus_bandwidth = tonal->opus_bandwidth;
554     /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
555     info->noisiness = frame_noisiness;
556     info->valid = 1;
557     if (info_out!=NULL)
558        OPUS_COPY(info_out, info, 1);
559 }
560
561 int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
562                         const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
563                         int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
564 {
565    int offset;
566    int pcm_len;
567
568    /* Avoid overflow/wrap-around of the analysis buffer */
569    frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);
570
571    pcm_len = frame_size - analysis->analysis_offset;
572    offset = 0;
573    do {
574       tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
575       offset += 480;
576       pcm_len -= 480;
577    } while (pcm_len>0);
578    analysis->analysis_offset = frame_size;
579
580    if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
581    {
582       int LM = 3;
583       LM = optimize_framesize(pcm, frame_size, C, Fs, bitrate_bps,
584             analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
585       while ((Fs/400<<LM)>frame_size)
586          LM--;
587       frame_size = (Fs/400<<LM);
588    } else {
589       frame_size = frame_size_select(frame_size, variable_duration, Fs);
590    }
591    if (frame_size<0)
592       return -1;
593    analysis->analysis_offset -= frame_size;
594
595    /* Only perform analysis up to 20-ms frames. Longer ones will be split if
596       they're in CELT-only mode. */
597    analysis_info->valid = 0;
598    tonality_get_info(analysis, analysis_info, frame_size);
599
600    return frame_size;
601 }