Fixes MSVC warnings
[opus.git] / src / analysis.c
1 /* Copyright (c) 2011 Xiph.Org Foundation
2    Written by Jean-Marc Valin */
3 /*
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7
8    - Redistributions of source code must retain the above copyright
9    notice, this list of conditions and the following disclaimer.
10
11    - Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14
15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
19    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "kiss_fft.h"
33 #include "celt.h"
34 #include "modes.h"
35 #include "arch.h"
36 #include "quant_bands.h"
37 #include <stdio.h>
38 #include "analysis.h"
39 #include "mlp.h"
40 #include "stack_alloc.h"
41
42 extern const MLP net;
43
44 #ifndef M_PI
45 #define M_PI 3.141592653
46 #endif
47
48 static const float dct_table[128] = {
49         0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
50         0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
51         0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f,
52        -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f,
53         0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f,
54        -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f,
55         0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f,
56         0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f,
57         0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
58         0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f,
59         0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f,
60        -0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f,
61         0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f,
62        -0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f,
63         0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f,
64         0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f,
65 };
66
67 static const float analysis_window[240] = {
68       0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
69       0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
70       0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
71       0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
72       0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
73       0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
74       0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
75       0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
76       0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
77       0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
78       0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
79       0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
80       0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
81       0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
82       0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
83       0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
84       0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
85       0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
86       0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
87       0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
88       0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
89       0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
90       0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
91       0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
92       0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
93       0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
94       0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
95       0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
96       0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
97       0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
98 };
99
100 static const int tbands[NB_TBANDS+1] = {
101        2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
102 };
103
104 static const int extra_bands[NB_TOT_BANDS+1] = {
105       1, 2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
106 };
107
108 /*static const float tweight[NB_TBANDS+1] = {
109       .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
110 };*/
111
112 #define NB_TONAL_SKIP_BANDS 9
113
114 #define cA 0.43157974f
115 #define cB 0.67848403f
116 #define cC 0.08595542f
117 #define cE ((float)M_PI/2)
118 static inline float fast_atan2f(float y, float x) {
119    float x2, y2;
120    /* Should avoid underflow on the values we'll get */
121    if (ABS16(x)+ABS16(y)<1e-9f)
122    {
123       x*=1e12f;
124       y*=1e12f;
125    }
126    x2 = x*x;
127    y2 = y*y;
128    if(x2<y2){
129       float den = (y2 + cB*x2) * (y2 + cC*x2);
130       if (den!=0)
131          return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
132       else
133          return (y<0 ? -cE : cE);
134    }else{
135       float den = (x2 + cB*y2) * (x2 + cC*y2);
136       if (den!=0)
137          return  x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
138       else
139          return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
140    }
141 }
142
143 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
144 {
145    int pos;
146    int curr_lookahead;
147    float psum;
148    int i;
149
150    pos = tonal->read_pos;
151    curr_lookahead = tonal->write_pos-tonal->read_pos;
152    if (curr_lookahead<0)
153       curr_lookahead += DETECT_SIZE;
154
155    if (len > 480 && pos != tonal->write_pos)
156    {
157       pos++;
158       if (pos==DETECT_SIZE)
159          pos=0;
160    }
161    if (pos == tonal->write_pos)
162       pos--;
163    if (pos<0)
164       pos = DETECT_SIZE-1;
165    OPUS_COPY(info_out, &tonal->info[pos], 1);
166    tonal->read_subframe += len/120;
167    while (tonal->read_subframe>=4)
168    {
169       tonal->read_subframe -= 4;
170       tonal->read_pos++;
171    }
172    if (tonal->read_pos>=DETECT_SIZE)
173       tonal->read_pos-=DETECT_SIZE;
174
175    /* Compensate for the delay in the features themselves.
176       FIXME: Need a better estimate the 10 I just made up */
177    curr_lookahead = IMAX(curr_lookahead-10, 0);
178
179    psum=0;
180    for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
181       psum += tonal->pmusic[i];
182    for (;i<DETECT_SIZE;i++)
183       psum += tonal->pspeech[i];
184    psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
185    /*printf("%f %f\n", psum, info_out->music_prob);*/
186
187    info_out->music_prob = psum;
188 }
189
190 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
191 {
192     int i, b;
193     const kiss_fft_state *kfft;
194     VARDECL(kiss_fft_cpx, in);
195     VARDECL(kiss_fft_cpx, out);
196     int N = 480, N2=240;
197     float * OPUS_RESTRICT A = tonal->angle;
198     float * OPUS_RESTRICT dA = tonal->d_angle;
199     float * OPUS_RESTRICT d2A = tonal->d2_angle;
200     VARDECL(float, tonality);
201     VARDECL(float, noisiness);
202     float band_tonality[NB_TBANDS];
203     float logE[NB_TBANDS];
204     float BFCC[8];
205     float features[25];
206     float frame_tonality;
207     float max_frame_tonality;
208     /*float tw_sum=0;*/
209     float frame_noisiness;
210     const float pi4 = (float)(M_PI*M_PI*M_PI*M_PI);
211     float slope=0;
212     float frame_stationarity;
213     float relativeE;
214     float frame_probs[2];
215     float alpha, alphaE, alphaE2;
216     float frame_loudness;
217     float bandwidth_mask;
218     int bandwidth=0;
219     float maxE = 0;
220     float noise_floor;
221     int remaining;
222     AnalysisInfo *info;
223     SAVE_STACK;
224
225     tonal->last_transition++;
226     alpha = 1.f/IMIN(20, 1+tonal->count);
227     alphaE = 1.f/IMIN(50, 1+tonal->count);
228     alphaE2 = 1.f/IMIN(1000, 1+tonal->count);
229
230     if (tonal->count<4)
231        tonal->music_prob = .5;
232     kfft = celt_mode->mdct.kfft[0];
233     if (tonal->count==0)
234        tonal->mem_fill = 240;
235     downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
236     if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
237     {
238        tonal->mem_fill += len;
239        /* Don't have enough to update the analysis */
240        RESTORE_STACK;
241        return;
242     }
243     info = &tonal->info[tonal->write_pos++];
244     if (tonal->write_pos>=DETECT_SIZE)
245        tonal->write_pos-=DETECT_SIZE;
246
247     ALLOC(in, 480, kiss_fft_cpx);
248     ALLOC(out, 480, kiss_fft_cpx);
249     ALLOC(tonality, 240, float);
250     ALLOC(noisiness, 240, float);
251     for (i=0;i<N2;i++)
252     {
253        float w = analysis_window[i];
254        in[i].r = MULT16_16(w, tonal->inmem[i]);
255        in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
256        in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
257        in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
258     }
259     OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
260     remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
261     downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
262     tonal->mem_fill = 240 + remaining;
263     opus_fft(kfft, in, out);
264
265     for (i=1;i<N2;i++)
266     {
267        float X1r, X2r, X1i, X2i;
268        float angle, d_angle, d2_angle;
269        float angle2, d_angle2, d2_angle2;
270        float mod1, mod2, avg_mod;
271        X1r = out[i].r+out[N-i].r;
272        X1i = out[i].i-out[N-i].i;
273        X2r = out[i].i+out[N-i].i;
274        X2i = out[N-i].r-out[i].r;
275
276        angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
277        d_angle = angle - A[i];
278        d2_angle = d_angle - dA[i];
279
280        angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
281        d_angle2 = angle2 - angle;
282        d2_angle2 = d_angle2 - d_angle;
283
284        mod1 = d2_angle - (float)floor(.5+d2_angle);
285        noisiness[i] = ABS16(mod1);
286        mod1 *= mod1;
287        mod1 *= mod1;
288
289        mod2 = d2_angle2 - (float)floor(.5+d2_angle2);
290        noisiness[i] += ABS16(mod2);
291        mod2 *= mod2;
292        mod2 *= mod2;
293
294        avg_mod = .25f*(d2A[i]+2.f*mod1+mod2);
295        tonality[i] = 1.f/(1.f+40.f*16.f*pi4*avg_mod)-.015f;
296
297        A[i] = angle2;
298        dA[i] = d_angle2;
299        d2A[i] = mod2;
300     }
301
302     frame_tonality = 0;
303     max_frame_tonality = 0;
304     /*tw_sum = 0;*/
305     info->activity = 0;
306     frame_noisiness = 0;
307     frame_stationarity = 0;
308     if (!tonal->count)
309     {
310        for (b=0;b<NB_TBANDS;b++)
311        {
312           tonal->lowE[b] = 1e10;
313           tonal->highE[b] = -1e10;
314        }
315     }
316     relativeE = 0;
317     frame_loudness = 0;
318     bandwidth_mask = 0;
319     for (b=0;b<NB_TBANDS;b++)
320     {
321        float E=0, tE=0, nE=0;
322        float L1, L2;
323        float stationarity;
324        for (i=tbands[b];i<tbands[b+1];i++)
325        {
326           float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
327                      + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
328           E += binE;
329           tE += binE*tonality[i];
330           nE += binE*2.f*(.5f-noisiness[i]);
331        }
332        tonal->E[tonal->E_count][b] = E;
333        frame_noisiness += nE/(1e-15f+E);
334
335        frame_loudness += celt_sqrt(E+1e-10f);
336        logE[b] = (float)log(E+1e-10f);
337        tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
338        tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
339        if (tonal->highE[b] < tonal->lowE[b]+1.f)
340        {
341           tonal->highE[b]+=.5f;
342           tonal->lowE[b]-=.5f;
343        }
344        relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
345
346        L1=L2=0;
347        for (i=0;i<NB_FRAMES;i++)
348        {
349           L1 += celt_sqrt(tonal->E[i][b]);
350           L2 += tonal->E[i][b];
351        }
352
353        stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2));
354        stationarity *= stationarity;
355        stationarity *= stationarity;
356        frame_stationarity += stationarity;
357        /*band_tonality[b] = tE/(1e-15+E)*/;
358        band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
359 #if 0
360        if (b>=NB_TONAL_SKIP_BANDS)
361        {
362           frame_tonality += tweight[b]*band_tonality[b];
363           tw_sum += tweight[b];
364        }
365 #else
366        frame_tonality += band_tonality[b];
367        if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
368           frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
369 #endif
370        max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f*(b-NB_TBANDS))*frame_tonality);
371        slope += band_tonality[b]*(b-8);
372        /*printf("%f %f ", band_tonality[b], stationarity);*/
373        tonal->prev_band_tonality[b] = band_tonality[b];
374     }
375
376     bandwidth_mask = 0;
377     bandwidth = 0;
378     maxE = 0;
379     noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
380     noise_floor *= noise_floor;
381     for (b=0;b<NB_TOT_BANDS;b++)
382     {
383        float E=0;
384        int band_start, band_end;
385        /* Keep a margin of 300 Hz for aliasing */
386        band_start = extra_bands[b];
387        band_end = extra_bands[b+1];
388        for (i=band_start;i<band_end;i++)
389        {
390           float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
391                      + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
392           E += binE;
393        }
394        maxE = MAX32(maxE, E);
395        tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
396        E = MAX32(E, tonal->meanE[b]);
397        /* Use a simple follower with 13 dB/Bark slope for spreading function */
398        bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
399        /* Consider the band "active" only if all these conditions are met:
400           1) less than 10 dB below the simple follower
401           2) less than 90 dB below the peak band (maximal masking possible considering
402              both the ATH and the loudness-dependent slope of the spreading function)
403           3) above the PCM quantization noise floor
404        */
405        if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start))
406           bandwidth = b;
407     }
408     if (tonal->count<=2)
409        bandwidth = 20;
410     frame_loudness = 20*(float)log10(frame_loudness);
411     tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness);
412     tonal->lowECount *= (1-alphaE);
413     if (frame_loudness < tonal->Etracker-30)
414        tonal->lowECount += alphaE;
415
416     for (i=0;i<8;i++)
417     {
418        float sum=0;
419        for (b=0;b<16;b++)
420           sum += dct_table[i*16+b]*logE[b];
421        BFCC[i] = sum;
422     }
423
424     frame_stationarity /= NB_TBANDS;
425     relativeE /= NB_TBANDS;
426     if (tonal->count<10)
427        relativeE = .5;
428     frame_noisiness /= NB_TBANDS;
429 #if 1
430     info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
431 #else
432     info->activity = .5*(1+frame_noisiness-frame_stationarity);
433 #endif
434     frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
435     frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f);
436     tonal->prev_tonality = frame_tonality;
437
438     slope /= 8*8;
439     info->tonality_slope = slope;
440
441     tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
442     tonal->count++;
443     info->tonality = frame_tonality;
444
445     for (i=0;i<4;i++)
446        features[i] = -0.12299f*(BFCC[i]+tonal->mem[i+24]) + 0.49195f*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693f*tonal->mem[i+8] - 1.4349f*tonal->cmean[i];
447
448     for (i=0;i<4;i++)
449        tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i];
450
451     for (i=0;i<4;i++)
452         features[4+i] = 0.63246f*(BFCC[i]-tonal->mem[i+24]) + 0.31623f*(tonal->mem[i]-tonal->mem[i+16]);
453     for (i=0;i<3;i++)
454         features[8+i] = 0.53452f*(BFCC[i]+tonal->mem[i+24]) - 0.26726f*(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8];
455
456     if (tonal->count > 5)
457     {
458        for (i=0;i<9;i++)
459           tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
460     }
461
462     for (i=0;i<8;i++)
463     {
464        tonal->mem[i+24] = tonal->mem[i+16];
465        tonal->mem[i+16] = tonal->mem[i+8];
466        tonal->mem[i+8] = tonal->mem[i];
467        tonal->mem[i] = BFCC[i];
468     }
469     for (i=0;i<9;i++)
470        features[11+i] = celt_sqrt(tonal->std[i]);
471     features[20] = info->tonality;
472     features[21] = info->activity;
473     features[22] = frame_stationarity;
474     features[23] = info->tonality_slope;
475     features[24] = tonal->lowECount;
476
477 #ifndef FIXED_POINT
478     mlp_process(&net, features, frame_probs);
479     frame_probs[0] = .5f*(frame_probs[0]+1);
480     /* Curve fitting between the MLP probability and the actual probability */
481     frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
482     frame_probs[1] = .5*frame_probs[1]+.5;
483     frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f;
484
485     /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
486     {
487        float tau, beta;
488        float p0, p1;
489        float s0, m0;
490        float psum;
491        float speech0;
492        float music0;
493
494        /* One transition every 3 minutes */
495        tau = .00005f*frame_probs[1];
496        beta = .05f;
497        if (1) {
498           /* Adapt beta based on how "unexpected" the new prob is */
499           float p, q;
500           p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
501           q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
502           beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
503        }
504        p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
505        p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
506        p0 *= (float)pow(1-frame_probs[0], beta);
507        p1 *= (float)pow(frame_probs[0], beta);
508        tonal->music_prob = p1/(p0+p1);
509        info->music_prob = tonal->music_prob;
510
511        psum=1e-20f;
512        speech0 = (float)pow(1-frame_probs[0], beta);
513        music0  = (float)pow(frame_probs[0], beta);
514        if (tonal->count==1)
515        {
516           tonal->pspeech[0]=.5;
517           tonal->pmusic [0]=.5;
518        }
519        s0 = tonal->pspeech[0] + tonal->pspeech[1];
520        m0 = tonal->pmusic [0] + tonal->pmusic [1];
521        tonal->pspeech[0] = s0*(1-tau)*speech0;
522        tonal->pmusic [0] = m0*(1-tau)*music0;
523        for (i=1;i<DETECT_SIZE-1;i++)
524        {
525           tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
526           tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
527        }
528        tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
529        tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
530
531        for (i=0;i<DETECT_SIZE;i++)
532           psum += tonal->pspeech[i] + tonal->pmusic[i];
533        psum = 1.f/psum;
534        for (i=0;i<DETECT_SIZE;i++)
535        {
536           tonal->pspeech[i] *= psum;
537           tonal->pmusic [i] *= psum;
538        }
539        psum = tonal->pmusic[0];
540        for (i=1;i<DETECT_SIZE;i++)
541           psum += tonal->pspeech[i];
542
543        /* Estimate our confidence in the speech/music decisions */
544        if (frame_probs[1]>.75)
545        {
546           if (tonal->music_prob>.9)
547           {
548              float adapt;
549              adapt = 1.f/(++tonal->music_confidence_count);
550              tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
551              tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
552           }
553           if (tonal->music_prob<.1)
554           {
555              float adapt;
556              adapt = 1.f/(++tonal->speech_confidence_count);
557              tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
558              tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
559           }
560        } else {
561           if (tonal->music_confidence_count==0)
562              tonal->music_confidence = .9f;
563           if (tonal->speech_confidence_count==0)
564              tonal->speech_confidence = .1f;
565        }
566        psum = MAX16(tonal->speech_confidence, MIN16(tonal->music_confidence, psum));
567     }
568     if (tonal->last_music != (tonal->music_prob>.5f))
569        tonal->last_transition=0;
570     tonal->last_music = tonal->music_prob>.5f;
571 #else
572     info->music_prob = 0;
573 #endif
574     /*for (i=0;i<25;i++)
575        printf("%f ", features[i]);
576     printf("\n");*/
577
578     info->bandwidth = bandwidth;
579     /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
580     info->noisiness = frame_noisiness;
581     info->valid = 1;
582     if (info_out!=NULL)
583        OPUS_COPY(info_out, info, 1);
584     RESTORE_STACK;
585 }
586
587 int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
588                         const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
589                         int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
590 {
591    int offset;
592    int pcm_len;
593
594    /* Avoid overflow/wrap-around of the analysis buffer */
595    frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);
596
597    pcm_len = frame_size - analysis->analysis_offset;
598    offset = 0;
599    do {
600       tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
601       offset += 480;
602       pcm_len -= 480;
603    } while (pcm_len>0);
604    analysis->analysis_offset = frame_size;
605
606    if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
607    {
608       int LM = 3;
609       LM = optimize_framesize((const opus_val16*)pcm, frame_size, C, Fs, bitrate_bps,
610             analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
611       while ((Fs/400<<LM)>frame_size)
612          LM--;
613       frame_size = (Fs/400<<LM);
614    } else {
615       frame_size = frame_size_select(frame_size, variable_duration, Fs);
616    }
617    if (frame_size<0)
618       return -1;
619    analysis->analysis_offset -= frame_size;
620
621    /* Only perform analysis up to 20-ms frames. Longer ones will be split if
622       they're in CELT-only mode. */
623    analysis_info->valid = 0;
624    tonality_get_info(analysis, analysis_info, frame_size);
625
626    return frame_size;
627 }