Correct analysis downmix scaling factor
[opus.git] / src / analysis.c
index 54005d3..62b3937 100644 (file)
 #include "config.h"
 #endif
 
+#define ANALYSIS_C
+
+#include <stdio.h>
+
+#include "mathops.h"
 #include "kiss_fft.h"
 #include "celt.h"
 #include "modes.h"
 #include "arch.h"
 #include "quant_bands.h"
-#include <stdio.h>
 #include "analysis.h"
 #include "mlp.h"
-
-extern const MLP net;
+#include "stack_alloc.h"
 
 #ifndef M_PI
 #define M_PI 3.141592653
@@ -101,7 +104,7 @@ static const int tbands[NB_TBANDS+1] = {
 };
 
 static const int extra_bands[NB_TOT_BANDS+1] = {
-      0, 2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
+      1, 2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
 };
 
 /*static const float tweight[NB_TBANDS+1] = {
@@ -110,38 +113,24 @@ static const int extra_bands[NB_TOT_BANDS+1] = {
 
 #define NB_TONAL_SKIP_BANDS 9
 
-#define cA 0.43157974f
-#define cB 0.67848403f
-#define cC 0.08595542f
-#define cE ((float)M_PI/2)
-static inline float fast_atan2f(float y, float x) {
-   float x2, y2;
-   /* Should avoid underflow on the values we'll get */
-   if (ABS16(x)+ABS16(y)<1e-9f)
-   {
-      x*=1e12f;
-      y*=1e12f;
-   }
-   x2 = x*x;
-   y2 = y*y;
-   if(x2<y2){
-      float den = (y2 + cB*x2) * (y2 + cC*x2);
-      if (den!=0)
-         return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
-      else
-         return (y<0 ? -cE : cE);
-   }else{
-      float den = (x2 + cB*y2) * (x2 + cC*y2);
-      if (den!=0)
-         return  x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
-      else
-         return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
-   }
+
+void tonality_analysis_init(TonalityAnalysisState *tonal)
+{
+  /* Initialize reusable fields. */
+  tonal->arch = opus_select_arch();
+  /* Clear remaining fields. */
+  tonality_analysis_reset(tonal);
+}
+
+void tonality_analysis_reset(TonalityAnalysisState *tonal)
+{
+  /* Clear non-reusable fields. */
+  char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START;
+  OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal));
 }
 
 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
 {
-#if 1
    int pos;
    int curr_lookahead;
    float psum;
@@ -177,55 +166,39 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
    curr_lookahead = IMAX(curr_lookahead-10, 0);
 
    psum=0;
+   /* Summing the probability of transition patterns that involve music at
+      time (DETECT_SIZE-curr_lookahead-1) */
    for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
       psum += tonal->pmusic[i];
    for (;i<DETECT_SIZE;i++)
       psum += tonal->pspeech[i];
-   /*printf("%f %f\n", psum, info_out->music_prob);*/
+   psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
+   /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/
 
    info_out->music_prob = psum;
-#else
-   /* If data not available, return invalid */
-   if (tonal->read_pos==tonal->write_pos)
-   {
-      info_out->valid=0;
-      return;
-   }
-
-   OPUS_COPY(info_out, &tonal->info[tonal->read_pos], 1);
-   tonal->read_subframe += len/480;
-   while (tonal->read_subframe>=4)
-   {
-      tonal->read_subframe -= 4;
-      tonal->read_pos++;
-   }
-   if (tonal->read_pos>=DETECT_SIZE)
-      tonal->read_pos-=DETECT_SIZE;
-   if (tonal->read_pos == tonal->write_pos)
-   {
-      tonal->read_pos = tonal->write_pos-1;
-      if (tonal->read_pos<0)
-         tonal->read_pos=DETECT_SIZE-1;
-      tonal->read_subframe = 3;
-   }
-#endif
 }
 
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
+static const float std_feature_bias[9] = {
+      5.684947, 3.475288, 1.770634, 1.599784, 3.773215,
+      2.163313, 1.260756, 1.116868, 1.918795
+};
+
+static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
 {
     int i, b;
     const kiss_fft_state *kfft;
-    kiss_fft_cpx in[480], out[480];
+    VARDECL(kiss_fft_cpx, in);
+    VARDECL(kiss_fft_cpx, out);
     int N = 480, N2=240;
     float * OPUS_RESTRICT A = tonal->angle;
     float * OPUS_RESTRICT dA = tonal->d_angle;
     float * OPUS_RESTRICT d2A = tonal->d2_angle;
-    float tonality[240];
-    float noisiness[240];
+    VARDECL(float, tonality);
+    VARDECL(float, noisiness);
     float band_tonality[NB_TBANDS];
     float logE[NB_TBANDS];
     float BFCC[8];
-    float features[100];
+    float features[25];
     float frame_tonality;
     float max_frame_tonality;
     /*float tw_sum=0;*/
@@ -234,7 +207,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
     float slope=0;
     float frame_stationarity;
     float relativeE;
-    float frame_prob;
+    float frame_probs[2];
     float alpha, alphaE, alphaE2;
     float frame_loudness;
     float bandwidth_mask;
@@ -243,41 +216,56 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
     float noise_floor;
     int remaining;
     AnalysisInfo *info;
+    SAVE_STACK;
 
     tonal->last_transition++;
     alpha = 1.f/IMIN(20, 1+tonal->count);
     alphaE = 1.f/IMIN(50, 1+tonal->count);
-    alphaE2 = 1.f/IMIN(6000, 1+tonal->count);
+    alphaE2 = 1.f/IMIN(1000, 1+tonal->count);
 
     if (tonal->count<4)
        tonal->music_prob = .5;
     kfft = celt_mode->mdct.kfft[0];
     if (tonal->count==0)
        tonal->mem_fill = 240;
-    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
+    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
     if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
     {
        tonal->mem_fill += len;
        /* Don't have enough to update the analysis */
+       RESTORE_STACK;
        return;
     }
     info = &tonal->info[tonal->write_pos++];
     if (tonal->write_pos>=DETECT_SIZE)
        tonal->write_pos-=DETECT_SIZE;
 
+    ALLOC(in, 480, kiss_fft_cpx);
+    ALLOC(out, 480, kiss_fft_cpx);
+    ALLOC(tonality, 240, float);
+    ALLOC(noisiness, 240, float);
     for (i=0;i<N2;i++)
     {
        float w = analysis_window[i];
-       in[i].r = MULT16_16(w, tonal->inmem[i]);
-       in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
-       in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
-       in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
+       in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]);
+       in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]);
+       in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]);
+       in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]);
     }
     OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
     remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
-    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
+    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
     tonal->mem_fill = 240 + remaining;
-    opus_fft(kfft, in, out);
+    opus_fft(kfft, in, out, tonal->arch);
+#ifndef FIXED_POINT
+    /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
+    if (celt_isnan(out[0].r))
+    {
+       info->valid = 0;
+       RESTORE_STACK;
+       return;
+    }
+#endif
 
     for (i=1;i<N2;i++)
     {
@@ -285,10 +273,10 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
        float angle, d_angle, d2_angle;
        float angle2, d_angle2, d2_angle2;
        float mod1, mod2, avg_mod;
-       X1r = out[i].r+out[N-i].r;
-       X1i = out[i].i-out[N-i].i;
-       X2r = out[i].i+out[N-i].i;
-       X2i = out[N-i].r-out[i].r;
+       X1r = (float)out[i].r+out[N-i].r;
+       X1i = (float)out[i].i-out[N-i].i;
+       X2r = (float)out[i].i+out[N-i].i;
+       X2i = (float)out[N-i].r-out[i].r;
 
        angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
        d_angle = angle - A[i];
@@ -332,7 +320,6 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
     }
     relativeE = 0;
     frame_loudness = 0;
-    bandwidth_mask = 0;
     for (b=0;b<NB_TBANDS;b++)
     {
        float E=0, tE=0, nE=0;
@@ -340,16 +327,30 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
        float stationarity;
        for (i=tbands[b];i<tbands[b+1];i++)
        {
-          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
-                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+#ifdef FIXED_POINT
+          /* FIXME: It's probably best to change the BFCC filter initial state instead */
+          binE *= 5.55e-17f;
+#endif
           E += binE;
           tE += binE*tonality[i];
           nE += binE*2.f*(.5f-noisiness[i]);
        }
+#ifndef FIXED_POINT
+       /* Check for extreme band energies that could cause NaNs later. */
+       if (!(E<1e9f) || celt_isnan(E))
+       {
+          info->valid = 0;
+          RESTORE_STACK;
+          return;
+       }
+#endif
+
        tonal->E[tonal->E_count][b] = E;
        frame_noisiness += nE/(1e-15f+E);
 
-       frame_loudness += celt_sqrt(E+1e-10f);
+       frame_loudness += (float)sqrt(E+1e-10f);
        logE[b] = (float)log(E+1e-10f);
        tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
        tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
@@ -358,21 +359,21 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
           tonal->highE[b]+=.5f;
           tonal->lowE[b]-=.5f;
        }
-       relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
+       relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]);
 
        L1=L2=0;
        for (i=0;i<NB_FRAMES;i++)
        {
-          L1 += celt_sqrt(tonal->E[i][b]);
+          L1 += (float)sqrt(tonal->E[i][b]);
           L2 += tonal->E[i][b];
        }
 
-       stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2));
+       stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2));
        stationarity *= stationarity;
        stationarity *= stationarity;
        frame_stationarity += stationarity;
        /*band_tonality[b] = tE/(1e-15+E)*/;
-       band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
+       band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]);
 #if 0
        if (b>=NB_TONAL_SKIP_BANDS)
        {
@@ -392,36 +393,37 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
 
     bandwidth_mask = 0;
     bandwidth = 0;
-    for (b=0;b<NB_TOT_BANDS;b++)
-       maxE = MAX32(maxE, tonal->meanE[b]);
+    maxE = 0;
     noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
+#ifdef FIXED_POINT
+    noise_floor *= 1<<(15+SIG_SHIFT);
+#endif
     noise_floor *= noise_floor;
     for (b=0;b<NB_TOT_BANDS;b++)
     {
        float E=0;
        int band_start, band_end;
        /* Keep a margin of 300 Hz for aliasing */
-       band_start = extra_bands[b]+3;
-       band_end = extra_bands[b+1]+3;
+       band_start = extra_bands[b];
+       band_end = extra_bands[b+1];
        for (i=band_start;i<band_end;i++)
        {
-          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
-                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+          float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                     + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
           E += binE;
        }
-       E /= (band_end-band_start);
        maxE = MAX32(maxE, E);
-       if (tonal->count>2)
-       {
-          tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;
-       } else {
-          tonal->meanE[b] = E;
-       }
+       tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
        E = MAX32(E, tonal->meanE[b]);
-       /* 13 dB slope for spreading function */
+       /* Use a simple follower with 13 dB/Bark slope for spreading function */
        bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
-       /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */
-       if (E>.1*bandwidth_mask && E*1e10f > maxE && E > noise_floor)
+       /* Consider the band "active" only if all these conditions are met:
+          1) less than 10 dB below the simple follower
+          2) less than 90 dB below the peak band (maximal masking possible considering
+             both the ATH and the loudness-dependent slope of the spreading function)
+          3) above the PCM quantization noise floor
+       */
+       if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start))
           bandwidth = b;
     }
     if (tonal->count<=2)
@@ -486,58 +488,93 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
        tonal->mem[i] = BFCC[i];
     }
     for (i=0;i<9;i++)
-       features[11+i] = celt_sqrt(tonal->std[i]);
-    features[20] = info->tonality;
-    features[21] = info->activity;
-    features[22] = frame_stationarity;
-    features[23] = info->tonality_slope;
-    features[24] = tonal->lowECount;
-
-#ifndef FIXED_POINT
-    mlp_process(&net, features, &frame_prob);
-    frame_prob = .5f*(frame_prob+1);
+       features[11+i] = (float)sqrt(tonal->std[i]) - std_feature_bias[i];
+    features[20] = info->tonality - 0.154723;
+    features[21] = info->activity - 0.724643;
+    features[22] = frame_stationarity - 0.743717;
+    features[23] = info->tonality_slope + 0.069216;
+    features[24] = tonal->lowECount - 0.067930;
+
+#ifndef DISABLE_FLOAT_API
+    mlp_process(&net, features, frame_probs);
+    frame_probs[0] = .5f*(frame_probs[0]+1);
     /* Curve fitting between the MLP probability and the actual probability */
-    frame_prob = .01f + 1.21f*frame_prob*frame_prob - .23f*(float)pow(frame_prob, 10);
+    /*frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);*/
+    /* Probability of active audio (as opposed to silence) */
+    frame_probs[1] = .5f*frame_probs[1]+.5f;
+    frame_probs[1] *= frame_probs[1];
+    /* Consider that silence has a 50-50 probability. */
+    frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f;
 
-    /*printf("%f\n", frame_prob);*/
+    /* Probability of speech or music vs noise */
+    info->activity_probability = frame_probs[1];
+
+    /*printf("%f %f\n", frame_probs[0], frame_probs[1]);*/
     {
-       float tau, beta;
+       /* Probability of state transition */
+       float tau;
+       /* Represents independence of the MLP probabilities, where
+          beta=1 means fully independent. */
+       float beta;
+       /* Denormalized probability of speech (p0) and music (p1) after update */
        float p0, p1;
-       float max_certainty;
-       /* One transition every 3 minutes */
-       tau = .00005f;
-       beta = .1f;
-       max_certainty = .01f+1.f/(20.f+.5f*tonal->last_transition);
-       max_certainty = 0;
+       /* Probabilities for "all speech" and "all music" */
+       float s0, m0;
+       /* Probability sum for renormalisation */
+       float psum;
+       /* Instantaneous probability of speech and music, with beta pre-applied. */
+       float speech0;
+       float music0;
+       float p, q;
+
+       /* One transition every 3 minutes of active audio */
+       tau = .00005f*frame_probs[1];
+       /* Adapt beta based on how "unexpected" the new prob is */
+       p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
+       q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
+       beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
+       /* p0 and p1 are the probabilities of speech and music at this frame
+          using only information from previous frame and applying the
+          state transition model */
        p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
        p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
-       p0 *= (float)pow(1-frame_prob, beta);
-       p1 *= (float)pow(frame_prob, beta);
-       tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1)));
+       /* We apply the current probability with exponent beta to work around
+          the fact that the probability estimates aren't independent. */
+       p0 *= (float)pow(1-frame_probs[0], beta);
+       p1 *= (float)pow(frame_probs[0], beta);
+       /* Normalise the probabilities to get the Markov probability of music. */
+       tonal->music_prob = p1/(p0+p1);
        info->music_prob = tonal->music_prob;
-       info->music_prob = frame_prob;
 
-       float psum=1e-20;
-       float speech0 = (float)pow(1-frame_prob, beta);
-       float music0  = (float)pow(frame_prob, beta);
+       /* This chunk of code deals with delayed decision. */
+       psum=1e-20f;
+       /* Instantaneous probability of speech and music, with beta pre-applied. */
+       speech0 = (float)pow(1-frame_probs[0], beta);
+       music0  = (float)pow(frame_probs[0], beta);
        if (tonal->count==1)
        {
           tonal->pspeech[0]=.5;
           tonal->pmusic [0]=.5;
        }
-       float s0, m0;
+       /* Updated probability of having only speech (s0) or only music (m0),
+          before considering the new observation. */
        s0 = tonal->pspeech[0] + tonal->pspeech[1];
        m0 = tonal->pmusic [0] + tonal->pmusic [1];
+       /* Updates s0 and m0 with instantaneous probability. */
        tonal->pspeech[0] = s0*(1-tau)*speech0;
        tonal->pmusic [0] = m0*(1-tau)*music0;
+       /* Propagate the transition probabilities */
        for (i=1;i<DETECT_SIZE-1;i++)
        {
           tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
           tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
        }
+       /* Probability that the latest frame is speech, when all the previous ones were music. */
        tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
+       /* Probability that the latest frame is music, when all the previous ones were speech. */
        tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
 
+       /* Renormalise probabilities to 1 */
        for (i=0;i<DETECT_SIZE;i++)
           psum += tonal->pspeech[i] + tonal->pmusic[i];
        psum = 1.f/psum;
@@ -550,7 +587,29 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
        for (i=1;i<DETECT_SIZE;i++)
           psum += tonal->pspeech[i];
 
-       /*printf("%f %f %f\n", frame_prob, info->music_prob, psum);*/
+       /* Estimate our confidence in the speech/music decisions */
+       if (frame_probs[1]>.75)
+       {
+          if (tonal->music_prob>.9)
+          {
+             float adapt;
+             adapt = 1.f/(++tonal->music_confidence_count);
+             tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
+             tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
+          }
+          if (tonal->music_prob<.1)
+          {
+             float adapt;
+             adapt = 1.f/(++tonal->speech_confidence_count);
+             tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
+             tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
+          }
+       } else {
+          if (tonal->music_confidence_count==0)
+             tonal->music_confidence = .9f;
+          if (tonal->speech_confidence_count==0)
+             tonal->speech_confidence = .1f;
+       }
     }
     if (tonal->last_music != (tonal->music_prob>.5f))
        tonal->last_transition=0;
@@ -562,64 +621,37 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
        printf("%f ", features[i]);
     printf("\n");*/
 
-    if (bandwidth<=12 || (bandwidth==13 && tonal->opus_bandwidth == OPUS_BANDWIDTH_NARROWBAND))
-       tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
-    else if (bandwidth<=14 || (bandwidth==15 && tonal->opus_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND))
-       tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
-    else if (bandwidth<=16 || (bandwidth==17 && tonal->opus_bandwidth == OPUS_BANDWIDTH_WIDEBAND))
-       tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
-    else if (bandwidth<=18)
-       tonal->opus_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
-    else
-       tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND;
-
     info->bandwidth = bandwidth;
-    info->opus_bandwidth = tonal->opus_bandwidth;
     /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
     info->noisiness = frame_noisiness;
     info->valid = 1;
-    if (info_out!=NULL)
-       OPUS_COPY(info_out, info, 1);
+    RESTORE_STACK;
 }
 
-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
-                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
-                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
 {
    int offset;
    int pcm_len;
 
-   /* Avoid overflow/wrap-around of the analysis buffer */
-   frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);
-
-   pcm_len = frame_size - analysis->analysis_offset;
-   offset = 0;
-   do {
-      tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
-      offset += 480;
-      pcm_len -= 480;
-   } while (pcm_len>0);
-   analysis->analysis_offset = frame_size;
-
-   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+   if (analysis_pcm != NULL)
    {
-      int LM = 3;
-      LM = optimize_framesize(pcm, frame_size, C, Fs, bitrate_bps,
-            analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
-      while ((Fs/400<<LM)>frame_size)
-         LM--;
-      frame_size = (Fs/400<<LM);
-   } else {
-      frame_size = frame_size_select(frame_size, variable_duration, Fs);
+      /* Avoid overflow/wrap-around of the analysis buffer */
+      analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);
+
+      pcm_len = analysis_frame_size - analysis->analysis_offset;
+      offset = analysis->analysis_offset;
+      do {
+         tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+         offset += 480;
+         pcm_len -= 480;
+      } while (pcm_len>0);
+      analysis->analysis_offset = analysis_frame_size;
+
+      analysis->analysis_offset -= frame_size;
    }
-   if (frame_size<0)
-      return -1;
-   analysis->analysis_offset -= frame_size;
 
-   /* Only perform analysis up to 20-ms frames. Longer ones will be split if
-      they're in CELT-only mode. */
    analysis_info->valid = 0;
    tonality_get_info(analysis, analysis_info, frame_size);
-
-   return frame_size;
 }