VAD seems to work better, though the code is now a complete mess :(
authorjm <jm@0101bb08-14d6-0310-b084-bc0e0c8e3800>
Sun, 25 May 2003 03:01:47 +0000 (03:01 +0000)
committerjm <jm@0101bb08-14d6-0310-b084-bc0e0c8e3800>
Sun, 25 May 2003 03:01:47 +0000 (03:01 +0000)
git-svn-id: http://svn.xiph.org/trunk/speex@4799 0101bb08-14d6-0310-b084-bc0e0c8e3800

libspeex/denoise.c

index baf383a..97934b1 100644 (file)
@@ -395,8 +395,14 @@ int speex_denoise(SpeexDenoiseState *st, float *x)
       float bands[NB_BANDS];
       int j;
       float p0, p1;
+      float tot_loudness=0;
       float x = sqrt(mean_post);
 
+      for (i=5;i<N-10;i++)
+      {
+         tot_loudness += scale*st->ps[i] * st->loudness_weight[i];
+      }
+
       for (i=0;i<NB_BANDS;i++)
       {
          bands[i]=1e4;
@@ -453,7 +459,9 @@ int speex_denoise(SpeexDenoiseState *st, float *x)
       for (i=0;i<NB_BANDS;i++)
       {
          float noise_var, speech_var;
+         float noise_mean, speech_mean;
          float tmp1, tmp2, pr;
+
          /*noise_var = 1.01*st->noise_bands2[i] - st->noise_bands[i]*st->noise_bands[i];
            speech_var = 1.01*st->speech_bands2[i] - st->speech_bands[i]*st->speech_bands[i];*/
          noise_var = st->noise_bands2[i];
@@ -475,8 +483,13 @@ int speex_denoise(SpeexDenoiseState *st, float *x)
          if (bands[i] > st->speech_bands[i])
             noise_var = speech_var;
 
-         tmp1 = exp(-.5*(bands[i]-st->speech_bands[i])*(bands[i]-st->speech_bands[i])/speech_var)/sqrt(2*M_PI*speech_var);
-         tmp2 = exp(-.5*(bands[i]-st->noise_bands[i])*(bands[i]-st->noise_bands[i])/noise_var)/sqrt(2*M_PI*noise_var);
+         speech_mean = st->speech_bands[i];
+         noise_mean = st->noise_bands[i];
+         if (noise_mean < speech_mean - 5)
+            noise_mean = speech_mean - 5;
+
+         tmp1 = exp(-.5*(bands[i]-speech_mean)*(bands[i]-speech_mean)/speech_var)/sqrt(2*M_PI*speech_var);
+         tmp2 = exp(-.5*(bands[i]-noise_mean)*(bands[i]-noise_mean)/noise_var)/sqrt(2*M_PI*noise_var);
          /*fprintf (stderr, "%f ", (float)(p0/(.01+p0+p1)));*/
          /*fprintf (stderr, "%f ", (float)(bands[i]));*/
          pr = tmp1/(1e-25+tmp1+tmp2);
@@ -495,24 +508,41 @@ int speex_denoise(SpeexDenoiseState *st, float *x)
 
       p0 = pow(p0,.3);
       p1 = pow(p1,.3);      
-
-      if (p0>100*p1)
-         p0=100*p1;
       
+#if 1
+      p0=p0/(p1+p0);
+      {
+         float tmp = sqrt(tot_loudness)/st->loudness2;
+         tmp = 1-exp(-8*tmp);
+         if (p0>tmp)
+            p0=tmp;
+      }
+      p1=1-p0;
+#else
+      if (sqrt(tot_loudness) < .6*st->loudness2 && p0>15*p1)
+         p0=15*p1;
+      if (sqrt(tot_loudness) < .45*st->loudness2 && p0>7*p1)
+         p0=7*p1;
+      if (sqrt(tot_loudness) < .3*st->loudness2 && p0>3*p1)
+         p0=3*p1;
+      if (sqrt(tot_loudness) < .15*st->loudness2 && p0>p1)
+         p0=p1;
+      /*fprintf (stderr, "%f %f ", (float)(sqrt(tot_loudness) /( .25*st->loudness2)), p0/(p1+p0));*/
+#endif
 
       p0 *= .99*st->speech_prob + .01*(1-st->speech_prob);
       p1 *= .01*st->speech_prob + .99*(1-st->speech_prob);
       
       st->speech_prob = p0/(1e-25+p1+p0);
-      /*fprintf (stderr, "%f ", st->speech_prob);*/
+      /*fprintf (stderr, "%f %f %f ", tot_loudness, st->loudness2, st->speech_prob);*/
 
-      if (st->speech_prob>.5 || (st->last_speech < 10 && st->speech_prob>.25))
+      if (st->speech_prob>.5 || (st->last_speech < 15 && st->speech_prob>.25))
       {
          is_speech = 1;
          st->last_speech = 0;
       } else {
          st->last_speech++;
-         if (st->last_speech<10)
+         if (st->last_speech<15)
            is_speech = 1;
       }
 
@@ -620,11 +650,14 @@ int speex_denoise(SpeexDenoiseState *st, float *x)
    {
       for (i=0;i<N;i++)
          st->gain2[i] = 0;
-   } else {
+   }
+#if 0
+ else {
       for (i=0;i<N;i++)
          st->gain2[i] = 1;
    }
 #endif
+#endif
    /* Apply computed gain */
    for (i=1;i<N;i++)
    {