Speed improvements (got rid of a couple of divide ops), cleanup...
[speexdsp.git] / libspeex / cb_search.c
index a280ce4..8584175 100644 (file)
@@ -51,6 +51,7 @@ static float scal_gains4[16] = {
    2.42801
 };
 
    2.42801
 };
 
+
 /*---------------------------------------------------------------------------*\
                                                                              
  void overlap_cb_search()                                                            
 /*---------------------------------------------------------------------------*\
                                                                              
  void overlap_cb_search()                                                            
@@ -156,7 +157,7 @@ float *stack
 {
    int i,j, id;
    float *resp, *E, q;
 {
    int i,j, id;
    float *resp, *E, q;
-   float *t, *r, *e, *tresp;
+   float *t, *r, *e;
    float *gains;
    int *ind;
    float *shape_cb;
    float *gains;
    int *ind;
    float *shape_cb;
@@ -170,7 +171,6 @@ float *stack
    shape_cb_size = 1<<params->shape_bits;
    shape_cb = params->shape_cb;
    resp = PUSH(stack, shape_cb_size*subvect_size);
    shape_cb_size = 1<<params->shape_bits;
    shape_cb = params->shape_cb;
    resp = PUSH(stack, shape_cb_size*subvect_size);
-   tresp = PUSH(stack, shape_cb_size*nsf);
    E = PUSH(stack, shape_cb_size);
    t = PUSH(stack, nsf);
    r = PUSH(stack, nsf);
    E = PUSH(stack, shape_cb_size);
    t = PUSH(stack, nsf);
    r = PUSH(stack, nsf);
@@ -201,38 +201,49 @@ float *stack
    for (i=0;i<nsf;i++)
       t[i]=target[i];
 
    for (i=0;i<nsf;i++)
       t[i]=target[i];
 
+   e[0]=1;
+   for (i=1;i<nsf;i++)
+      e[i]=0;
+   residue_zero(e, awk1, r, nsf, p);
+   syn_filt_zero(r, ak, r, nsf, p);
+   syn_filt_zero(r, awk2, r, nsf,p);
+   
    /* Pre-compute codewords response and energy */
    for (i=0;i<shape_cb_size;i++)
    {
       float *res = resp+i*subvect_size;
 
       /* Compute codeword response */
    /* Pre-compute codewords response and energy */
    for (i=0;i<shape_cb_size;i++)
    {
       float *res = resp+i*subvect_size;
 
       /* Compute codeword response */
-      residue_zero(shape_cb+i*subvect_size, awk1, res, subvect_size, p);
-      syn_filt_zero(res, ak, res, subvect_size, p);
-      syn_filt_zero(res, awk2, res, subvect_size,p);
-
+      int k;
+      for(j=0;j<subvect_size;j++)
+         res[j]=0;
+      for(j=0;j<subvect_size;j++)
+      {
+         for (k=j;k<subvect_size;k++)
+            res[k]+=shape_cb[i*subvect_size+j]*r[k-j];
+      }
       /* Compute energy of codeword response */
       E[i]=0;
       for(j=0;j<subvect_size;j++)
          E[i]+=res[j]*res[j];
       /* Compute energy of codeword response */
       E[i]=0;
       for(j=0;j<subvect_size;j++)
          E[i]+=res[j]*res[j];
-      
+      E[i]=1/(.001+E[i]);
    }
 
    for (i=0;i<nb_subvect;i++)
    {
    }
 
    for (i=0;i<nb_subvect;i++)
    {
-      int best_index=0;
+      int best_index=0, k, m;
       float g, corr, best_gain=0, score, best_score=-1;
       /* Find best codeword for current sub-vector */
       for (j=0;j<shape_cb_size;j++)
       {
          corr=xcorr(resp+j*subvect_size,t+subvect_size*i,subvect_size);
       float g, corr, best_gain=0, score, best_score=-1;
       /* Find best codeword for current sub-vector */
       for (j=0;j<shape_cb_size;j++)
       {
          corr=xcorr(resp+j*subvect_size,t+subvect_size*i,subvect_size);
-         score=corr*corr/(.001+E[j]);
-         g = corr/(.001+E[j]);
+         score=corr*corr*E[j];
+         g = corr*E[j];
          if (score>best_score)
          {
             best_index=j;
             best_score=score;
          if (score>best_score)
          {
             best_index=j;
             best_score=score;
-            best_gain=corr/(.001+E[j]);
+            best_gain=g;
          }
       }
       frame_bits_pack(bits,best_index,params->shape_bits);
          }
       }
       frame_bits_pack(bits,best_index,params->shape_bits);
@@ -260,18 +271,13 @@ float *stack
       }
       ind[i]=best_index;
       gains[i]=best_gain;
       }
       ind[i]=best_index;
       gains[i]=best_gain;
-
-      for (j=0;j<nsf;j++)
-         e[j]=0;
+      /* Update target for next subvector */
       for (j=0;j<subvect_size;j++)
       for (j=0;j<subvect_size;j++)
-         e[subvect_size*i+j]=best_gain*shape_cb[best_index*subvect_size+j];
-      residue_zero(e, awk1, r, nsf, p);
-      syn_filt_zero(r, ak, r, nsf, p);
-      syn_filt_zero(r, awk2, r, nsf,p);
-      for (j=0;j<nsf;j++)
-         tresp[i*nsf+j]=r[j];
-      for (j=0;j<nsf;j++)
-         t[j]-=r[j];
+      {
+         g=best_gain*shape_cb[best_index*subvect_size+j];
+         for (k=subvect_size*i+j,m=0;k<nsf;k++,m++)
+            t[k] -= g*r[m];
+      }
    }
    
    /* Put everything back together */
    }
    
    /* Put everything back together */
@@ -299,7 +305,6 @@ float *stack
    POP(stack);
    POP(stack);
    POP(stack);
    POP(stack);
    POP(stack);
    POP(stack);
-   POP(stack);
 }
 
 
 }