Oops. Thanks to Jim Crichton for pointing out that the complexity could end up
[speexdsp.git] / libspeex / cb_search.c
index 6ee343e..5c68826 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002 Jean-Marc Valin 
+/* Copyright (C) 2002-2006 Jean-Marc Valin 
    File: cb_search.c
 
    Redistribution and use in source and binary forms, with or without
 #include "stack_alloc.h"
 #include "vq.h"
 #include "misc.h"
-#include <stdio.h>
 
 #ifdef _USE_SSE
 #include "cb_search_sse.h"
 #elif defined(ARM4_ASM) || defined(ARM5E_ASM)
 #include "cb_search_arm4.h"
-#else
+#elif defined(BFIN_ASM)
+#include "cb_search_bfin.h"
+#endif
 
-static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
+#ifndef OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
+static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
 {
    int i, j, k;
+   VARDECL(spx_word16_t *shape);
+   ALLOC(shape, subvect_size, spx_word16_t);
    for (i=0;i<shape_cb_size;i++)
    {
       spx_word16_t *res;
-      const signed char *shape;
-
+      
       res = resp+i*subvect_size;
-      shape = shape_cb+i*subvect_size;
+      for (k=0;k<subvect_size;k++)
+         shape[k] = (spx_word16_t)shape_cb[i*subvect_size+k];
       E[i]=0;
 
       /* Compute codeword response using convolution with impulse response */
       for(j=0;j<subvect_size;j++)
       {
          spx_word32_t resj=0;
+         spx_word16_t res16;
          for (k=0;k<=j;k++)
             resj = MAC16_16(resj,shape[k],r[j-k]);
 #ifdef FIXED_POINT
-         resj = SHR(resj, 11);
+         res16 = EXTRACT16(SHR32(resj, 13));
 #else
-         resj *= 0.03125;
+         res16 = 0.03125f*resj;
 #endif
          /* Compute codeword energy */
-         E[i]=ADD32(E[i],MULT16_16(resj,resj));
-         res[j] = resj;
+         E[i]=MAC16_16(E[i],res16,res16);
+         res[j] = res16;
          /*printf ("%d\n", (int)res[j]);*/
       }
    }
 
 }
+#endif
 
+#ifndef OVERRIDE_TARGET_UPDATE
+static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *r, int len)
+{
+   int n;
+   for (n=0;n<len;n++)
+      t[n] = SUB16(t[n],PSHR32(MULT16_16(g,r[n]),13));
+}
 #endif
 
 
 
-void split_cb_search_shape_sign_N1(
-spx_sig_t target[],                    /* target vector */
+static void split_cb_search_shape_sign_N1(
+spx_word16_t target[],                 /* target vector */
 spx_coef_t ak[],                       /* LPCs for this subframe */
 spx_coef_t awk1[],                     /* Weighted LPCs for this subframe */
 spx_coef_t awk2[],                     /* Weighted LPCs for this subframe */
@@ -91,58 +104,50 @@ const void *par,                      /* Codebook/search parameters*/
 int   p,                        /* number of LPC coeffs */
 int   nsf,                      /* number of samples in subframe */
 spx_sig_t *exc,
-spx_sig_t *r,
+spx_word16_t *r,
 SpeexBits *bits,
 char *stack,
-int   complexity,
 int   update_target
-                               )
+)
 {
-   int i,j,k,m,n,q;
-   spx_word16_t *resp;
+   int i,j,m,q;
+   VARDECL(spx_word16_t *resp);
 #ifdef _USE_SSE
-   __m128 *resp2;
-   __m128 *E;
+   VARDECL(__m128 *resp2);
+   VARDECL(__m128 *E);
 #else
    spx_word16_t *resp2;
-   spx_word32_t *E;
+   VARDECL(spx_word32_t *E);
 #endif
-   spx_word16_t *t;
-   spx_sig_t *e, *r2;
+   VARDECL(spx_word16_t *t);
+   VARDECL(spx_sig_t *e);
    const signed char *shape_cb;
    int shape_cb_size, subvect_size, nb_subvect;
-   split_cb_params *params;
-   int N=2;
+   const split_cb_params *params;
    int best_index;
    spx_word32_t best_dist;
    int have_sign;
-   N=complexity;
-   if (N>10)
-      N=10;
-   if (N<1)
-      N=1;
    
-   params = (split_cb_params *) par;
+   params = (const split_cb_params *) par;
    subvect_size = params->subvect_size;
    nb_subvect = params->nb_subvect;
    shape_cb_size = 1<<params->shape_bits;
    shape_cb = params->shape_cb;
    have_sign = params->have_sign;
-   resp = PUSH(stack, shape_cb_size*subvect_size, spx_word16_t);
+   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
 #ifdef _USE_SSE
-   resp2 = PUSH(stack, (shape_cb_size*subvect_size)>>2, __m128);
-   E = PUSH(stack, shape_cb_size>>2, __m128);
+   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
+   ALLOC(E, shape_cb_size>>2, __m128);
 #else
    resp2 = resp;
-   E = PUSH(stack, shape_cb_size, spx_word32_t);
+   ALLOC(E, shape_cb_size, spx_word32_t);
 #endif
-   t = PUSH(stack, nsf, spx_word16_t);
-   e = PUSH(stack, nsf, spx_sig_t);
-   r2 = PUSH(stack, nsf, spx_sig_t);
+   ALLOC(t, nsf, spx_word16_t);
+   ALLOC(e, nsf, spx_sig_t);
    
-   /* FIXME: make that adaptive? */
+   /* FIXME: Do we still need to copy the target? */
    for (i=0;i<nsf;i++)
-      t[i]=SHR(target[i],6);
+      t[i]=target[i];
 
    compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
 
@@ -157,7 +162,6 @@ int   update_target
       
       speex_bits_pack(bits,best_index,params->shape_bits+have_sign);
       
-      /* New code: update only enough of the target to calculate error*/
       {
          int rind;
          spx_word16_t *res;
@@ -171,19 +175,19 @@ int   update_target
          res = resp+rind*subvect_size;
          if (sign>0)
             for (m=0;m<subvect_size;m++)
-               t[subvect_size*i+m] -= res[m];
+               t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
          else
             for (m=0;m<subvect_size;m++)
-               t[subvect_size*i+m] += res[m];
+               t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
 
 #ifdef FIXED_POINT
          if (sign)
          {
             for (j=0;j<subvect_size;j++)
-               e[subvect_size*i+j]=SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5);
+               e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
          } else {
             for (j=0;j<subvect_size;j++)
-               e[subvect_size*i+j]=-SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5);
+               e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
          }
 #else
          for (j=0;j<subvect_size;j++)
@@ -207,35 +211,33 @@ int   update_target
          q=subvect_size-m;
 #ifdef FIXED_POINT
          g=sign*shape_cb[rind*subvect_size+m];
-         for (n=subvect_size*(i+1);n<nsf;n++,q++)
-            t[n] = SUB32(t[n],MULT16_16_Q11(g,r[q]));
 #else
          g=sign*0.03125*shape_cb[rind*subvect_size+m];
-         for (n=subvect_size*(i+1);n<nsf;n++,q++)
-            t[n] = SUB32(t[n],g*r[q]);
 #endif
+         target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
       }
-
-
    }
 
    /* Update excitation */
+   /* FIXME: We could update the excitation directly above */
    for (j=0;j<nsf;j++)
-      exc[j]+=e[j];
+      exc[j]=ADD32(exc[j],e[j]);
    
    /* Update target: only update target if necessary */
    if (update_target)
    {
+      VARDECL(spx_sig_t *r2);
+      ALLOC(r2, nsf, spx_sig_t);
       syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
       for (j=0;j<nsf;j++)
-         target[j]-=r2[j];
+         target[j]=SUB16(target[j],EXTRACT16(PSHR32(r2[j],8)));
    }
 }
 
 
 
 void split_cb_search_shape_sign(
-spx_sig_t target[],                    /* target vector */
+spx_word16_t target[],                 /* target vector */
 spx_coef_t ak[],                       /* LPCs for this subframe */
 spx_coef_t awk1[],                     /* Weighted LPCs for this subframe */
 spx_coef_t awk2[],                     /* Weighted LPCs for this subframe */
@@ -243,7 +245,7 @@ const void *par,                      /* Codebook/search parameters*/
 int   p,                        /* number of LPC coeffs */
 int   nsf,                      /* number of samples in subframe */
 spx_sig_t *exc,
-spx_sig_t *r,
+spx_word16_t *r,
 SpeexBits *bits,
 char *stack,
 int   complexity,
@@ -251,115 +253,125 @@ int   update_target
 )
 {
    int i,j,k,m,n,q;
-   spx_word16_t *resp;
+   VARDECL(spx_word16_t *resp);
 #ifdef _USE_SSE
-   __m128 *resp2;
-   __m128 *E;
+   VARDECL(__m128 *resp2);
+   VARDECL(__m128 *E);
 #else
    spx_word16_t *resp2;
-   spx_word32_t *E;
+   VARDECL(spx_word32_t *E);
 #endif
-   spx_word16_t *t;
-   spx_sig_t *e, *r2;
-   spx_word16_t *tmp;
-   spx_word32_t *ndist, *odist;
-   int *itmp;
+   VARDECL(spx_word16_t *t);
+   VARDECL(spx_sig_t *e);
+   VARDECL(spx_sig_t *r2);
+   VARDECL(spx_word16_t *tmp);
+   VARDECL(spx_word32_t *ndist);
+   VARDECL(spx_word32_t *odist);
+   VARDECL(int *itmp);
+   VARDECL(spx_word16_t **ot2);
+   VARDECL(spx_word16_t **nt2);
    spx_word16_t **ot, **nt;
-   int **nind, **oind;
-   int *ind;
+   VARDECL(int **nind);
+   VARDECL(int **oind);
+   VARDECL(int *ind);
    const signed char *shape_cb;
    int shape_cb_size, subvect_size, nb_subvect;
-   split_cb_params *params;
+   const split_cb_params *params;
    int N=2;
-   int *best_index;
-   spx_word32_t *best_dist;
+   VARDECL(int *best_index);
+   VARDECL(spx_word32_t *best_dist);
+   VARDECL(int *best_nind);
+   VARDECL(int *best_ntarget);
    int have_sign;
    N=complexity;
    if (N>10)
       N=10;
+   /* Complexity isn't as important for the codebooks as it is for the pitch */
+   N=(2*N)/3;
    if (N<1)
       N=1;
-   
    if (N==1)
    {
-      split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,complexity,update_target);
+      split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target);
       return;
    }
-   ot=PUSH(stack, N, spx_word16_t*);
-   nt=PUSH(stack, N, spx_word16_t*);
-   oind=PUSH(stack, N, int*);
-   nind=PUSH(stack, N, int*);
+   ALLOC(ot2, N, spx_word16_t*);
+   ALLOC(nt2, N, spx_word16_t*);
+   ALLOC(oind, N, int*);
+   ALLOC(nind, N, int*);
 
-   params = (split_cb_params *) par;
+   params = (const split_cb_params *) par;
    subvect_size = params->subvect_size;
    nb_subvect = params->nb_subvect;
    shape_cb_size = 1<<params->shape_bits;
    shape_cb = params->shape_cb;
    have_sign = params->have_sign;
-   resp = PUSH(stack, shape_cb_size*subvect_size, spx_word16_t);
+   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
 #ifdef _USE_SSE
-   resp2 = PUSH(stack, (shape_cb_size*subvect_size)>>2, __m128);
-   E = PUSH(stack, shape_cb_size>>2, __m128);
+   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
+   ALLOC(E, shape_cb_size>>2, __m128);
 #else
    resp2 = resp;
-   E = PUSH(stack, shape_cb_size, spx_word32_t);
+   ALLOC(E, shape_cb_size, spx_word32_t);
 #endif
-   t = PUSH(stack, nsf, spx_word16_t);
-   e = PUSH(stack, nsf, spx_sig_t);
-   r2 = PUSH(stack, nsf, spx_sig_t);
-   ind = PUSH(stack, nb_subvect, int);
+   ALLOC(t, nsf, spx_word16_t);
+   ALLOC(e, nsf, spx_sig_t);
+   ALLOC(r2, nsf, spx_sig_t);
+   ALLOC(ind, nb_subvect, int);
 
-   tmp = PUSH(stack, 2*N*nsf, spx_word16_t);
+   ALLOC(tmp, 2*N*nsf, spx_word16_t);
    for (i=0;i<N;i++)
    {
-      ot[i]=tmp;
-      tmp += nsf;
-      nt[i]=tmp;
-      tmp += nsf;
+      ot2[i]=tmp+2*i*nsf;
+      nt2[i]=tmp+(2*i+1)*nsf;
    }
-   best_index = PUSH(stack, N, int);
-   best_dist = PUSH(stack, N, spx_word32_t);
-   ndist = PUSH(stack, N, spx_word32_t);
-   odist = PUSH(stack, N, spx_word32_t);
+   ot=ot2;
+   nt=nt2;
+   ALLOC(best_index, N, int);
+   ALLOC(best_dist, N, spx_word32_t);
+   ALLOC(best_nind, N, int);
+   ALLOC(best_ntarget, N, int);
+   ALLOC(ndist, N, spx_word32_t);
+   ALLOC(odist, N, spx_word32_t);
    
-   itmp = PUSH(stack, 2*N*nb_subvect, int);
+   ALLOC(itmp, 2*N*nb_subvect, int);
    for (i=0;i<N;i++)
    {
-      nind[i]=itmp;
-      itmp+=nb_subvect;
-      oind[i]=itmp;
-      itmp+=nb_subvect;
-      for (j=0;j<nb_subvect;j++)
-         nind[i][j]=oind[i][j]=-1;
+      nind[i]=itmp+2*i*nb_subvect;
+      oind[i]=itmp+(2*i+1)*nb_subvect;
    }
    
-   /* FIXME: make that adaptive? */
    for (i=0;i<nsf;i++)
-      t[i]=SHR(target[i],6);
+      t[i]=target[i];
 
    for (j=0;j<N;j++)
-      for (i=0;i<nsf;i++)
-         ot[j][i]=t[i];
-
-   /*for (i=0;i<nsf;i++)
-     printf ("%d\n", (int)t[i]);*/
+      speex_move(&ot[j][0], t, nsf*sizeof(spx_word16_t));
 
    /* Pre-compute codewords response and energy */
    compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
 
    for (j=0;j<N;j++)
       odist[j]=0;
+   
    /*For all subvectors*/
    for (i=0;i<nb_subvect;i++)
    {
       /*"erase" nbest list*/
       for (j=0;j<N;j++)
-         ndist[j]=-2;
+         ndist[j]=VERY_LARGE32;
 
       /*For all n-bests of previous subvector*/
       for (j=0;j<N;j++)
       {
          spx_word16_t *x=ot[j]+subvect_size*i;
+         spx_word32_t tener = 0;
+         for (m=0;m<subvect_size;m++)
+            tener = MAC16_16(tener, x[m],x[m]);
+#ifdef FIXED_POINT
+         tener = SHR32(tener,1);
+#else
+         tener *= .5;
+#endif
          /*Find new n-best based on previous n-best j*/
          if (have_sign)
             vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
@@ -369,97 +381,64 @@ int   update_target
          /*For all new n-bests*/
          for (k=0;k<N;k++)
          {
-            spx_word16_t *ct;
-            spx_word32_t err=0;
-            ct = ot[j];
-            /*update target*/
-
-            /*previous target*/
-            for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
-               t[m]=ct[m];
-
-            /* New code: update only enough of the target to calculate error*/
-            {
-               int rind;
-               spx_word16_t *res;
-               spx_word16_t sign=1;
-               rind = best_index[k];
-               if (rind>=shape_cb_size)
-               {
-                  sign=-1;
-                  rind-=shape_cb_size;
-               }
-               res = resp+rind*subvect_size;
-               if (sign>0)
-                  for (m=0;m<subvect_size;m++)
-                     t[subvect_size*i+m] -= res[m];
-               else
-                  for (m=0;m<subvect_size;m++)
-                     t[subvect_size*i+m] += res[m];
-            }
+            /* Compute total distance (including previous sub-vectors) */
+            spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener);
             
-            /*compute error (distance)*/
-            err=odist[j];
-            for (m=i*subvect_size;m<(i+1)*subvect_size;m++)
-               err = MAC16_16(err, t[m],t[m]);
             /*update n-best list*/
-            if (err<ndist[N-1] || ndist[N-1]<-1)
+            if (err<ndist[N-1])
             {
-
-               /*previous target (we don't care what happened before*/
-               for (m=(i+1)*subvect_size;m<nsf;m++)
-                  t[m]=ct[m];
-               /* New code: update the rest of the target only if it's worth it */
-               for (m=0;m<subvect_size;m++)
-               {
-                  spx_word16_t g;
-                  int rind;
-                  spx_word16_t sign=1;
-                  rind = best_index[k];
-                  if (rind>=shape_cb_size)
-                  {
-                     sign=-1;
-                     rind-=shape_cb_size;
-                  }
-
-                  q=subvect_size-m;
-#ifdef FIXED_POINT
-                  g=sign*shape_cb[rind*subvect_size+m];
-                  for (n=subvect_size*(i+1);n<nsf;n++,q++)
-                     t[n] = SUB32(t[n],MULT16_16_Q11(g,r[q]));
-#else
-                  g=sign*0.03125*shape_cb[rind*subvect_size+m];
-                  for (n=subvect_size*(i+1);n<nsf;n++,q++)
-                     t[n] = SUB32(t[n],g*r[q]);
-#endif
-               }
-
-
                for (m=0;m<N;m++)
                {
-                  if (err < ndist[m] || ndist[m]<-1)
+                  if (err < ndist[m])
                   {
                      for (n=N-1;n>m;n--)
                      {
-                        for (q=(i+1)*subvect_size;q<nsf;q++)
-                           nt[n][q]=nt[n-1][q];
-                        for (q=0;q<nb_subvect;q++)
-                           nind[n][q]=nind[n-1][q];
-                        ndist[n]=ndist[n-1];
+                        ndist[n] = ndist[n-1];
+                        best_nind[n] = best_nind[n-1];
+                        best_ntarget[n] = best_ntarget[n-1];
                      }
-                     for (q=(i+1)*subvect_size;q<nsf;q++)
-                        nt[m][q]=t[q];
-                     for (q=0;q<nb_subvect;q++)
-                        nind[m][q]=oind[j][q];
-                     nind[m][i]=best_index[k];
-                     ndist[m]=err;
+                     ndist[m] = err;
+                     best_nind[n] = best_index[k];
+                     best_ntarget[n] = j;
                      break;
                   }
                }
             }
          }
          if (i==0)
-           break;
+            break;
+      }
+      for (j=0;j<N;j++)
+      {
+         /* previous target (we don't care what happened before) */
+         for (m=(i+1)*subvect_size;m<nsf;m++)
+            nt[j][m]=ot[best_ntarget[j]][m];
+         
+         /* New code: update the rest of the target only if it's worth it */
+         for (m=0;m<subvect_size;m++)
+         {
+            spx_word16_t g;
+            int rind;
+            spx_word16_t sign=1;
+            rind = best_nind[j];
+            if (rind>=shape_cb_size)
+            {
+               sign=-1;
+               rind-=shape_cb_size;
+            }
+
+            q=subvect_size-m;
+#ifdef FIXED_POINT
+            g=sign*shape_cb[rind*subvect_size+m];
+#else
+            g=sign*0.03125*shape_cb[rind*subvect_size+m];
+#endif
+            target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
+         }
+
+         for (q=0;q<nb_subvect;q++)
+            nind[j][q]=oind[best_ntarget[j]][q];
+         nind[j][i]=best_nind[j];
       }
 
       /*update old-new data*/
@@ -499,10 +478,10 @@ int   update_target
       if (sign==1)
       {
          for (j=0;j<subvect_size;j++)
-            e[subvect_size*i+j]=SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5);
+            e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
       } else {
          for (j=0;j<subvect_size;j++)
-            e[subvect_size*i+j]=-SHL((spx_word32_t)shape_cb[rind*subvect_size+j],SIG_SHIFT-5);
+            e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
       }
 #else
       for (j=0;j<subvect_size;j++)
@@ -511,14 +490,14 @@ int   update_target
    }   
    /* Update excitation */
    for (j=0;j<nsf;j++)
-      exc[j]+=e[j];
+      exc[j]=ADD32(exc[j],e[j]);
    
    /* Update target: only update target if necessary */
    if (update_target)
    {
       syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack);
       for (j=0;j<nsf;j++)
-         target[j]-=r2[j];
+         target[j]=SUB16(target[j],EXTRACT16(PSHR32(r2[j],8)));
    }
 }
 
@@ -528,25 +507,27 @@ spx_sig_t *exc,
 const void *par,                      /* non-overlapping codebook */
 int   nsf,                      /* number of samples in subframe */
 SpeexBits *bits,
-char *stack
+char *stack,
+spx_int32_t *seed
 )
 {
    int i,j;
-   int *ind, *signs;
+   VARDECL(int *ind);
+   VARDECL(int *signs);
    const signed char *shape_cb;
    int shape_cb_size, subvect_size, nb_subvect;
-   split_cb_params *params;
+   const split_cb_params *params;
    int have_sign;
 
-   params = (split_cb_params *) par;
+   params = (const split_cb_params *) par;
    subvect_size = params->subvect_size;
    nb_subvect = params->nb_subvect;
    shape_cb_size = 1<<params->shape_bits;
    shape_cb = params->shape_cb;
    have_sign = params->have_sign;
 
-   ind = PUSH(stack, nb_subvect, int);
-   signs = PUSH(stack, nb_subvect, int);
+   ALLOC(ind, nb_subvect, int);
+   ALLOC(signs, nb_subvect, int);
 
    /* Decode codewords and gains */
    for (i=0;i<nb_subvect;i++)
@@ -567,10 +548,10 @@ char *stack
       if (s==1)
       {
          for (j=0;j<subvect_size;j++)
-            exc[subvect_size*i+j]=SHL((spx_word32_t)shape_cb[ind[i]*subvect_size+j],SIG_SHIFT-5);
+            exc[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5);
       } else {
          for (j=0;j<subvect_size;j++)
-            exc[subvect_size*i+j]=-SHL((spx_word32_t)shape_cb[ind[i]*subvect_size+j],SIG_SHIFT-5);
+            exc[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5));
       }
 #else
       for (j=0;j<subvect_size;j++)
@@ -580,7 +561,7 @@ char *stack
 }
 
 void noise_codebook_quant(
-spx_sig_t target[],                    /* target vector */
+spx_word16_t target[],                 /* target vector */
 spx_coef_t ak[],                       /* LPCs for this subframe */
 spx_coef_t awk1[],                     /* Weighted LPCs for this subframe */
 spx_coef_t awk2[],                     /* Weighted LPCs for this subframe */
@@ -588,7 +569,7 @@ const void *par,                      /* Codebook/search parameters*/
 int   p,                        /* number of LPC coeffs */
 int   nsf,                      /* number of samples in subframe */
 spx_sig_t *exc,
-spx_sig_t *r,
+spx_word16_t *r,
 SpeexBits *bits,
 char *stack,
 int   complexity,
@@ -596,14 +577,16 @@ int   update_target
 )
 {
    int i;
-   spx_sig_t *tmp=PUSH(stack, nsf, spx_sig_t);
-   residue_percep_zero(target, ak, awk1, awk2, tmp, nsf, p, stack);
+   VARDECL(spx_sig_t *tmp);
+   ALLOC(tmp, nsf, spx_sig_t);
+   for (i=0;i<nsf;i++)
+      tmp[i]=PSHR32(EXTEND32(target[i]),SIG_SHIFT);
+   residue_percep_zero(tmp, ak, awk1, awk2, tmp, nsf, p, stack);
 
    for (i=0;i<nsf;i++)
       exc[i]+=tmp[i];
    for (i=0;i<nsf;i++)
       target[i]=0;
-
 }
 
 
@@ -612,8 +595,12 @@ spx_sig_t *exc,
 const void *par,                      /* non-overlapping codebook */
 int   nsf,                      /* number of samples in subframe */
 SpeexBits *bits,
-char *stack
+char *stack,
+spx_int32_t *seed
 )
 {
-   speex_rand_vec(1, exc, nsf);
+   int i;
+   /* FIXME: This is bad, but I don't think the function ever gets called anyway */
+   for (i=0;i<nsf;i++)
+      exc[i]=SHL32(EXTEND32(speex_rand(1, seed)),SIG_SHIFT);
 }