an alternative implementation of pitch_xcorr for machines with enough
author: jm <jm@0101bb08-14d6-0310-b084-bc0e0c8e3800>
Mon, 7 Feb 2005 04:00:03 +0000 (04:00 +0000)
committer: jm <jm@0101bb08-14d6-0310-b084-bc0e0c8e3800>
Mon, 7 Feb 2005 04:00:03 +0000 (04:00 +0000)
registers (more than x86, that is).

git-svn-id: http://svn.xiph.org/trunk/speex@8843 0101bb08-14d6-0310-b084-bc0e0c8e3800

libspeex/ltp.c

index 5934e56..e329955 100644 (file)
@@ -117,8 +117,73 @@ static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int
 }
 #endif
 
+#if 0
 static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
 {
+   /* Cross-correlation of _x against _y at nb_pitch consecutive offsets,
+      computing four offsets per outer iteration.  For offset k the
+      accumulated value pairs _x[j] with _y[k+j]; the result is stored at
+      corr[nb_pitch-1-k], so corr is filled in reverse offset order.
+      The `stack` argument is not used by this variant.
+
+      MULT16_16, MAC16_16, ADD32 and SHR are defined elsewhere; presumably
+      multiply, multiply-accumulate, 32-bit add and right shift -- confirm.
+
+      NOTE(review): both loops advance by 4 with no remainder handling.
+      If nb_pitch is not a multiple of 4, the last outer pass stores to
+      corr indices below 0 and reads offsets >= nb_pitch; if len is not a
+      multiple of 4, the inner loop consumes samples past len.  Callers
+      presumably guarantee multiples of 4 -- confirm before enabling. */
+   int i,j;
+   for (i=0;i<nb_pitch;i+=4)
+   {
+      /* Compute the correlation for offsets i, i+1, i+2 and i+3 at once. */
+      /* Per-offset form noted by the original author (presumably equivalent):
+         corr[nb_pitch-1-i]=inner_prod(x, _y+i, len); */
+      spx_word32_t sum1=0;
+      spx_word32_t sum2=0;
+      spx_word32_t sum3=0;
+      spx_word32_t sum4=0;
+      const spx_word16_t *y = _y+i;
+      const spx_word16_t *x = _x;
+      spx_word16_t y0, y1, y2, y3;
+      /* Prime the four-sample window: y0=y[0]; y1=y[1]; y2=y[2]; y3=y[3]. */
+      y0=*y++;
+      y1=*y++;
+      y2=*y++;
+      y3=*y++;
+      for (j=0;j<len;j+=4)
+      {
+         /* Each pass consumes four x samples.  y0..y3 form a sliding window
+            over y: after every x sample the oldest slot is reloaded with the
+            next y sample, so partK always multiplies x[n] by y[n+K-1]. */
+         spx_word32_t part1;
+         spx_word32_t part2;
+         spx_word32_t part3;
+         spx_word32_t part4;
+         part1 = MULT16_16(*x,y0);
+         part2 = MULT16_16(*x,y1);
+         part3 = MULT16_16(*x,y2);
+         part4 = MULT16_16(*x,y3);
+         x++;
+         y0=*y++;
+         part1 = MAC16_16(part1,*x,y1);
+         part2 = MAC16_16(part2,*x,y2);
+         part3 = MAC16_16(part3,*x,y3);
+         part4 = MAC16_16(part4,*x,y0);
+         x++;
+         y1=*y++;
+         part1 = MAC16_16(part1,*x,y2);
+         part2 = MAC16_16(part2,*x,y3);
+         part3 = MAC16_16(part3,*x,y0);
+         part4 = MAC16_16(part4,*x,y1);
+         x++;
+         y2=*y++;
+         part1 = MAC16_16(part1,*x,y3);
+         part2 = MAC16_16(part2,*x,y0);
+         part3 = MAC16_16(part3,*x,y1);
+         part4 = MAC16_16(part4,*x,y2);
+         x++;
+         /* Reload for the next pass; on the final pass the value loaded
+            here is never used in a product. */
+         y3=*y++;
+         
+         /* Each group of four products goes through SHR(.,6) before being
+            added to its running sum. */
+         sum1 = ADD32(sum1,SHR(part1,6));
+         sum2 = ADD32(sum2,SHR(part2,6));
+         sum3 = ADD32(sum3,SHR(part3,6));
+         sum4 = ADD32(sum4,SHR(part4,6));
+      }
+      /* Store in reverse order: offset i lands at the highest index. */
+      corr[nb_pitch-1-i]=sum1;
+      corr[nb_pitch-2-i]=sum2;
+      corr[nb_pitch-3-i]=sum3;
+      corr[nb_pitch-4-i]=sum4;
+   }
+
+}
+#else
+static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word
+32_t *corr, int len, int nb_pitch, char *stack)
+{
    int i;
    for (i=0;i<nb_pitch;i++)
    {
@@ -127,6 +192,9 @@ static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word
    }
 
 }
+#endif
+
+
 
 #endif
 
@@ -288,7 +356,7 @@ int cdbk_offset
    x[0]=tmp;
    x[1]=tmp+nsf;
    x[2]=tmp+2*nsf;
-
+   
    e[0]=tmp2;
    e[1]=tmp2+nsf;
    e[2]=tmp2+2*nsf;