Using reciprocal approximation instead of full 32-bit division in alg_quant()
author: Jean-Marc Valin <Jean-Marc.Valin@csiro.au>
Wed, 12 Mar 2008 02:26:37 +0000 (13:26 +1100)
committer: Jean-Marc Valin <Jean-Marc.Valin@csiro.au>
Wed, 12 Mar 2008 02:26:37 +0000 (13:26 +1100)
libcelt/_kiss_fft_guts.h
libcelt/arch.h
libcelt/fixed_debug.h
libcelt/fixed_generic.h
libcelt/vq.c

index 8cc6cba..73ac7d6 100644 (file)
@@ -89,11 +89,6 @@ struct kiss_fft_state{
 
 #ifdef MIXED_PRECISION
 
-#undef MULT16_32_Q15
-#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
-/*#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))*/
-#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
-
 #   define S_MUL(a,b) MULT16_32_Q15(b, a)
 
 #   define C_MUL(m,a,b) \
index 029de88..83e48ab 100644 (file)
@@ -177,6 +177,8 @@ typedef float celt_mask_t;
 #define MULT16_32_Q15(a,b)     ((a)*(b))
 #define MULT16_32_P15(a,b)     ((a)*(b))
 
+#define MULT32_32_Q31(a,b)     ((a)*(b))
+
 #define MAC16_32_Q11(c,a,b)     ((c)+(a)*(b))
 #define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))
 
index e82d28b..136f4d7 100644 (file)
@@ -41,6 +41,9 @@
 static long long celt_mips = 0;
 #define MIPS_INC celt_mips++,
 
+#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+
 #define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
 #define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
 
index 58fc0a2..f0490a1 100644 (file)
 #ifndef FIXED_GENERIC_H
 #define FIXED_GENERIC_H
 
+#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
+
+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+
+
 #define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
 #define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
 
@@ -81,7 +88,6 @@
 #define MAC16_32_Q11(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11)))
 
 #define MULT16_32_P15(a,b) ADD32(MULT16_16((a),SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15))
-#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
 #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
 
 
index 0fe2526..37af3b4 100644 (file)
@@ -226,9 +226,12 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
                Ryp = yp[m] + MULT16_16(spj, SUB16(QCONST16(1.f,14),MULT16_16_Q15(alpha,Rpp)));
                
                /* Compute the gain such that ||p + g*y|| = 1 */
-               g = DIV32(SHL32(celt_sqrt(MULT16_16(ROUND(Ryp,14),ROUND(Ryp,14)) + Ryy - MULT16_16(ROUND(Ryy,14),Rpp)) - ROUND(Ryp,14),14),ROUND(Ryy,14));
-               
-               /* Knowing that gain, what the error: (x-g*y)^2 
+               g = MULT32_32_Q31(
+                     SHL32(celt_sqrt(MULT16_16(ROUND(Ryp,14),ROUND(Ryp,14)) + Ryy -
+                                     MULT16_16(ROUND(Ryy,14),Rpp))
+                           - ROUND(Ryp,14), 14),
+                     celt_rcp(ROUND(Ryy,14)));
+               /* Knowing that gain, what's the error: (x-g*y)^2 
                   (result is negated and we discard x^2 because it's constant) */
                /*score = 2.f*g*Rxy - 1.f*g*g*Ryy*NORM_SCALING_1;*/
                score = 2*MULT16_32_Q14(ROUND(Rxy,14),g) -