Adds use of restrict keyword to improve encoding speed.
author: Erik de Castro Lopo <erikd@mega-nerd.com>
Thu, 10 Oct 2013 07:24:19 +0000 (18:24 +1100)
committer: Erik de Castro Lopo <erikd@mega-nerd.com>
Thu, 10 Oct 2013 07:24:19 +0000 (18:24 +1100)
The restrict qualifier works very poorly in 32-bit Visual Studio builds
(the generated code is much slower than without it), so flac_restrict is
defined in include/share/compat.h and used in:

    lpc_compute_residual...()
    lpc_restore_signal...()

As a result, FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41()
offers no advantage for 64-bit compiles and was removed from the x86-64 part
of stream_encoder.c.

Patch-from: lvqcl <lvqcl.mail@gmail.com>

include/share/compat.h
src/libFLAC/lpc.c
src/libFLAC/stream_encoder.c

index a4b48ac..a7447aa 100644 (file)
 #define inline __inline
 #endif
 
+#if defined __INTEL_COMPILER || (defined _MSC_VER && defined _WIN64)
+/* MSVS generates VERY slow 32-bit code with __restrict, so for MSVC it is enabled in 64-bit builds only */
+#define flac_restrict __restrict
+#elif defined __GNUC__
+#define flac_restrict __restrict__
+#else
+#define flac_restrict
+#endif
+
 /* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
 #ifdef _MSC_VER
 #define FLAC__U64L(x) x
index 22f35b2..22aab4a 100644 (file)
@@ -260,7 +260,7 @@ int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order,
        return 0;
 }
 
-void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
        FLAC__int64 sumo;
@@ -520,7 +520,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, u
 }
 #endif
 
-void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
        unsigned i, j;
@@ -776,7 +776,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *da
 
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
 
-void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+void FLAC__lpc_restore_signal(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
        FLAC__int64 sumo;
@@ -1037,7 +1037,7 @@ void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, c
 }
 #endif
 
-void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+void FLAC__lpc_restore_signal_wide(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
        unsigned i, j;
index 53f09b1..661b412 100644 (file)
@@ -942,10 +942,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
 
                encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
                encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
-#    ifdef FLAC__SSE4_SUPPORTED
-               if(encoder->private_->cpuinfo.x86_64.sse41)
-                       encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41;
-#    endif
 #   endif /* FLAC__HAS_X86INTRIN */
 #  endif /* FLAC__CPU_... */
        }