libFLAC: Add a workaround for a bug in MSVC2015 update2
author Erik de Castro Lopo <erikd@mega-nerd.com>
Thu, 5 May 2016 07:21:20 +0000 (17:21 +1000)
committer Erik de Castro Lopo <erikd@mega-nerd.com>
Thu, 5 May 2016 07:23:52 +0000 (17:23 +1000)
MSVC2015 update2 compiles the C code:

    abs_residual_partition_sums[partition] =
                  (FLAC__uint32)_mm_cvtsi128_si32(mm_sum);

into this:

    movq    QWORD PTR [rsi], xmm2

while it should be (storing only the low 32 bits of xmm2, zero-extended to 64 bits):

    movd    eax, xmm2
    mov     QWORD PTR [rsi], rax

With this patch, MSVC emits:

    movq    QWORD PTR [rsi], xmm2
    mov     DWORD PTR [rsi+4], r9d

so the price of this workaround is 1 extra write instruction per
partition.

Patch-from: lvqcl <lvqcl.mail@gmail.com>

src/libFLAC/stream_encoder_intrin_avx2.c
src/libFLAC/stream_encoder_intrin_sse2.c
src/libFLAC/stream_encoder_intrin_ssse3.c

index d0cd0e4..4730bf5 100644 (file)
@@ -84,6 +84,10 @@ void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual
                                sum128 = _mm_hadd_epi32(sum128, sum128);
                                sum128 = _mm_hadd_epi32(sum128, sum128);
                                abs_residual_partition_sums[partition] = (FLAC__uint32)_mm_cvtsi128_si32(sum128);
+/* workaround for a bug in MSVC2015U2 - see https://connect.microsoft.com/VisualStudio/feedback/details/2659191/incorrect-code-generation-for-x86-64 */
+#if (defined _MSC_VER) && (_MSC_FULL_VER == 190023918) && (defined FLAC__CPU_X86_64)
+                               abs_residual_partition_sums[partition] &= 0xFFFFFFFF; /**/
+#endif
                        }
                }
                else { /* have to pessimistically use 64 bits for accumulator */
index ed01978..55f7664 100644 (file)
@@ -98,6 +98,10 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
                                mm_sum = _mm_add_epi32(mm_sum, _mm_srli_si128(mm_sum, 8));
                                mm_sum = _mm_add_epi32(mm_sum, _mm_srli_si128(mm_sum, 4));
                                abs_residual_partition_sums[partition] = (FLAC__uint32)_mm_cvtsi128_si32(mm_sum);
+/* workaround for a bug in MSVC2015U2 - see https://connect.microsoft.com/VisualStudio/feedback/details/2659191/incorrect-code-generation-for-x86-64 */
+#if (defined _MSC_VER) && (_MSC_FULL_VER == 190023918) && (defined FLAC__CPU_X86_64)
+                               abs_residual_partition_sums[partition] &= 0xFFFFFFFF;
+#endif
                        }
                }
                else { /* have to pessimistically use 64 bits for accumulator */
index 465950b..551ffd3 100644 (file)
@@ -87,6 +87,10 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
                                mm_sum = _mm_hadd_epi32(mm_sum, mm_sum);
                                mm_sum = _mm_hadd_epi32(mm_sum, mm_sum);
                                abs_residual_partition_sums[partition] = (FLAC__uint32)_mm_cvtsi128_si32(mm_sum);
+/* workaround for a bug in MSVC2015U2 - see https://connect.microsoft.com/VisualStudio/feedback/details/2659191/incorrect-code-generation-for-x86-64 */
+#if (defined _MSC_VER) && (_MSC_FULL_VER == 190023918) && (defined FLAC__CPU_X86_64)
+                               abs_residual_partition_sums[partition] &= 0xFFFFFFFF;
+#endif
                        }
                }
                else { /* have to pessimistically use 64 bits for accumulator */