stream_encoder_intrin_sse[23].c : Optimize int32 -> uint64 conversion.
author Erik de Castro Lopo <erikd@mega-nerd.com>
Sat, 20 Sep 2014 22:48:17 +0000 (08:48 +1000)
committer Erik de Castro Lopo <erikd@mega-nerd.com>
Sat, 20 Sep 2014 22:48:20 +0000 (08:48 +1000)
Optimizes int32 -> uint64 conversion by doing zero extension (int32 ->
uint32 -> uint64) instead of sign extension (int32 -> int64 -> uint64).

Patch-from: lvqcl <lvqcl.mail@gmail.com>

src/libFLAC/stream_encoder_intrin_sse2.c
src/libFLAC/stream_encoder_intrin_ssse3.c

index 4e9d5db..3a06392 100644 (file)
@@ -95,7 +95,7 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
 
                                mm_sum = _mm_add_epi32(mm_sum, _mm_srli_si128(mm_sum, 8));
                                mm_sum = _mm_add_epi32(mm_sum, _mm_srli_si128(mm_sum, 4));
-                               abs_residual_partition_sums[partition] = _mm_cvtsi128_si32(mm_sum);
+                               abs_residual_partition_sums[partition] = (FLAC__uint32)_mm_cvtsi128_si32(mm_sum);
                        }
                }
                else { /* have to pessimistically use 64 bits for accumulator */
index 669536a..f6a27ba 100644 (file)
@@ -89,7 +89,7 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
 
                                mm_sum = _mm_hadd_epi32(mm_sum, mm_sum);
                                mm_sum = _mm_hadd_epi32(mm_sum, mm_sum);
-                               abs_residual_partition_sums[partition] = _mm_cvtsi128_si32(mm_sum);
+                               abs_residual_partition_sums[partition] = (FLAC__uint32)_mm_cvtsi128_si32(mm_sum);
                        }
                }
                else { /* have to pessimistically use 64 bits for accumulator */