Add CPU features (sse3, ssse3) detection code for x86-64.
author Erik de Castro Lopo <erikd@mega-nerd.com>
Sat, 14 Sep 2013 23:46:17 +0000 (09:46 +1000)
committer Erik de Castro Lopo <erikd@mega-nerd.com>
Sat, 14 Sep 2013 23:46:20 +0000 (09:46 +1000)
Patch-from: lvqcl <lvqcl.mail@gmail.com>

src/libFLAC/cpu.c
src/libFLAC/include/private/cpu.h

index b7a7a3a..5a45348 100644 (file)
@@ -175,7 +175,7 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
        info->data.ia32.ext3dnow = false;
        info->data.ia32.extmmx = false;
        if(info->data.ia32.cpuid) {
-               /* http://www.sandpile.org/ia32/cpuid.htm */
+               /* http://www.sandpile.org/x86/cpuid.htm */
                FLAC__uint32 flags_edx, flags_ecx;
                FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx);
                info->data.ia32.cmov  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false;
@@ -283,12 +283,12 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
                                info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
 #elif defined(_MSC_VER)
 # ifdef USE_TRY_CATCH_FLAVOR
-                       _try {
+                       __try {
                                __asm {
                                        xorps xmm0,xmm0
                                }
                        }
-                       _except(EXCEPTION_EXECUTE_HANDLER) {
+                       __except(EXCEPTION_EXECUTE_HANDLER) {
                                if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
                                        info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
                        }
@@ -331,6 +331,32 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 #endif
 
 /*
+ * x86-64-specific
+ */
+#elif defined FLAC__CPU_X86_64
+       info->type = FLAC__CPUINFO_TYPE_X86_64;
+#if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN
+       info->use_asm = true;
+       info->data.x86_64.sse3 = false;
+       info->data.x86_64.ssse3 = false;
+       {
+               /* http://www.sandpile.org/x86/cpuid.htm */
+               FLAC__uint32 flags_edx, flags_ecx;
+               FLAC__cpu_info_x86(&flags_edx, &flags_ecx);
+               info->data.x86_64.sse3  = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
+               info->data.x86_64.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
+       }
+#ifdef DEBUG
+       fprintf(stderr, "CPU info (x86-64):\n");
+       fprintf(stderr, "  SSE3 ....... %c\n", info->data.x86_64.sse3    ? 'Y' : 'n');
+       fprintf(stderr, "  SSSE3 ...... %c\n", info->data.x86_64.ssse3   ? 'Y' : 'n');
+#endif
+
+#else
+       info->use_asm = false;
+#endif
+
+/*
  * PPC-specific
  */
 #elif defined FLAC__CPU_PPC
@@ -396,10 +422,40 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 # endif
 
 /*
- * unknown CPI
+ * unknown CPU
  */
 #else
        info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
        info->use_asm = false;
 #endif
 }
+
+#if defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64
+#ifdef FLAC__HAS_X86INTRIN
+
+#if defined _MSC_VER && (_MSC_VER >= 1400)
+#include <intrin.h> /* for __cpuid() */
+#endif
+
+/*
+ * Execute CPUID leaf 1 and return the raw feature-flag words.
+ *
+ *   flags_edx  receives the EDX feature flags
+ *   flags_ecx  receives the ECX feature flags (tested for SSE3/SSSE3 by
+ *              FLAC__cpu_info())
+ *
+ * When the compiler provides neither the MSVC __cpuid() intrinsic nor GNU
+ * inline asm, both outputs are set to 0 so every feature reads as absent.
+ */
+void FLAC__cpu_info_x86(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx)
+{
+#if defined _MSC_VER && (_MSC_VER >= 1400)
+               int cpuinfo[4];
+               __cpuid(cpuinfo, 1);
+               *flags_ecx = cpuinfo[2];
+               *flags_edx = cpuinfo[3];
+#elif defined __GNUC__ && __GNUC__
+               FLAC__uint32 info = 1, flags_eax, flags_ebx;
+               /* CPUID overwrites (E/R)BX.  The register is parked in (E/R)DI
+                * around the instruction rather than listed as an output --
+                * presumably because it can be reserved as the PIC register on
+                * ia32 (NOTE(review): confirm against the supported compilers). */
+               __asm__ __volatile__ (
+#if defined __x86_64__
+                       /* Swap the full 64-bit registers: in 64-bit mode a 32-bit
+                        * xchg zero-extends its destinations, which would wipe the
+                        * upper half of RBX (callee-saved, not declared clobbered). */
+                       "xchg %%rbx, %%rdi;"
+                       "cpuid;"
+                       "xchg %%rdi, %%rbx;"
+#else
+                       "xchg %%ebx, %%edi;"
+                       "cpuid;"
+                       "xchg %%edi, %%ebx;"
+#endif
+                       :"=a" (flags_eax), "=D" (flags_ebx), "=c" (*flags_ecx), "=d" (*flags_edx)
+                       :"a" (info)
+               );
+#else
+               *flags_ecx = *flags_edx = 0;
+#endif
+}
+#endif /* FLAC__HAS_X86INTRIN */
+#endif /* FLAC__CPU_IA32 || FLAC__CPU_X86_64 */
index d920a27..90bf946 100644 (file)
@@ -41,6 +41,7 @@
 
 typedef enum {
        FLAC__CPUINFO_TYPE_IA32,
+       FLAC__CPUINFO_TYPE_X86_64, /* set by FLAC__cpu_info() when FLAC__CPU_X86_64 is defined */
        FLAC__CPUINFO_TYPE_PPC,
        FLAC__CPUINFO_TYPE_UNKNOWN /* set by FLAC__cpu_info() when no CPU-specific branch applies */
 } FLAC__CPUInfo_Type;
@@ -61,6 +62,11 @@ typedef struct {
 } FLAC__CPUInfo_IA32;
 
 typedef struct {
+       FLAC__bool sse3;  /* CPUID leaf 1 ECX & FLAC__CPUINFO_IA32_CPUID_SSE3; only written when asm/intrinsics are enabled */
+       FLAC__bool ssse3; /* CPUID leaf 1 ECX & FLAC__CPUINFO_IA32_CPUID_SSSE3; only written when asm/intrinsics are enabled */
+} FLAC__CPUInfo_x86_64; /* x86-64 feature flags, stored in FLAC__CPUInfo.data.x86_64 */
+
+typedef struct {
        FLAC__bool altivec;
        FLAC__bool ppc64;
 } FLAC__CPUInfo_PPC; /* PPC feature flags, stored in FLAC__CPUInfo.data.ppc */
@@ -70,6 +76,7 @@ typedef struct {
        FLAC__CPUInfo_Type type;
        union {
                FLAC__CPUInfo_IA32 ia32;
+               FLAC__CPUInfo_x86_64 x86_64;
                FLAC__CPUInfo_PPC ppc;
        } data;
 } FLAC__CPUInfo;
@@ -77,6 +84,7 @@ typedef struct {
 void FLAC__cpu_info(FLAC__CPUInfo *info);
 
 #ifndef FLAC__NO_ASM
+
 #ifdef FLAC__CPU_IA32
 #ifdef FLAC__HAS_NASM
 FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32(void);
@@ -84,6 +92,13 @@ void         FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flag
 FLAC__uint32 FLAC__cpu_info_extended_amd_asm_ia32(void);
 #endif
 #endif
+
+#if defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64
+#ifdef FLAC__HAS_X86INTRIN
+void FLAC__cpu_info_x86(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx);
+#endif
+#endif
+
 #endif
 
 #endif