util: Cleanup u_cpu_detect, build. Support X86_64 and detect SSE4.1 too.
author José Fonseca <jfonseca@vmware.com>
Tue, 29 Sep 2009 12:25:08 +0000 (13:25 +0100)
committer José Fonseca <jfonseca@vmware.com>
Tue, 29 Sep 2009 12:59:16 +0000 (13:59 +0100)
I was waiting for the need to use this code to arise, and it finally came.

I've tested building this on Linux and Windows, both x86 and x86_64. But
it might break other platforms. Please bear with me and help me fix it.

Many thanks to Dennis Smit who submitted this, and Eric Anholt whose
work this was based on.

src/gallium/auxiliary/util/Makefile
src/gallium/auxiliary/util/SConscript
src/gallium/auxiliary/util/u_cpu_detect.c
src/gallium/auxiliary/util/u_cpu_detect.h

index ae8d330a78716d8817589eb9a9d32fe5027c3514..1d8bb55bbd68d4bd9bdae78f69bd2ed268007fca 100644 (file)
@@ -10,6 +10,7 @@ C_SOURCES = \
        u_debug_stack.c \
        u_blit.c \
        u_cache.c \
+       u_cpu_detect.c \
        u_draw_quad.c \
        u_format.c \
        u_format_access.c \
index 28a5ab42569df1f9015eb5331cb1406f1ac49940..2187935fa40f0fc699bd8d13aa241fe7c12ae822 100644 (file)
@@ -24,6 +24,7 @@ util = env.ConvenienceLibrary(
                'u_bitmask.c',
                'u_blit.c',
                'u_cache.c',
+               'u_cpu_detect.c',
                'u_debug.c',
                'u_debug_dump.c',
                'u_debug_memory.c',
index d9f2f8fc288c3825aa6ef05a56f9ba87d525b47f..ecfb96138d6e0de374d9e95b4899c10fa10845b6 100644 (file)
  * 
  **************************************************************************/
 
-/*
- * Based on the work of Eric Anholt <anholt@FreeBSD.org>
+/**
+ * @file
+ * CPU feature detection.
+ *
+ * @author Dennis Smit
+ * @author Based on the work of Eric Anholt <anholt@FreeBSD.org>
  */
 
-/* FIXME: clean this entire file up */
+#include "pipe/p_config.h"
 
+#include "u_debug.h"
 #include "u_cpu_detect.h"
 
-#ifdef __linux__
-#define OS_LINUX
-#endif
-#ifdef WIN32
-#define OS_WIN32
-#endif
-
-#if defined(ARCH_POWERPC)
-#if defined(OS_DARWIN)
+#if defined(PIPE_ARCH_PPC)
+#if defined(PIPE_OS_DARWIN)
 #include <sys/sysctl.h>
 #else
 #include <signal.h>
 #endif
 #endif
 
-#if defined(OS_NETBSD) || defined(OS_OPENBSD)
+#if defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
 #include <sys/param.h>
 #include <sys/sysctl.h>
 #include <machine/cpu.h>
 #endif
 
-#if defined(OS_FREEBSD)
+#if defined(PIPE_OS_FREEBSD)
 #include <sys/types.h>
 #include <sys/sysctl.h>
 #endif
 
-#if defined(OS_LINUX)
+#if defined(PIPE_OS_LINUX)
 #include <signal.h>
 #endif
 
-#if defined(OS_WIN32)
-#include <windows.h>
+#ifdef PIPE_OS_UNIX
+#include <unistd.h>
 #endif
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
+#if defined(PIPE_OS_WINDOWS)
+#include <windows.h>
+#endif
 
 
-static struct cpu_detect_caps __cpu_detect_caps;
-static int __cpu_detect_initialized = 0;
+struct util_cpu_caps util_cpu_caps;
 
 static int has_cpuid(void);
 static int cpuid(unsigned int ax, unsigned int *p);
 
+#if defined(PIPE_ARCH_X86)
+
 /* The sigill handlers */
-#if defined(ARCH_X86) /* x86 (linux katmai handler check thing) */
-#if defined(OS_LINUX) && defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)
-static void sigill_handler_sse(int signal, struct sigcontext sc)
+#if defined(PIPE_OS_LINUX) //&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)
+static void
+sigill_handler_sse(int signal, struct sigcontext sc)
 {
-       /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
-        * instructions are 3 bytes long.  We must increment the instruction
-        * pointer manually to avoid repeated execution of the offending
-        * instruction.
-        *
-        * If the SIGILL is caused by a divide-by-zero when unmasked
-        * exceptions aren't supported, the SIMD FPU status and control
-        * word will be restored at the end of the test, so we don't need
-        * to worry about doing it here.  Besides, we may not be able to...
-        */
-       sc.eip += 3;
-
-       __cpu_detect_caps.hasSSE=0;
+   /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
+    * instructions are 3 bytes long.  We must increment the instruction
+    * pointer manually to avoid repeated execution of the offending
+    * instruction.
+    *
+    * If the SIGILL is caused by a divide-by-zero when unmasked
+    * exceptions aren't supported, the SIMD FPU status and control
+    * word will be restored at the end of the test, so we don't need
+    * to worry about doing it here.  Besides, we may not be able to...
+    */
+   sc.eip += 3;
+
+   util_cpu_caps.has_sse=0;
 }
 
-static void sigfpe_handler_sse(int signal, struct sigcontext sc)
+static void
+sigfpe_handler_sse(int signal, struct sigcontext sc)
 {
-       if (sc.fpstate->magic != 0xffff) {
-               /* Our signal context has the extended FPU state, so reset the
-                * divide-by-zero exception mask and clear the divide-by-zero
-                * exception bit.
-                */
-               sc.fpstate->mxcsr |= 0x00000200;
-               sc.fpstate->mxcsr &= 0xfffffffb;
-       } else {
-               /* If we ever get here, we're completely hosed.
-               */
-       }
+   if (sc.fpstate->magic != 0xffff) {
+      /* Our signal context has the extended FPU state, so reset the
+       * divide-by-zero exception mask and clear the divide-by-zero
+       * exception bit.
+       */
+      sc.fpstate->mxcsr |= 0x00000200;
+      sc.fpstate->mxcsr &= 0xfffffffb;
+   } else {
+      /* If we ever get here, we're completely hosed.
+      */
+   }
 }
-#endif
-#endif /* OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */
+#endif /* PIPE_OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */
 
-#if defined(OS_WIN32)
-LONG CALLBACK win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
+#if defined(PIPE_OS_WINDOWS)
+static LONG CALLBACK
+win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
 {
-       if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){
-               ep->ContextRecord->Eip +=3;
-               __cpu_detect_caps.hasSSE=0;
-               return EXCEPTION_CONTINUE_EXECUTION;
-       }
-       return EXCEPTION_CONTINUE_SEARCH;
+   if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){
+      ep->ContextRecord->Eip +=3;
+      util_cpu_caps.has_sse=0;
+      return EXCEPTION_CONTINUE_EXECUTION;
+   }
+   return EXCEPTION_CONTINUE_SEARCH;
 }
-#endif /* OS_WIN32 */
+#endif /* PIPE_OS_WINDOWS */
+
+#endif /* PIPE_ARCH_X86 */
 
 
-#if defined(ARCH_POWERPC) && !defined(OS_DARWIN)
+#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_DARWIN)
 static sigjmp_buf __lv_powerpc_jmpbuf;
 static volatile sig_atomic_t __lv_powerpc_canjump = 0;
 
-static void sigill_handler (int sig);
-
-static void sigill_handler (int sig)
+static void
+sigill_handler(int sig)
 {
-       if (!__lv_powerpc_canjump) {
-               signal (sig, SIG_DFL);
-               raise (sig);
-       }
+   if (!__lv_powerpc_canjump) {
+      signal (sig, SIG_DFL);
+      raise (sig);
+   }
 
-       __lv_powerpc_canjump = 0;
-       siglongjmp(__lv_powerpc_jmpbuf, 1);
+   __lv_powerpc_canjump = 0;
+   siglongjmp(__lv_powerpc_jmpbuf, 1);
 }
 
-static void check_os_altivec_support(void)
+static void
+check_os_altivec_support(void)
 {
-#if defined(OS_DARWIN)
-       int sels[2] = {CTL_HW, HW_VECTORUNIT};
-       int has_vu = 0;
-       int len = sizeof (has_vu);
-       int err;
-
-       err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
-
-       if (err == 0) {
-               if (has_vu != 0) {
-                       __cpu_detect_caps.hasAltiVec = 1;
-               }
-       }
-#else /* !OS_DARWIN */
-       /* no Darwin, do it the brute-force way */
-       /* this is borrowed from the libmpeg2 library */
-       signal(SIGILL, sigill_handler);
-       if (sigsetjmp(__lv_powerpc_jmpbuf, 1)) {
-               signal(SIGILL, SIG_DFL);
-       } else {
-               __lv_powerpc_canjump = 1;
-
-               __asm __volatile
-                       ("mtspr 256, %0\n\t"
-                        "vand %%v0, %%v0, %%v0"
-                        :
-                        : "r" (-1));
-
-               signal(SIGILL, SIG_DFL);
-               __cpu_detect_caps.hasAltiVec = 1;
-       }
+#if defined(PIPE_OS_DARWIN)
+   int sels[2] = {CTL_HW, HW_VECTORUNIT};
+   int has_vu = 0;
+   int len = sizeof (has_vu);
+   int err;
+
+   err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
+
+   if (err == 0) {
+      if (has_vu != 0) {
+         util_cpu_caps.has_altivec = 1;
+      }
+   }
+#else /* !PIPE_OS_DARWIN */
+   /* no Darwin, do it the brute-force way */
+   /* this is borrowed from the libmpeg2 library */
+   signal(SIGILL, sigill_handler);
+   if (sigsetjmp(__lv_powerpc_jmpbuf, 1)) {
+      signal(SIGILL, SIG_DFL);
+   } else {
+      __lv_powerpc_canjump = 1;
+
+      __asm __volatile
+         ("mtspr 256, %0\n\t"
+          "vand %%v0, %%v0, %%v0"
+          :
+          : "r" (-1));
+
+      signal(SIGILL, SIG_DFL);
+      util_cpu_caps.has_altivec = 1;
+   }
 #endif
 }
 #endif
@@ -189,318 +190,312 @@ static void check_os_altivec_support(void)
  * and RedHat patched 2.2 kernels that have broken exception handling
  * support for user space apps that do SSE.
  */
-static void check_os_katmai_support(void)
+static void
+check_os_katmai_support(void)
 {
-#if defined(ARCH_X86)
-#if defined(OS_FREEBSD)
-       int has_sse=0, ret;
-       int len = sizeof (has_sse);
-
-       ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0);
-       if (ret || !has_sse)
-               __cpu_detect_caps.hasSSE=0;
-
-#elif defined(OS_NETBSD) || defined(OS_OPENBSD)
-       int has_sse, has_sse2, ret, mib[2];
-       int varlen;
-
-       mib[0] = CTL_MACHDEP;
-       mib[1] = CPU_SSE;
-       varlen = sizeof (has_sse);
-
-       ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
-       if (ret < 0 || !has_sse) {
-               __cpu_detect_caps.hasSSE = 0;
-       } else {
-               __cpu_detect_caps.hasSSE = 1;
-       }
-
-       mib[1] = CPU_SSE2;
-       varlen = sizeof (has_sse2);
-       ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
-       if (ret < 0 || !has_sse2) {
-               __cpu_detect_caps.hasSSE2 = 0;
-       } else {
-               __cpu_detect_caps.hasSSE2 = 1;
-       }
-       __cpu_detect_caps.hasSSE = 0; /* FIXME ?!?!? */
-
-#elif defined(OS_WIN32)
-       LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
-       if (__cpu_detect_caps.hasSSE) {
-               exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
-               __asm __volatile ("xorps %xmm0, %xmm0");
-               SetUnhandledExceptionFilter(exc_fil);
-       }
-#elif defined(OS_LINUX)
-       struct sigaction saved_sigill;
-       struct sigaction saved_sigfpe;
-
-       /* Save the original signal handlers.
-       */
-       sigaction(SIGILL, NULL, &saved_sigill);
-       sigaction(SIGFPE, NULL, &saved_sigfpe);
-
-       signal(SIGILL, (void (*)(int))sigill_handler_sse);
-       signal(SIGFPE, (void (*)(int))sigfpe_handler_sse);
-
-       /* Emulate test for OSFXSR in CR4.  The OS will set this bit if it
-        * supports the extended FPU save and restore required for SSE.  If
-        * we execute an SSE instruction on a PIII and get a SIGILL, the OS
-        * doesn't support Streaming SIMD Exceptions, even if the processor
-        * does.
-        */
-       if (__cpu_detect_caps.hasSSE) {
-               __asm __volatile ("xorps %xmm1, %xmm0");
-       }
-
-       /* Emulate test for OSXMMEXCPT in CR4.  The OS will set this bit if
-        * it supports unmasked SIMD FPU exceptions.  If we unmask the
-        * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
-        * doesn't support unmasked SIMD FPU exceptions.  If we get a SIGFPE
-        * as expected, we're okay but we need to clean up after it.
-        *
-        * Are we being too stringent in our requirement that the OS support
-        * unmasked exceptions?  Certain RedHat 2.2 kernels enable SSE by
-        * setting CR4.OSFXSR but don't support unmasked exceptions.  Win98
-        * doesn't even support them.  We at least know the user-space SSE
-        * support is good in kernels that do support unmasked exceptions,
-        * and therefore to be safe I'm going to leave this test in here.
-        */
-       if (__cpu_detect_caps.hasSSE) {
-               //      test_os_katmai_exception_support();
-       }
-
-       /* Restore the original signal handlers.
-       */
-       sigaction(SIGILL, &saved_sigill, NULL);
-       sigaction(SIGFPE, &saved_sigfpe, NULL);
+#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_OS_FREEBSD)
+   int has_sse=0, ret;
+   int len = sizeof (has_sse);
+
+   ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0);
+   if (ret || !has_sse)
+      util_cpu_caps.has_sse=0;
+
+#elif defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
+   int has_sse, has_sse2, ret, mib[2];
+   int varlen;
+
+   mib[0] = CTL_MACHDEP;
+   mib[1] = CPU_SSE;
+   varlen = sizeof (has_sse);
+
+   ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
+   if (ret < 0 || !has_sse) {
+      util_cpu_caps.has_sse = 0;
+   } else {
+      util_cpu_caps.has_sse = 1;
+   }
+
+   mib[1] = CPU_SSE2;
+   varlen = sizeof (has_sse2);
+   ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
+   if (ret < 0 || !has_sse2) {
+      util_cpu_caps.has_sse2 = 0;
+   } else {
+      util_cpu_caps.has_sse2 = 1;
+   }
+   util_cpu_caps.has_sse = 0; /* FIXME ?!?!? */
+
+#elif defined(PIPE_OS_WINDOWS)
+   LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
+   if (util_cpu_caps.has_sse) {
+      exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
+#if defined(PIPE_CC_GCC)
+      __asm __volatile ("xorps %xmm0, %xmm0");
+#elif defined(PIPE_CC_MSVC)
+      __asm {
+          xorps xmm0, xmm0        // executing SSE instruction
+      }
+#else
+#error Unsupported compiler
+#endif
+      SetUnhandledExceptionFilter(exc_fil);
+   }
+#elif defined(PIPE_OS_LINUX)
+   struct sigaction saved_sigill;
+   struct sigaction saved_sigfpe;
+
+   /* Save the original signal handlers.
+   */
+   sigaction(SIGILL, NULL, &saved_sigill);
+   sigaction(SIGFPE, NULL, &saved_sigfpe);
+
+   signal(SIGILL, (void (*)(int))sigill_handler_sse);
+   signal(SIGFPE, (void (*)(int))sigfpe_handler_sse);
+
+   /* Emulate test for OSFXSR in CR4.  The OS will set this bit if it
+    * supports the extended FPU save and restore required for SSE.  If
+    * we execute an SSE instruction on a PIII and get a SIGILL, the OS
+    * doesn't support Streaming SIMD Exceptions, even if the processor
+    * does.
+    */
+   if (util_cpu_caps.has_sse) {
+      __asm __volatile ("xorps %xmm1, %xmm0");
+   }
+
+   /* Emulate test for OSXMMEXCPT in CR4.  The OS will set this bit if
+    * it supports unmasked SIMD FPU exceptions.  If we unmask the
+    * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
+    * doesn't support unmasked SIMD FPU exceptions.  If we get a SIGFPE
+    * as expected, we're okay but we need to clean up after it.
+    *
+    * Are we being too stringent in our requirement that the OS support
+    * unmasked exceptions?  Certain RedHat 2.2 kernels enable SSE by
+    * setting CR4.OSFXSR but don't support unmasked exceptions.  Win98
+    * doesn't even support them.  We at least know the user-space SSE
+    * support is good in kernels that do support unmasked exceptions,
+    * and therefore to be safe I'm going to leave this test in here.
+    */
+   if (util_cpu_caps.has_sse) {
+      //      test_os_katmai_exception_support();
+   }
+
+   /* Restore the original signal handlers.
+   */
+   sigaction(SIGILL, &saved_sigill, NULL);
+   sigaction(SIGFPE, &saved_sigfpe, NULL);
 
 #else
-       /* We can't use POSIX signal handling to test the availability of
-        * SSE, so we disable it by default.
-        */
-       __cpu_detect_caps.hasSSE = 0;
+   /* We can't use POSIX signal handling to test the availability of
+    * SSE, so we disable it by default.
+    */
+   util_cpu_caps.has_sse = 0;
 #endif /* __linux__ */
 #endif
+
+#if defined(PIPE_ARCH_X86_64)
+   util_cpu_caps.has_sse = 1;
+#endif
 }
 
 
 static int has_cpuid(void)
 {
-#if defined(ARCH_X86)
-       int a, c;
-
-       __asm __volatile
-               ("pushf\n"
-                "popl %0\n"
-                "movl %0, %1\n"
-                "xorl $0x200000, %0\n"
-                "push %0\n"
-                "popf\n"
-                "pushf\n"
-                "popl %0\n"
-                : "=a" (a), "=c" (c)
-                :
-                : "cc");
-
-       return a != c;
+#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_OS_GCC)
+   int a, c;
+
+   __asm __volatile
+      ("pushf\n"
+       "popl %0\n"
+       "movl %0, %1\n"
+       "xorl $0x200000, %0\n"
+       "push %0\n"
+       "popf\n"
+       "pushf\n"
+       "popl %0\n"
+       : "=a" (a), "=c" (c)
+       :
+       : "cc");
+
+   return a != c;
+#else
+   /* FIXME */
+   return 1;
+#endif
+#elif defined(PIPE_ARCH_X86_64)
+   return 1;
 #else
-       return 0;
+   return 0;
 #endif
 }
 
-static int cpuid(unsigned int ax, unsigned int *p)
+static INLINE int
+cpuid(unsigned int ax, unsigned int *p)
 {
-#if defined(ARCH_X86)
-       unsigned int flags;
-
-       __asm __volatile
-               ("movl %%ebx, %%esi\n\t"
-                "cpuid\n\t"
-                "xchgl %%ebx, %%esi"
-                : "=a" (p[0]), "=S" (p[1]),
-                "=c" (p[2]), "=d" (p[3])
-                : "0" (ax));
-
-       return 0;
-#else
-       return -1;
+   int ret = -1;
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+#if defined(PIPE_CC_GCC)
+   __asm __volatile
+      ("movl %%ebx, %%esi\n\t"
+       "cpuid\n\t"
+       "xchgl %%ebx, %%esi"
+       : "=a" (p[0]), "=S" (p[1]),
+       "=c" (p[2]), "=d" (p[3])
+       : "0" (ax));
+
+   ret = 0;
+#elif defined(PIPE_CC_MSVC)
+   __cpuid(ax, p);
+
+   ret = 0;
+#endif
 #endif
+
+   return ret;
 }
 
-void cpu_detect_initialize()
+void
+util_cpu_detect(void)
 {
-       unsigned int regs[4];
-       unsigned int regs2[4];
-
-       int mib[2], ncpu;
-       int len;
-
-       memset(&__cpu_detect_caps, 0, sizeof (struct cpu_detect_caps));
-
-       /* Check for arch type */
-#if defined(ARCH_MIPS)
-       __cpu_detect_caps.type = CPU_DETECT_TYPE_MIPS;
-#elif defined(ARCH_ALPHA)
-       __cpu_detect_caps.type = CPU_DETECT_TYPE_ALPHA;
-#elif defined(ARCH_SPARC)
-       __cpu_detect_caps.type = CPU_DETECT_TYPE_SPARC;
-#elif defined(ARCH_X86)
-       __cpu_detect_caps.type = CPU_DETECT_TYPE_X86;
-#elif defined(ARCH_POWERPC)
-       __cpu_detect_caps.type = CPU_DETECT_TYPE_POWERPC;
+   static boolean util_cpu_detect_initialized = FALSE;
+
+   if(util_cpu_detect_initialized)
+      return;
+
+   memset(&util_cpu_caps, 0, sizeof util_cpu_caps);
+
+   /* Check for arch type */
+#if defined(PIPE_ARCH_MIPS)
+   util_cpu_caps.arch = UTIL_CPU_ARCH_MIPS;
+#elif defined(PIPE_ARCH_ALPHA)
+   util_cpu_caps.arch = UTIL_CPU_ARCH_ALPHA;
+#elif defined(PIPE_ARCH_SPARC)
+   util_cpu_caps.arch = UTIL_CPU_ARCH_SPARC;
+#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   util_cpu_caps.arch = UTIL_CPU_ARCH_X86;
+#elif defined(PIPE_ARCH_PPC)
+   util_cpu_caps.arch = UTIL_CPU_ARCH_POWERPC;
 #else
-       __cpu_detect_caps.type = CPU_DETECT_TYPE_OTHER;
+   util_cpu_caps.arch = UTIL_CPU_ARCH_UNKNOWN;
 #endif
 
-       /* Count the number of CPUs in system */
-#if !defined(OS_WIN32) && !defined(OS_UNKNOWN) && defined(_SC_NPROCESSORS_ONLN)
-       __cpu_detect_caps.nrcpu = sysconf(_SC_NPROCESSORS_ONLN);
-       if (__cpu_detect_caps.nrcpu == -1)
-               __cpu_detect_caps.nrcpu = 1;
-
-#elif defined(OS_NETBSD) || defined(OS_FREEBSD) || defined(OS_OPENBSD)
+   /* Count the number of CPUs in system */
+#if !defined(PIPE_OS_WINDOWS) && !defined(PIPE_OS_UNKNOWN) && defined(_SC_NPROCESSORS_ONLN)
+   util_cpu_caps.nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+   if (util_cpu_caps.nr_cpus == -1)
+      util_cpu_caps.nr_cpus = 1;
 
-       mib[0] = CTL_HW;
-       mib[1] = HW_NCPU;
+#elif defined(PIPE_OS_NETBSD) || defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_OPENBSD)
+   {
+      int mib[2], ncpu;
+      int len;
 
-       len = sizeof (ncpu);
-       sysctl(mib, 2, &ncpu, &len, NULL, 0);
-       __cpu_detect_caps.nrcpu = ncpu;
+      mib[0] = CTL_HW;
+      mib[1] = HW_NCPU;
 
+      len = sizeof (ncpu);
+      sysctl(mib, 2, &ncpu, &len, NULL, 0);
+      util_cpu_caps.nr_cpus = ncpu;
+   }
 #else
-       __cpu_detect_caps.nrcpu = 1;
+   util_cpu_caps.nr_cpus = 1;
 #endif
 
-#if defined(ARCH_X86)
-       /* No cpuid, old 486 or lower */
-       if (has_cpuid() == 0)
-               return;
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   if (has_cpuid()) {
+      unsigned int regs[4];
+      unsigned int regs2[4];
 
-       __cpu_detect_caps.cacheline = 32;
+      util_cpu_caps.cacheline = 32;
 
-       /* Get max cpuid level */
-       cpuid(0x00000000, regs);
+      /* Get max cpuid level */
+      cpuid(0x00000000, regs);
 
-       if (regs[0] >= 0x00000001) {
-               unsigned int cacheline;
+      if (regs[0] >= 0x00000001) {
+         unsigned int cacheline;
 
-               cpuid (0x00000001, regs2);
+         cpuid (0x00000001, regs2);
 
-               __cpu_detect_caps.x86cpuType = (regs2[0] >> 8) & 0xf;
-               if (__cpu_detect_caps.x86cpuType == 0xf)
-                   __cpu_detect_caps.x86cpuType = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */
+         util_cpu_caps.x86_cpu_type = (regs2[0] >> 8) & 0xf;
+         if (util_cpu_caps.x86_cpu_type == 0xf)
+             util_cpu_caps.x86_cpu_type = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */
 
-               /* general feature flags */
-               __cpu_detect_caps.hasTSC  = (regs2[3] & (1 << 8  )) >>  8; /* 0x0000010 */
-               __cpu_detect_caps.hasMMX  = (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */
-               __cpu_detect_caps.hasSSE  = (regs2[3] & (1 << 25 )) >> 25; /* 0x2000000 */
-               __cpu_detect_caps.hasSSE2 = (regs2[3] & (1 << 26 )) >> 26; /* 0x4000000 */
-               __cpu_detect_caps.hasSSE3 = (regs2[2] & (1));          /* 0x0000001 */
-               __cpu_detect_caps.hasSSSE3 = (regs2[2] & (1 << 9 )) >> 9;   /* 0x0000020 */
-               __cpu_detect_caps.hasMMX2 = __cpu_detect_caps.hasSSE; /* SSE cpus supports mmxext too */
+         /* general feature flags */
+         util_cpu_caps.has_tsc    = (regs2[3] & (1 << 8  )) >>  8; /* 0x0000010 */
+         util_cpu_caps.has_mmx    = (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */
+         util_cpu_caps.has_sse    = (regs2[3] & (1 << 25 )) >> 25; /* 0x2000000 */
+         util_cpu_caps.has_sse2   = (regs2[3] & (1 << 26 )) >> 26; /* 0x4000000 */
+         util_cpu_caps.has_sse3   = (regs2[2] & (1));          /* 0x0000001 */
+         util_cpu_caps.has_ssse3  = (regs2[2] & (1 << 9 )) >> 9;   /* 0x0000020 */
+         util_cpu_caps.has_sse4_1 = (regs2[2] & (1 << 19)) >> 19;
+         util_cpu_caps.has_mmx2   = util_cpu_caps.has_sse; /* SSE cpus supports mmxext too */
 
-               cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
-               if (cacheline > 0)
-                       __cpu_detect_caps.cacheline = cacheline;
-       }
+         cacheline = ((regs2[1] >> 8) & 0xFF) * 8;
+         if (cacheline > 0)
+            util_cpu_caps.cacheline = cacheline;
+      }
 
-       cpuid(0x80000000, regs);
+      cpuid(0x80000000, regs);
 
-       if (regs[0] >= 0x80000001) {
+      if (regs[0] >= 0x80000001) {
 
-               cpuid(0x80000001, regs2);
+         cpuid(0x80000001, regs2);
 
-               __cpu_detect_caps.hasMMX  |= (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */
-               __cpu_detect_caps.hasMMX2 |= (regs2[3] & (1 << 22 )) >> 22; /* 0x400000 */
-               __cpu_detect_caps.has3DNow    = (regs2[3] & (1 << 31 )) >> 31; /* 0x80000000 */
-               __cpu_detect_caps.has3DNowExt = (regs2[3] & (1 << 30 )) >> 30;
-       }
+         util_cpu_caps.has_mmx  |= (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */
+         util_cpu_caps.has_mmx2 |= (regs2[3] & (1 << 22 )) >> 22; /* 0x400000 */
+         util_cpu_caps.has_3dnow    = (regs2[3] & (1 << 31 )) >> 31; /* 0x80000000 */
+         util_cpu_caps.has_3dnow_ext = (regs2[3] & (1 << 30 )) >> 30;
+      }
 
-       if (regs[0] >= 0x80000006) {
-               cpuid(0x80000006, regs2);
-               __cpu_detect_caps.cacheline = regs2[2] & 0xFF;
-       }
+      if (regs[0] >= 0x80000006) {
+         cpuid(0x80000006, regs2);
+         util_cpu_caps.cacheline = regs2[2] & 0xFF;
+      }
 
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_NETBSD) || defined(PIPE_OS_CYGWIN) || defined(PIPE_OS_OPENBSD)
+      if (util_cpu_caps.has_sse)
+         check_os_katmai_support();
 
-#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_CYGWIN) || defined(OS_OPENBSD)
-       if (__cpu_detect_caps.hasSSE)
-               check_os_katmai_support();
-
-       if (!__cpu_detect_caps.hasSSE) {
-               __cpu_detect_caps.hasSSE2 = 0;
-               __cpu_detect_caps.hasSSE3 = 0;
-               __cpu_detect_caps.hasSSSE3 = 0;
-       }
+      if (!util_cpu_caps.has_sse) {
+         util_cpu_caps.has_sse2 = 0;
+         util_cpu_caps.has_sse3 = 0;
+         util_cpu_caps.has_ssse3 = 0;
+      }
 #else
-       __cpu_detect_caps.hasSSE = 0;
-       __cpu_detect_caps.hasSSE2 = 0;
-       __cpu_detect_caps.hasSSE3 = 0;
-       __cpu_detect_caps.hasSSSE3 = 0;
+      util_cpu_caps.has_sse = 0;
+      util_cpu_caps.has_sse2 = 0;
+      util_cpu_caps.has_sse3 = 0;
+      util_cpu_caps.has_ssse3 = 0;
+#endif
+   }
+#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
+
+#if defined(PIPE_ARCH_PPC)
+   check_os_altivec_support();
+#endif /* PIPE_ARCH_PPC */
+
+#ifdef DEBUG
+   debug_printf("util_cpu_caps.arch = %i\n", util_cpu_caps.arch);
+   debug_printf("util_cpu_caps.nr_cpus = %u\n", util_cpu_caps.nr_cpus);
+
+   debug_printf("util_cpu_caps.x86_cpu_type = %u\n", util_cpu_caps.x86_cpu_type);
+   debug_printf("util_cpu_caps.cacheline = %u\n", util_cpu_caps.cacheline);
+
+   debug_printf("util_cpu_caps.has_tsc = %u\n", util_cpu_caps.has_tsc);
+   debug_printf("util_cpu_caps.has_mmx = %u\n", util_cpu_caps.has_mmx);
+   debug_printf("util_cpu_caps.has_mmx2 = %u\n", util_cpu_caps.has_mmx2);
+   debug_printf("util_cpu_caps.has_sse = %u\n", util_cpu_caps.has_sse);
+   debug_printf("util_cpu_caps.has_sse2 = %u\n", util_cpu_caps.has_sse2);
+   debug_printf("util_cpu_caps.has_sse3 = %u\n", util_cpu_caps.has_sse3);
+   debug_printf("util_cpu_caps.has_ssse3 = %u\n", util_cpu_caps.has_ssse3);
+   debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
+   debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
+   debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
+   debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
 #endif
-#endif /* ARCH_X86 */
-
-#if defined(ARCH_POWERPC)
-       check_os_altivec_support();
-#endif /* ARCH_POWERPC */
-
-       __cpu_detect_initialized = 1;
-}
-
-struct cpu_detect_caps *cpu_detect_get_caps()
-{
-       return &__cpu_detect_caps;
-}
-
-/* The getters and setters for feature flags */
-int cpu_detect_get_tsc()
-{
-       return __cpu_detect_caps.hasTSC;
-}
-
-int cpu_detect_get_mmx()
-{
-       return __cpu_detect_caps.hasMMX;
-}
-
-int cpu_detect_get_mmx2()
-{
-       return __cpu_detect_caps.hasMMX2;
-}
-
-int cpu_detect_get_sse()
-{
-       return __cpu_detect_caps.hasSSE;
-}
-
-int cpu_detect_get_sse2()
-{
-       return __cpu_detect_caps.hasSSE2;
-}
-
-int cpu_detect_get_sse3()
-{
-       return __cpu_detect_caps.hasSSE3;
-}
-
-int cpu_detect_get_ssse3()
-{
-       return __cpu_detect_caps.hasSSSE3;
-}
-
-int cpu_detect_get_3dnow()
-{
-       return __cpu_detect_caps.has3DNow;
-}
-
-int cpu_detect_get_3dnow2()
-{
-       return __cpu_detect_caps.has3DNowExt;
-}
 
-int cpu_detect_get_altivec()
-{
-       return __cpu_detect_caps.hasAltiVec;
+   util_cpu_detect_initialized = TRUE;
 }
-
index 1612d49286a81dc13b64e230f0ec6e032be2511d..7ea0121c07f27d89f1b023440b8bfde83dddb0eb 100644 (file)
  *
  ***************************************************************************/
 
-/*
- * Based on the work of Eric Anholt <anholt@FreeBSD.org>
+/**
+ * @file
+ * CPU feature detection.
+ *
+ * @author Dennis Smit
+ * @author Based on the work of Eric Anholt <anholt@FreeBSD.org>
  */
 
-#ifndef _CPU_DETECT_H
-#define _CPU_DETECT_H
+#ifndef _UTIL_CPU_DETECT_H
+#define _UTIL_CPU_DETECT_H
+
+#include "pipe/p_compiler.h"
 
-typedef enum {
-       CPU_DETECT_TYPE_MIPS,
-       CPU_DETECT_TYPE_ALPHA,
-       CPU_DETECT_TYPE_SPARC,
-       CPU_DETECT_TYPE_X86,
-       CPU_DETECT_TYPE_POWERPC,
-       CPU_DETECT_TYPE_OTHER
-} cpu_detect_type;
+enum util_cpu_arch {
+   UTIL_CPU_ARCH_UNKNOWN = 0,
+   UTIL_CPU_ARCH_MIPS,
+   UTIL_CPU_ARCH_ALPHA,
+   UTIL_CPU_ARCH_SPARC,
+   UTIL_CPU_ARCH_X86,
+   UTIL_CPU_ARCH_POWERPC
+};
 
-struct cpu_detect_caps {
-       cpu_detect_type type;
-       int             nrcpu;
+struct util_cpu_caps {
+   enum util_cpu_arch arch;
+   unsigned nr_cpus;
 
-       /* Feature flags */
-       int             x86cpuType;
-       int             cacheline;
+   /* Feature flags */
+   int x86_cpu_type;
+   unsigned cacheline;
 
-       int             hasTSC;
-       int             hasMMX;
-       int             hasMMX2;
-       int             hasSSE;
-       int             hasSSE2;
-       int             hasSSE3;
-       int             hasSSSE3;
-       int             has3DNow;
-       int             has3DNowExt;
-       int             hasAltiVec;
+   unsigned has_tsc:1;
+   unsigned has_mmx:1;
+   unsigned has_mmx2:1;
+   unsigned has_sse:1;
+   unsigned has_sse2:1;
+   unsigned has_sse3:1;
+   unsigned has_ssse3:1;
+   unsigned has_sse4_1:1;
+   unsigned has_3dnow:1;
+   unsigned has_3dnow_ext:1;
+   unsigned has_altivec:1;
 };
 
-/* prototypes */
-void cpu_detect_initialize(void);
-struct cpu_detect_caps *cpu_detect_get_caps(void);
+extern struct util_cpu_caps
+util_cpu_caps;
+
+void util_cpu_detect(void);
 
-int cpu_detect_get_tsc(void);
-int cpu_detect_get_mmx(void);
-int cpu_detect_get_mmx2(void);
-int cpu_detect_get_sse(void);
-int cpu_detect_get_sse2(void);
-int cpu_detect_get_sse3(void);
-int cpu_detect_get_ssse3(void);
-int cpu_detect_get_3dnow(void);
-int cpu_detect_get_3dnow2(void);
-int cpu_detect_get_altivec(void);
 
-#endif /* _CPU_DETECT_H */
+#endif /* _UTIL_CPU_DETECT_H */