lp_native_vector_width);
if (lp_native_vector_width <= 128) {
- /* Hide AVX support, as often LLVM AVX instrinsics are only guarded by
+ /* Hide AVX support, as often LLVM AVX intrinsics are only guarded by
* "util_cpu_caps.has_avx" predicate, and lack the
* "lp_native_vector_width > 128" predicate. And also to ensure a more
* consistent behavior, allowing one to test SSE2 on AVX machines.
+ * XXX: we should not play games with util_cpu_caps directly, as it
+ * might get used for other things outside llvm too.
*/
util_cpu_caps.has_avx = 0;
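+ /* Likewise hide AVX2, which implies AVX. */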
+ util_cpu_caps.has_avx2 = 0;
}
if (!HAVE_AVX) {
* omit it unnecessarily on amd cpus, see above).
*/
util_cpu_caps.has_f16c = 0;
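+ /* XOP likewise requires AVX, so clear it here too. */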
+ util_cpu_caps.has_xop = 0;
}
#ifdef PIPE_ARCH_PPC_64
/* Set the NJ bit in VSCR to 0 so denormalized values are handled as
- * specified by IEEE standard (PowerISA 2.06 - Section 6.3). This garantees
+ * specified by IEEE standard (PowerISA 2.06 - Section 6.3). This guarantees
 * that some rounding and half-float-to-float handling does not
 * incorrectly round to 0.
+ * XXX: should eventually follow the same logic on all platforms.
+ * Right now denorms get explicitly disabled for x86 (albeit elsewhere
+ * in the code), whereas ppc64 explicitly enables them...
*/
if (util_cpu_caps.has_altivec) {
unsigned short mask[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
#endif
}
+/**
+ * @sa cpuid.h included in gcc-4.4 onwards.
+ * @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx
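+ *
+ * Executes CPUID with the leaf in @a ax (EAX) and the subleaf in @a cx
+ * (ECX), storing the resulting EAX, EBX, ECX and EDX in p[0]..p[3].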
+ */
+static INLINE void
+cpuid_count(uint32_t ax, uint32_t cx, uint32_t *p)
+{
+#if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86)
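+ /* On 32-bit x86, %ebx may be reserved as the PIC base register, so it
+  * cannot be clobbered directly; preserve it by swapping it with %esi
+  * around the cpuid instruction.
+  */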
+ __asm __volatile (
+ "xchgl %%ebx, %1\n\t"
+ "cpuid\n\t"
+ "xchgl %%ebx, %1"
+ : "=a" (p[0]),
+ "=S" (p[1]),
+ "=c" (p[2]),
+ "=d" (p[3])
+ : "0" (ax), "2" (cx)
+ );
+#elif (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86_64)
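+ /* On x86-64 the PIC register issue does not arise, so cpuid's %ebx
+  * output can be captured directly.
+  */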
+ __asm __volatile (
+ "cpuid\n\t"
+ : "=a" (p[0]),
+ "=b" (p[1]),
+ "=c" (p[2]),
+ "=d" (p[3])
+ : "0" (ax), "2" (cx)
+ );
+#elif defined(PIPE_CC_MSVC)
+ __cpuidex(p, ax, cx);
+#else
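+ /* Unsupported compiler/architecture: report all-zero registers, i.e.
+  * no capabilities.
+  */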
+ p[0] = 0;
+ p[1] = 0;
+ p[2] = 0;
+ p[3] = 0;
+#endif
+}
+
+
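+/* Reads the XCR0 extended control register via the xgetbv instruction;
+ * used during AVX detection to check that the OS saves/restores the
+ * AVX (YMM) register state.
+ */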
static INLINE uint64_t xgetbv(void)
{
#if defined(PIPE_CC_GCC)
if (cacheline > 0)
util_cpu_caps.cacheline = cacheline;
}
+ if (util_cpu_caps.has_avx && regs[0] >= 0x00000007) {
+ uint32_t regs7[4];
+ cpuid_count(0x00000007, 0x00000000, regs7);
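+ /* AVX2 is advertised in bit 5 of EBX for CPUID leaf 7, subleaf 0. */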
+ util_cpu_caps.has_avx2 = (regs7[1] >> 5) & 1;
+ }
if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 0x49656e69) {
/* GenuineIntel */
util_cpu_caps.has_mmx2 |= (regs2[3] >> 22) & 1;
util_cpu_caps.has_3dnow = (regs2[3] >> 31) & 1;
util_cpu_caps.has_3dnow_ext = (regs2[3] >> 30) & 1;
+
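+ /* XOP is an AMD extension, advertised in bit 11 of ECX for CPUID
+  * leaf 0x80000001; XOP instructions use the AVX register state,
+  * hence the additional has_avx check.
+  */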
+ util_cpu_caps.has_xop = util_cpu_caps.has_avx &&
+ ((regs2[2] >> 11) & 1);
}
if (regs[0] >= 0x80000006) {
debug_printf("util_cpu_caps.has_sse4_1 = %u\n", util_cpu_caps.has_sse4_1);
debug_printf("util_cpu_caps.has_sse4_2 = %u\n", util_cpu_caps.has_sse4_2);
debug_printf("util_cpu_caps.has_avx = %u\n", util_cpu_caps.has_avx);
+ debug_printf("util_cpu_caps.has_avx2 = %u\n", util_cpu_caps.has_avx2);
debug_printf("util_cpu_caps.has_f16c = %u\n", util_cpu_caps.has_f16c);
debug_printf("util_cpu_caps.has_popcnt = %u\n", util_cpu_caps.has_popcnt);
debug_printf("util_cpu_caps.has_3dnow = %u\n", util_cpu_caps.has_3dnow);
debug_printf("util_cpu_caps.has_3dnow_ext = %u\n", util_cpu_caps.has_3dnow_ext);
+ debug_printf("util_cpu_caps.has_xop = %u\n", util_cpu_caps.has_xop);
debug_printf("util_cpu_caps.has_altivec = %u\n", util_cpu_caps.has_altivec);
debug_printf("util_cpu_caps.has_daz = %u\n", util_cpu_caps.has_daz);
}