gallivm: Add debug option to force SSE2.

author Jose Fonseca <jfonseca@vmware.com>

Fri, 1 Apr 2016 10:06:30 +0000 (11:06 +0100)

committer Jose Fonseca <jfonseca@vmware.com>

Sun, 3 Apr 2016 21:08:57 +0000 (22:08 +0100)
author Jose Fonseca <jfonseca@vmware.com>
Fri, 1 Apr 2016 10:06:30 +0000 (11:06 +0100)
committer Jose Fonseca <jfonseca@vmware.com>
Sun, 3 Apr 2016 21:08:57 +0000 (22:08 +0100)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c

index ab55be4c439c634a13c99dde95d15a32fdef8f2c..6e08ac48d72cd059bc3a106adeb76623dd3053c9 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -399,6 +399,20 @@ lp_build_init(void)
  
     util_cpu_detect();
  
+   /* For simulating less capable machines */
+#ifdef DEBUG
+   if (debug_get_bool_option("LP_FORCE_SSE2", FALSE)) {
+      assert(util_cpu_caps.has_sse2);
+      util_cpu_caps.has_sse3 = 0;
+      util_cpu_caps.has_ssse3 = 0;
+      util_cpu_caps.has_sse4_1 = 0;
+      util_cpu_caps.has_sse4_2 = 0;
+      util_cpu_caps.has_avx = 0;
+      util_cpu_caps.has_avx2 = 0;
+      util_cpu_caps.has_f16c = 0;
+   }
+#endif
+
     /* AMD Bulldozer AVX's throughput is the same as SSE2; and because using
      * 8-wide vector needs more floating ops than 4-wide (due to padding), it is
      * actually more efficient to use 4-wide vectors on this processor.
@@ -456,17 +470,6 @@ lp_build_init(void)
  
     gallivm_initialized = TRUE;
  
-#if 0
-   /* For simulating less capable machines */
-   util_cpu_caps.has_sse3 = 0;
-   util_cpu_caps.has_ssse3 = 0;
-   util_cpu_caps.has_sse4_1 = 0;
-   util_cpu_caps.has_sse4_2 = 0;
-   util_cpu_caps.has_avx = 0;
-   util_cpu_caps.has_avx2 = 0;
-   util_cpu_caps.has_f16c = 0;
-#endif
-
     return TRUE;
  }
author	Jose Fonseca <jfonseca@vmware.com>
	Fri, 1 Apr 2016 10:06:30 +0000 (11:06 +0100)
committer	Jose Fonseca <jfonseca@vmware.com>
	Sun, 3 Apr 2016 21:08:57 +0000 (22:08 +0100)