gallivm: Don't use llvm.x86.avx.max/min.ps.256 inadvertently.
author José Fonseca <jfonseca@vmware.com>
Wed, 10 Oct 2012 18:44:49 +0000 (19:44 +0100)
committer José Fonseca <jfonseca@vmware.com>
Fri, 12 Oct 2012 17:52:28 +0000 (18:52 +0100)
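The 256-bit AVX min/max intrinsics could be selected when the CPU supports AVX but LLVM doesn't, so also require lp_native_vector_width to be at least 256 before using them.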

src/gallium/auxiliary/gallivm/lp_bld_arit.c

index d23ff0bf996eb327eabe79f06847f47d0e3c5160..f6310a8ef309be2c2716cd5c09e529e69ec1a54b 100644 (file)
@@ -92,7 +92,7 @@ lp_build_min_simple(struct lp_build_context *bld,
             intrinsic = "llvm.x86.sse.min.ss";
             intr_size = 128;
          }
-         else if (type.length <= 4 || !util_cpu_caps.has_avx) {
+         else if (type.length <= 4 || !util_cpu_caps.has_avx || lp_native_vector_width < 256) {
             intrinsic = "llvm.x86.sse.min.ps";
             intr_size = 128;
          }
@@ -106,7 +106,7 @@ lp_build_min_simple(struct lp_build_context *bld,
             intrinsic = "llvm.x86.sse2.min.sd";
             intr_size = 128;
          }
-         else if (type.length == 2 || !util_cpu_caps.has_avx) {
+         else if (type.length == 2 || !util_cpu_caps.has_avx || lp_native_vector_width < 256) {
             intrinsic = "llvm.x86.sse2.min.pd";
             intr_size = 128;
          }
@@ -182,7 +182,7 @@ lp_build_max_simple(struct lp_build_context *bld,
             intrinsic = "llvm.x86.sse.max.ss";
             intr_size = 128;
          }
-         else if (type.length <= 4 || !util_cpu_caps.has_avx) {
+         else if (type.length <= 4 || !util_cpu_caps.has_avx || lp_native_vector_width < 256) {
             intrinsic = "llvm.x86.sse.max.ps";
             intr_size = 128;
          }
@@ -196,7 +196,7 @@ lp_build_max_simple(struct lp_build_context *bld,
             intrinsic = "llvm.x86.sse2.max.sd";
             intr_size = 128;
          }
-         else if (type.length == 2 || !util_cpu_caps.has_avx) {
+         else if (type.length == 2 || !util_cpu_caps.has_avx || lp_native_vector_width < 256) {
             intrinsic = "llvm.x86.sse2.max.pd";
             intr_size = 128;
          }