From 8b3b07afc0b97ecff0431486ca57031150985268 Mon Sep 17 00:00:00 2001
From: Jan Zielinski <jan.zielinski@intel.com>
Date: Wed, 18 Mar 2020 13:36:53 +0100
Subject: [PATCH] gallium/gallivm: Remove workaround disabling AVX code for
 newer CPUs

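Remove the vendor check that restricted the 256-bit native vector width
to Intel CPUs. Any CPU reporting AVX or AVX2 now gets
lp_native_vector_width = 256, which enables using full 256-bit AVX and
AVX2 instructions on newer platforms.

The block that hides AVX-related capabilities (such as F16C and FMA)
when the vector width is 128 or less is now compiled only for LLVM
versions older than 4.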

Reviewed-by: Alok Hota <alok.hota@intel.com>
Reviewed-by: Adam Jackson <ajax@redhat.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4225>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4225>
---
 src/gallium/auxiliary/gallivm/lp_bld_init.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 4047f2bd781..fd5a35114f4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -435,15 +435,7 @@ lp_build_init(void)
    }
 #endif
 
-   /* AMD Bulldozer AVX's throughput is the same as SSE2; and because using
-    * 8-wide vector needs more floating ops than 4-wide (due to padding), it is
-    * actually more efficient to use 4-wide vectors on this processor.
-    *
-    * See also:
-    * - http://www.anandtech.com/show/4955/the-bulldozer-review-amd-fx8150-tested/2
-    */
-   if (util_cpu_caps.has_avx &&
-       util_cpu_caps.has_intel) {
+   if (util_cpu_caps.has_avx2 || util_cpu_caps.has_avx) {
       lp_native_vector_width = 256;
    } else {
       /* Leave it at 128, even when no SIMD extensions are available.
@@ -455,6 +447,7 @@ lp_build_init(void)
    lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH",
                                                  lp_native_vector_width);
 
+#if LLVM_VERSION_MAJOR < 4
    if (lp_native_vector_width <= 128) {
       /* Hide AVX support, as often LLVM AVX intrinsics are only guarded by
        * "util_cpu_caps.has_avx" predicate, and lack the
@@ -468,6 +461,7 @@ lp_build_init(void)
       util_cpu_caps.has_f16c = 0;
       util_cpu_caps.has_fma = 0;
    }
+#endif
 
 #ifdef PIPE_ARCH_PPC_64
    /* Set the NJ bit in VSCR to 0 so denormalized values are handled as
-- 
2.30.2