From 8b3b07afc0b97ecff0431486ca57031150985268 Mon Sep 17 00:00:00 2001
From: Jan Zielinski
Date: Wed, 18 Mar 2020 13:36:53 +0100
Subject: [PATCH] gallium/gallivm: Remove workaround disabling AVX code for newer CPUs

The change enables using full 256-bit AVX and AVX2 instructions
on newer platforms.

Reviewed-by: Alok Hota
Reviewed-by: Adam Jackson
Reviewed-by: Jose Fonseca
Reviewed-by: Roland Scheidegger
Tested-by: Marge Bot
Part-of:
---
 src/gallium/auxiliary/gallivm/lp_bld_init.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 4047f2bd781..fd5a35114f4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -435,15 +435,7 @@ lp_build_init(void)
    }
 #endif
 
-   /* AMD Bulldozer AVX's throughput is the same as SSE2; and because using
-    * 8-wide vector needs more floating ops than 4-wide (due to padding), it is
-    * actually more efficient to use 4-wide vectors on this processor.
-    *
-    * See also:
-    * - http://www.anandtech.com/show/4955/the-bulldozer-review-amd-fx8150-tested/2
-    */
-   if (util_cpu_caps.has_avx &&
-       util_cpu_caps.has_intel) {
+   if (util_cpu_caps.has_avx2 || util_cpu_caps.has_avx) {
       lp_native_vector_width = 256;
    } else {
       /* Leave it at 128, even when no SIMD extensions are available.
@@ -455,6 +447,7 @@ lp_build_init(void)
    lp_native_vector_width = debug_get_num_option("LP_NATIVE_VECTOR_WIDTH",
                                                  lp_native_vector_width);
 
+#if LLVM_VERSION_MAJOR < 4
    if (lp_native_vector_width <= 128) {
       /* Hide AVX support, as often LLVM AVX intrinsics are only guarded by
        * "util_cpu_caps.has_avx" predicate, and lack the
@@ -468,6 +461,7 @@ lp_build_init(void)
       util_cpu_caps.has_f16c = 0;
       util_cpu_caps.has_fma = 0;
    }
+#endif
 
 #ifdef PIPE_ARCH_PPC_64
    /* Set the NJ bit in VSCR to 0 so denormalized values are handled as
-- 
2.30.2