From bc80741d7ab201cdffd2af4fbcfc4237dcce5f7e Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Thu, 21 Nov 2013 09:14:47 +0000 Subject: [PATCH] gallivm: Use 8 wide AoS sampling on AVX2. v2: Make sure that with num_lods > 1 and min_filter != mag_filter we still enter the splitting path. So this case would still use the 4-wide aos path (as a side note, the 4-wide aos sampling path could actually be improved quite a bit if we have avx2, by just doing the filtering with 256bit vectors). Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 1b48fc2a91e..1477a72d601 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -2860,12 +2860,13 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm, } /* - * we only try 8-wide sampling with soa as it appears to - * be a loss with aos with AVX (but it should work, except - * for conformance if min_filter != mag_filter if num_lods > 1). - * (It should be faster if we'd support avx2) + * we only try 8-wide sampling with soa or if we have AVX2 + * (as it appears to be a loss with just AVX) */ - if (num_quads == 1 || !use_aos) { + if (num_quads == 1 || !use_aos || + (util_cpu_caps.has_avx2 && + (bld.num_lods == 1 || + derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) { if (use_aos) { /* do sampling/filtering with fixed pt arithmetic */ lp_build_sample_aos(&bld, sampler_index, -- 2.30.2