From 699e1f5aacd1d9eed8cc1a37ec0dbd11313fbbdc Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen
Date: Wed, 23 May 2018 11:34:15 +0200
Subject: [PATCH] ac: Use DPP for build_ddxy where possible.
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

WQM is pretty reliable now on LLVM 7, so let us just use DPP + WQM.

This gives approximately a 1.5% performance increase on the
vrcompositor built-in benchmark.

v2: Use ac_build_quad_swizzle.

Reviewed-by: Nicolai Hähnle
---
 src/amd/common/ac_llvm_build.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 36c1d62637b..4eebbbd4d9d 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1170,7 +1170,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 	LLVMValueRef tl, trbl, args[2];
 	LLVMValueRef result;
 
-	if (ctx->chip_class >= VI) {
+	if (HAVE_LLVM >= 0x0700) {
+		unsigned tl_lanes[4], trbl_lanes[4];
+
+		for (unsigned i = 0; i < 4; ++i) {
+			tl_lanes[i] = i & mask;
+			trbl_lanes[i] = (i & mask) + idx;
+		}
+
+		tl = ac_build_quad_swizzle(ctx, val,
+					   tl_lanes[0], tl_lanes[1],
+					   tl_lanes[2], tl_lanes[3]);
+		trbl = ac_build_quad_swizzle(ctx, val,
+					     trbl_lanes[0], trbl_lanes[1],
+					     trbl_lanes[2], trbl_lanes[3]);
+	} else if (ctx->chip_class >= VI) {
 		LLVMValueRef thread_id, tl_tid, trbl_tid;
 
 		thread_id = ac_get_thread_id(ctx);
-- 
2.30.2