From 2a414c39619320c44e4d0f8f7a7609598081e2b2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 4 Oct 2017 05:07:50 +0200 Subject: [PATCH] radeonsi: postponed KILL isn't postponed anymore, but maintains WQM MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This restores performance for the drirc workaround, i.e. KILL_IF does: visible = src0 >= 0; kill_flag &= visible; // accumulate kills amdgcn_kill(wqm_vote(visible)); // kill fully dead quads only And all helper pixels are killed at the end of the shader: amdgcn_kill(kill_flag); Reviewed-by: Nicolai Hähnle --- src/amd/common/ac_llvm_build.c | 7 +++++++ src/amd/common/ac_llvm_build.h | 1 + src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 6 ++++++ 3 files changed, 14 insertions(+) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 3e5b795d6fe..80b027e8b08 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1405,6 +1405,13 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, AC_FUNC_ATTR_LEGACY); } +LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1) +{ + assert(HAVE_LLVM >= 0x0600); + return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, + &i1, 1, AC_FUNC_ATTR_READNONE); +} + void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1) { if (HAVE_LLVM >= 0x0600) { diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 8f7a9abb76c..996f55862ba 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -265,6 +265,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a); LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); +LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1); void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1); LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset, LLVMValueRef width, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index 283a889d847..913b6c32959 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -69,6 +69,12 @@ static void kil_emit(const struct lp_build_tgsi_action *action, } if (ctx->shader->selector->force_correct_derivs_after_kill) { + /* LLVM 6.0 can kill immediately while maintaining WQM. */ + if (HAVE_LLVM >= 0x0600) { + ac_build_kill_if_false(&ctx->ac, + ac_build_wqm_vote(&ctx->ac, visible)); + } + LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, ""); mask = LLVMBuildAnd(builder, mask, visible, ""); LLVMBuildStore(builder, mask, ctx->postponed_kill); -- 2.30.2