From: Marek Olšák
Date: Wed, 4 Mar 2020 00:01:17 +0000 (-0500)
Subject: ac: add a bug workaround for the 100% NGG culling case
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fc65df56519af568c2e5954793c17a8aed858148;p=mesa.git

ac: add a bug workaround for the 100% NGG culling case

Fixes: 8db00a51f85 - radeonsi/gfx10: implement NGG culling for 4x wave32 subgroups

Acked-by: Pierre-Eric Pelloux-Prayer
Part-of:
---

diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 69d8f4710ca..760d9123c62 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -4799,6 +4799,21 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav
 {
 	LLVMBuilderRef builder = ctx->builder;
 	LLVMValueRef tmp;
+	bool export_dummy_prim = false;
+
+	/* HW workaround for a GPU hang with 100% culling.
+	 * We always have to export at least 1 primitive.
+	 * Export a degenerate triangle using vertex 0 for all 3 vertices.
+	 */
+	if (prim_cnt == ctx->i32_0 &&
+	    (ctx->family == CHIP_NAVI10 ||
+	     ctx->family == CHIP_NAVI12 ||
+	     ctx->family == CHIP_NAVI14)) {
+		assert(vtx_cnt == ctx->i32_0);
+		prim_cnt = ctx->i32_1;
+		vtx_cnt = ctx->i32_1;
+		export_dummy_prim = true;
+	}
 
 	ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020);
 
@@ -4806,6 +4821,24 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav
 	tmp = LLVMBuildOr(builder, tmp, vtx_cnt, "");
 	ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp);
 
+	if (export_dummy_prim) {
+		struct ac_ngg_prim prim = {};
+		/* The vertex indices are 0,0,0. */
+		prim.passthrough = ctx->i32_0;
+
+		struct ac_export_args pos = {};
+		pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = ctx->f32_0;
+		pos.target = V_008DFC_SQ_EXP_POS;
+		pos.enabled_channels = 0xf;
+		pos.done = true;
+
+		ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx),
+						 ctx->i32_0, ""), 5021);
+		ac_build_export_prim(ctx, &prim);
+		ac_build_export(ctx, &pos);
+		ac_build_endif(ctx, 5021);
+	}
+
 	ac_build_endif(ctx, 5020);
 }
 