{
LLVMBuilderRef builder = ctx->builder;
LLVMValueRef tmp;
+ bool export_dummy_prim = false;
+
+ /* HW workaround for a GPU hang with 100% culling.
+ * We always have to export at least 1 primitive.
+ * Export a degenerate triangle using vertex 0 for all 3 vertices.
+ */
+ if (prim_cnt == ctx->i32_0 &&
+ (ctx->family == CHIP_NAVI10 ||
+ ctx->family == CHIP_NAVI12 ||
+ ctx->family == CHIP_NAVI14)) {
+ assert(vtx_cnt == ctx->i32_0);
+ prim_cnt = ctx->i32_1;
+ vtx_cnt = ctx->i32_1;
+ export_dummy_prim = true;
+ }
ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020);
tmp = LLVMBuildOr(builder, tmp, vtx_cnt, "");
ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp);
+ if (export_dummy_prim) {
+ struct ac_ngg_prim prim = {};
+ /* The vertex indices are 0,0,0. */
+ prim.passthrough = ctx->i32_0;
+
+ struct ac_export_args pos = {};
+ pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = ctx->f32_0;
+ pos.target = V_008DFC_SQ_EXP_POS;
+ pos.enabled_channels = 0xf;
+ pos.done = true;
+
+ ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx),
+ ctx->i32_0, ""), 5021);
+ ac_build_export_prim(ctx, &prim);
+ ac_build_export(ctx, &pos);
+ ac_build_endif(ctx, 5021);
+ }
+
ac_build_endif(ctx, 5020);
}