From aa3df12fc2fbe2963eb09cbd2a126b82d208f0ca Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 16 Dec 2019 19:09:21 -0500 Subject: [PATCH] radeonsi/gfx10: enable NGG passthrough for eligible shaders Acked-by: Pierre-Eric Pelloux-Prayer --- .../drivers/radeonsi/gfx10_shader_ngg.c | 65 +++++++++++-------- src/gallium/drivers/radeonsi/si_pipe.h | 4 +- .../drivers/radeonsi/si_state_shaders.c | 10 ++- 3 files changed, 47 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 419a4021943..5aac4ceac25 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -98,6 +98,7 @@ struct ngg_prim { LLVMValueRef isnull; LLVMValueRef index[3]; LLVMValueRef edgeflag[3]; + LLVMValueRef passthrough; }; static void build_export_prim(struct si_shader_context *ctx, @@ -107,17 +108,21 @@ static void build_export_prim(struct si_shader_context *ctx, struct ac_export_args args; LLVMValueRef tmp; - tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, ""); - args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), ""); - - for (unsigned i = 0; i < prim->num_vertices; ++i) { - tmp = LLVMBuildShl(builder, prim->index[i], - LLVMConstInt(ctx->ac.i32, 10 * i, false), ""); - args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, ""); - tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, ""); - tmp = LLVMBuildShl(builder, tmp, - LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), ""); - args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, ""); + if (prim->passthrough) { + args.out[0] = prim->passthrough; + } else { + tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, ""); + args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), ""); + + for (unsigned i = 0; i < prim->num_vertices; ++i) { + tmp = LLVMBuildShl(builder, prim->index[i], + LLVMConstInt(ctx->ac.i32, 10 * i, false), ""); + args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, ""); + tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, ""); + tmp = LLVMBuildShl(builder, tmp, + LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), ""); + args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, ""); + } } args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, ""); @@ -729,25 +734,29 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, { struct ngg_prim prim = {}; - prim.num_vertices = num_vertices; - prim.isnull = ctx->ac.i1false; - memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3); + if (gfx10_is_ngg_passthrough(ctx->shader)) { + prim.passthrough = ac_get_arg(&ctx->ac, ctx->gs_vtx01_offset); + } else { + prim.num_vertices = num_vertices; + prim.isnull = ctx->ac.i1false; + memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3); + + for (unsigned i = 0; i < num_vertices; ++i) { + if (ctx->type != PIPE_SHADER_VERTEX) { + prim.edgeflag[i] = ctx->i1false; + continue; + } - for (unsigned i = 0; i < num_vertices; ++i) { - if (ctx->type != PIPE_SHADER_VERTEX) { - prim.edgeflag[i] = ctx->i1false; - continue; - } + tmp = LLVMBuildLShr(builder, + ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id), + LLVMConstInt(ctx->ac.i32, 8 + i, false), ""); + prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, ""); - tmp = LLVMBuildLShr(builder, - ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id), - LLVMConstInt(ctx->ac.i32, 8 + i, false), ""); - prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, ""); - - if (sel->info.writes_edgeflag) { - tmp2 = LLVMBuildLoad(builder, user_edgeflags[i], ""); - prim.edgeflag[i] = LLVMBuildAnd(builder, prim.edgeflag[i], - tmp2, ""); + if (sel->info.writes_edgeflag) { + tmp2 = LLVMBuildLoad(builder, user_edgeflags[i], ""); + prim.edgeflag[i] = LLVMBuildAnd(builder, prim.edgeflag[i], + tmp2, ""); + } } } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index bd0c777c148..ee9a75c9739 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -790,7 +790,7 @@ union si_vgt_param_key { uint32_t index; }; -#define SI_NUM_VGT_STAGES_KEY_BITS 4 +#define SI_NUM_VGT_STAGES_KEY_BITS 5 #define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS) /* The VGT_SHADER_STAGES key used to index the table of precomputed values. @@ -801,6 +801,7 @@ union si_vgt_stages_key { #if UTIL_ARCH_LITTLE_ENDIAN unsigned tess:1; unsigned gs:1; + unsigned ngg_passthrough:1; unsigned ngg:1; /* gfx10+ */ unsigned streamout:1; /* only used with NGG */ unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS; @@ -808,6 +809,7 @@ union si_vgt_stages_key { unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS; unsigned streamout:1; unsigned ngg:1; + unsigned ngg_passthrough:1; unsigned gs:1; unsigned tess:1; #endif diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index a81636801cc..6fa9c37bf86 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -3872,9 +3872,9 @@ static struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen, } if (key.u.ngg) { - stages |= S_028B54_PRIMGEN_EN(1); - if (key.u.streamout) - stages |= S_028B54_NGG_WAVE_ID_EN(1); + stages |= S_028B54_PRIMGEN_EN(1) | + S_028B54_NGG_WAVE_ID_EN(key.u.streamout) | + S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough); } else if (key.u.gs) stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); @@ -4027,6 +4027,10 @@ bool si_update_shaders(struct si_context *sctx) } } + /* This must be done after the shader variant is selected. */ + if (sctx->ngg) + key.u.ngg_passthrough = gfx10_is_ngg_passthrough(si_get_vs(sctx)->current); + si_update_vgt_shader_config(sctx, key); if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable) -- 2.30.2