radeonsi/gfx10: enable NGG passthrough for eligible shaders
author Marek Olšák <marek.olsak@amd.com>
Tue, 17 Dec 2019 00:09:21 +0000 (19:09 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 27 Dec 2019 18:50:57 +0000 (13:50 -0500)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 419a4021943b24afc348846cd17c1afef89c86ed..5aac4ceac256e6507734995094da85b267a10ecc 100644 (file)
@@ -98,6 +98,7 @@ struct ngg_prim {
        LLVMValueRef isnull;
        LLVMValueRef index[3];
        LLVMValueRef edgeflag[3];
+       LLVMValueRef passthrough;
 };
 
 static void build_export_prim(struct si_shader_context *ctx,
@@ -107,17 +108,21 @@ static void build_export_prim(struct si_shader_context *ctx,
        struct ac_export_args args;
        LLVMValueRef tmp;
 
-       tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, "");
-       args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), "");
-
-       for (unsigned i = 0; i < prim->num_vertices; ++i) {
-               tmp = LLVMBuildShl(builder, prim->index[i],
-                                  LLVMConstInt(ctx->ac.i32, 10 * i, false), "");
-               args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
-               tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, "");
-               tmp = LLVMBuildShl(builder, tmp,
-                                  LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), "");
-               args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
+       if (prim->passthrough) {
+               args.out[0] = prim->passthrough;
+       } else {
+               tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, "");
+               args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), "");
+
+               for (unsigned i = 0; i < prim->num_vertices; ++i) {
+                       tmp = LLVMBuildShl(builder, prim->index[i],
+                                          LLVMConstInt(ctx->ac.i32, 10 * i, false), "");
+                       args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
+                       tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, "");
+                       tmp = LLVMBuildShl(builder, tmp,
+                                          LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), "");
+                       args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
+               }
        }
 
        args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, "");
@@ -729,25 +734,29 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
        {
                struct ngg_prim prim = {};
 
-               prim.num_vertices = num_vertices;
-               prim.isnull = ctx->ac.i1false;
-               memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
+               if (gfx10_is_ngg_passthrough(ctx->shader)) {
+                       prim.passthrough = ac_get_arg(&ctx->ac, ctx->gs_vtx01_offset);
+               } else {
+                       prim.num_vertices = num_vertices;
+                       prim.isnull = ctx->ac.i1false;
+                       memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
+
+                       for (unsigned i = 0; i < num_vertices; ++i) {
+                               if (ctx->type != PIPE_SHADER_VERTEX) {
+                                       prim.edgeflag[i] = ctx->i1false;
+                                       continue;
+                               }
 
-               for (unsigned i = 0; i < num_vertices; ++i) {
-                       if (ctx->type != PIPE_SHADER_VERTEX) {
-                               prim.edgeflag[i] = ctx->i1false;
-                               continue;
-                       }
+                               tmp = LLVMBuildLShr(builder,
+                                                   ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
+                                                   LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
+                               prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
 
-                       tmp = LLVMBuildLShr(builder,
-                                           ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id),
-                                           LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
-                       prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
-
-                       if (sel->info.writes_edgeflag) {
-                               tmp2 = LLVMBuildLoad(builder, user_edgeflags[i], "");
-                               prim.edgeflag[i] = LLVMBuildAnd(builder, prim.edgeflag[i],
-                                                               tmp2, "");
+                               if (sel->info.writes_edgeflag) {
+                                       tmp2 = LLVMBuildLoad(builder, user_edgeflags[i], "");
+                                       prim.edgeflag[i] = LLVMBuildAnd(builder, prim.edgeflag[i],
+                                                                       tmp2, "");
+                               }
                        }
                }
 
index bd0c777c148be08a0fe0976849fc68ef91e6cb81..ee9a75c9739a145a7ebe88fc01708cffdfd5056f 100644 (file)
@@ -790,7 +790,7 @@ union si_vgt_param_key {
        uint32_t index;
 };
 
-#define SI_NUM_VGT_STAGES_KEY_BITS 4
+#define SI_NUM_VGT_STAGES_KEY_BITS 5
 #define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS)
 
 /* The VGT_SHADER_STAGES key used to index the table of precomputed values.
@@ -801,6 +801,7 @@ union si_vgt_stages_key {
 #if UTIL_ARCH_LITTLE_ENDIAN
                unsigned tess:1;
                unsigned gs:1;
+               unsigned ngg_passthrough:1;
                unsigned ngg:1; /* gfx10+ */
                unsigned streamout:1; /* only used with NGG */
                unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS;
@@ -808,6 +809,7 @@ union si_vgt_stages_key {
                unsigned _pad:32 - SI_NUM_VGT_STAGES_KEY_BITS;
                unsigned streamout:1;
                unsigned ngg:1;
+               unsigned ngg_passthrough:1;
                unsigned gs:1;
                unsigned tess:1;
 #endif
index a81636801cccd8ac6000151714059faf9827c6cb..6fa9c37bf86da0b3ce69bb587a388ac5a0ef55bf 100644 (file)
@@ -3872,9 +3872,9 @@ static struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen,
        }
 
        if (key.u.ngg) {
-               stages |= S_028B54_PRIMGEN_EN(1);
-               if (key.u.streamout)
-                       stages |= S_028B54_NGG_WAVE_ID_EN(1);
+               stages |= S_028B54_PRIMGEN_EN(1) |
+                         S_028B54_NGG_WAVE_ID_EN(key.u.streamout) |
+                         S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough);
        } else if (key.u.gs)
                stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
 
@@ -4027,6 +4027,10 @@ bool si_update_shaders(struct si_context *sctx)
                }
        }
 
+       /* This must be done after the shader variant is selected. */
+       if (sctx->ngg)
+               key.u.ngg_passthrough = gfx10_is_ngg_passthrough(si_get_vs(sctx)->current);
+
        si_update_vgt_shader_config(sctx, key);
 
        if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable)