radeonsi/gfx10: don't insert NGG streamout atomics if they are never used
authorMarek Olšák <marek.olsak@amd.com>
Fri, 13 Dec 2019 01:17:41 +0000 (20:17 -0500)
committerMarge Bot <eric+marge@anholt.net>
Mon, 16 Dec 2019 20:06:07 +0000 (20:06 +0000)
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3095>

src/gallium/drivers/radeonsi/gfx10_shader_ngg.c

index 5d77a3b6252e129e46f59a89797d6566234e888e..6239b1ba466db69f5999b0c18c39730e468d1018 100644 (file)
@@ -675,7 +675,8 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
 
        /* Update query buffer */
        /* TODO: this won't catch 96-bit clear_buffer via transform feedback. */
-       if (!info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) {
+       if (ctx->screen->use_ngg_streamout &&
+           !info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) {
                tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1);
                tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, "");
                ac_build_ifcc(&ctx->ac, tmp, 5029); /* if (STREAMOUT_QUERY_ENABLED) */
@@ -1102,38 +1103,40 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
        }
 
        /* Write shader query data. */
-       tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1);
-       tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, "");
-       ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (STREAMOUT_QUERY_ENABLED) */
-       unsigned num_query_comps = sel->so.num_outputs ? 8 : 4;
-       tmp = LLVMBuildICmp(builder, LLVMIntULT, tid,
-                           LLVMConstInt(ctx->i32, num_query_comps, false), "");
-       ac_build_ifcc(&ctx->ac, tmp, 5110);
-       {
-               LLVMValueRef offset;
-               tmp = tid;
-               if (sel->so.num_outputs)
-                       tmp = LLVMBuildAnd(builder, tmp, LLVMConstInt(ctx->i32, 3, false), "");
-               offset = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 32, false), "");
-               if (sel->so.num_outputs) {
-                       tmp = LLVMBuildLShr(builder, tid, LLVMConstInt(ctx->i32, 2, false), "");
-                       tmp = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 8, false), "");
-                       offset = LLVMBuildAdd(builder, offset, tmp, "");
-               }
+       if (ctx->screen->use_ngg_streamout) {
+               tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1);
+               tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, "");
+               ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (STREAMOUT_QUERY_ENABLED) */
+               unsigned num_query_comps = sel->so.num_outputs ? 8 : 4;
+               tmp = LLVMBuildICmp(builder, LLVMIntULT, tid,
+                                   LLVMConstInt(ctx->i32, num_query_comps, false), "");
+               ac_build_ifcc(&ctx->ac, tmp, 5110);
+               {
+                       LLVMValueRef offset;
+                       tmp = tid;
+                       if (sel->so.num_outputs)
+                               tmp = LLVMBuildAnd(builder, tmp, LLVMConstInt(ctx->i32, 3, false), "");
+                       offset = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 32, false), "");
+                       if (sel->so.num_outputs) {
+                               tmp = LLVMBuildLShr(builder, tid, LLVMConstInt(ctx->i32, 2, false), "");
+                               tmp = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 8, false), "");
+                               offset = LLVMBuildAdd(builder, offset, tmp, "");
+                       }
 
-               tmp = LLVMBuildLoad(builder, ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid), "");
-               LLVMValueRef args[] = {
-                       tmp,
-                       ngg_get_query_buf(ctx),
-                       offset,
-                       LLVMConstInt(ctx->i32, 16, false), /* soffset */
-                       ctx->i32_0, /* cachepolicy */
-               };
-               ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32",
-                                  ctx->i32, args, 5, 0);
+                       tmp = LLVMBuildLoad(builder, ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid), "");
+                       LLVMValueRef args[] = {
+                               tmp,
+                               ngg_get_query_buf(ctx),
+                               offset,
+                               LLVMConstInt(ctx->i32, 16, false), /* soffset */
+                               ctx->i32_0, /* cachepolicy */
+                       };
+                       ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32",
+                                          ctx->i32, args, 5, 0);
+               }
+               ac_build_endif(&ctx->ac, 5110);
+               ac_build_endif(&ctx->ac, 5109);
        }
-       ac_build_endif(&ctx->ac, 5110);
-       ac_build_endif(&ctx->ac, 5109);
 
        /* TODO: culling */