From 4846aeaf572db40e612c042205d45174aa6e2d5f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 12 Dec 2019 20:17:41 -0500 Subject: [PATCH] radeonsi/gfx10: don't insert NGG streamout atomics if they are never used Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- .../drivers/radeonsi/gfx10_shader_ngg.c | 65 ++++++++++--------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 5d77a3b6252..6239b1ba466 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -675,7 +675,8 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, /* Update query buffer */ /* TODO: this won't catch 96-bit clear_buffer via transform feedback. */ - if (!info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) { + if (ctx->screen->use_ngg_streamout && + !info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD]) { tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1); tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, ""); ac_build_ifcc(&ctx->ac, tmp, 5029); /* if (STREAMOUT_QUERY_ENABLED) */ @@ -1102,38 +1103,40 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) } /* Write shader query data. */ - tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1); - tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, ""); - ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (STREAMOUT_QUERY_ENABLED) */ - unsigned num_query_comps = sel->so.num_outputs ? 8 : 4; - tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, - LLVMConstInt(ctx->i32, num_query_comps, false), ""); - ac_build_ifcc(&ctx->ac, tmp, 5110); - { - LLVMValueRef offset; - tmp = tid; - if (sel->so.num_outputs) - tmp = LLVMBuildAnd(builder, tmp, LLVMConstInt(ctx->i32, 3, false), ""); - offset = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 32, false), ""); - if (sel->so.num_outputs) { - tmp = LLVMBuildLShr(builder, tid, LLVMConstInt(ctx->i32, 2, false), ""); - tmp = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 8, false), ""); - offset = LLVMBuildAdd(builder, offset, tmp, ""); - } + if (ctx->screen->use_ngg_streamout) { + tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1); + tmp = LLVMBuildTrunc(builder, tmp, ctx->i1, ""); + ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (STREAMOUT_QUERY_ENABLED) */ + unsigned num_query_comps = sel->so.num_outputs ? 8 : 4; + tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, + LLVMConstInt(ctx->i32, num_query_comps, false), ""); + ac_build_ifcc(&ctx->ac, tmp, 5110); + { + LLVMValueRef offset; + tmp = tid; + if (sel->so.num_outputs) + tmp = LLVMBuildAnd(builder, tmp, LLVMConstInt(ctx->i32, 3, false), ""); + offset = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 32, false), ""); + if (sel->so.num_outputs) { + tmp = LLVMBuildLShr(builder, tid, LLVMConstInt(ctx->i32, 2, false), ""); + tmp = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->i32, 8, false), ""); + offset = LLVMBuildAdd(builder, offset, tmp, ""); + } - tmp = LLVMBuildLoad(builder, ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid), ""); - LLVMValueRef args[] = { - tmp, - ngg_get_query_buf(ctx), - offset, - LLVMConstInt(ctx->i32, 16, false), /* soffset */ - ctx->i32_0, /* cachepolicy */ - }; - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", - ctx->i32, args, 5, 0); + tmp = LLVMBuildLoad(builder, ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid), ""); + LLVMValueRef args[] = { + tmp, + ngg_get_query_buf(ctx), + offset, + LLVMConstInt(ctx->i32, 16, false), /* soffset */ + ctx->i32_0, /* cachepolicy */ + }; + ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32", + ctx->i32, args, 5, 0); + } + ac_build_endif(&ctx->ac, 5110); + ac_build_endif(&ctx->ac, 5109); } - ac_build_endif(&ctx->ac, 5110); - ac_build_endif(&ctx->ac, 5109); /* TODO: culling */ -- 2.30.2