From 1a07df840e0429e84905113610273d684c06ff3d Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Wed, 4 Dec 2019 20:27:46 -0500
Subject: [PATCH] radeonsi: deduplicate ES and GS thread enablement code

Reviewed-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
 .../drivers/radeonsi/gfx10_shader_ngg.c | 8 +---
 src/gallium/drivers/radeonsi/si_shader.c | 37 +++++++++++--------
 .../drivers/radeonsi/si_shader_internal.h | 2 +
 3 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 23cb1ee34ca..5d77a3b6252 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -573,12 +573,8 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
 
 	ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
 
-	LLVMValueRef prims_in_wave = si_unpack_param(ctx, ctx->merged_wave_info, 8, 8);
-	LLVMValueRef vtx_in_wave = si_unpack_param(ctx, ctx->merged_wave_info, 0, 8);
-	LLVMValueRef is_gs_thread = LLVMBuildICmp(builder, LLVMIntULT,
-						  ac_get_thread_id(&ctx->ac), prims_in_wave, "");
-	LLVMValueRef is_es_thread = LLVMBuildICmp(builder, LLVMIntULT,
-						  ac_get_thread_id(&ctx->ac), vtx_in_wave, "");
+	LLVMValueRef is_gs_thread = si_is_gs_thread(ctx);
+	LLVMValueRef is_es_thread = si_is_es_thread(ctx);
 	LLVMValueRef vtxindex[] = {
 		si_unpack_param(ctx, ctx->gs_vtx01_offset, 0, 16),
 		si_unpack_param(ctx, ctx->gs_vtx01_offset, 16, 16),
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 2cd0e453501..03fc1bef2bf 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5993,6 +5993,22 @@ static bool si_vs_needs_prolog(const struct si_shader_selector *sel,
 	return sel->vs_needs_prolog || key->ls_vgpr_fix;
 }
 
+LLVMValueRef si_is_es_thread(struct si_shader_context *ctx)
+{
+	/* Return true if the current thread should execute an ES thread. */
+	return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
+			     ac_get_thread_id(&ctx->ac),
+			     si_unpack_param(ctx, ctx->merged_wave_info, 0, 8), "");
+}
+
+LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)
+{
+	/* Return true if the current thread should execute a GS thread. */
+	return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
+			     ac_get_thread_id(&ctx->ac),
+			     si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), "");
+}
+
 static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 				 struct nir_shader *nir, bool free_nir)
 {
@@ -6160,7 +6176,7 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 	} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
 		   ctx->type == PIPE_SHADER_GEOMETRY ||
 		   (shader->key.as_ngg && !shader->key.as_es)) {
-		LLVMValueRef num_threads;
+		LLVMValueRef thread_enabled;
 		bool nested_barrier;
 
 		if (!shader->is_monolithic ||
@@ -6177,21 +6193,15 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 					nested_barrier = true;
 				}
 
-				/* Number of patches / primitives */
-				num_threads = si_unpack_param(ctx, ctx->merged_wave_info, 8, 8);
+				thread_enabled = si_is_gs_thread(ctx);
 			} else {
-				/* Number of vertices */
-				num_threads = si_unpack_param(ctx, ctx->merged_wave_info, 0, 8);
+				thread_enabled = si_is_es_thread(ctx);
 				nested_barrier = false;
 			}
 
-			LLVMValueRef ena =
-				LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
-					      ac_get_thread_id(&ctx->ac), num_threads, "");
-
 			ctx->merged_wrap_if_entry_block = LLVMGetInsertBlock(ctx->ac.builder);
 			ctx->merged_wrap_if_label = 11500;
-			ac_build_ifcc(&ctx->ac, ena, ctx->merged_wrap_if_label);
+			ac_build_ifcc(&ctx->ac, thread_enabled, ctx->merged_wrap_if_label);
 
 			if (nested_barrier) {
 				/* Execute a barrier before the second shader in
@@ -7423,12 +7433,7 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 	LLVMBasicBlockRef if_entry_block = NULL;
 
 	if (key->vs_prolog.is_monolithic && key->vs_prolog.as_ngg) {
-		LLVMValueRef num_threads;
-		LLVMValueRef ena;
-
-		num_threads = si_unpack_param(ctx, merged_wave_info, 0, 8);
-		ena = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
-				    ac_get_thread_id(&ctx->ac), num_threads, "");
+		LLVMValueRef ena = si_is_es_thread(ctx);
 		if_entry_block = LLVMGetInsertBlock(ctx->ac.builder);
 		ac_build_ifcc(&ctx->ac, ena, 11501);
 		wrapped = true;
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 1169e60db7f..ed46d8efe4b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -375,6 +375,8 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir);
 LLVMValueRef si_unpack_param(struct si_shader_context *ctx,
 			     struct ac_arg param, unsigned rshift,
 			     unsigned bitwidth);
+LLVMValueRef si_is_es_thread(struct si_shader_context *ctx);
+LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx);
 
 void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
 			     unsigned max_outputs,
-- 
2.30.2