radeonsi: deduplicate ES and GS thread enablement code

author Marek Olšák <marek.olsak@amd.com>

Thu, 5 Dec 2019 01:27:46 +0000 (20:27 -0500)

committer Marge Bot <eric+marge@anholt.net>

Mon, 16 Dec 2019 20:06:07 +0000 (20:06 +0000)
author Marek Olšák <marek.olsak@amd.com>
Thu, 5 Dec 2019 01:27:46 +0000 (20:27 -0500)
committer Marge Bot <eric+marge@anholt.net>
Mon, 16 Dec 2019 20:06:07 +0000 (20:06 +0000)
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c

index 23cb1ee34cad816b4be3515bae754d842533d30d..5d77a3b6252e129e46f59a89797d6566234e888e 100644 (file)
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -573,12 +573,8 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
  
         ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
  
-       LLVMValueRef prims_in_wave = si_unpack_param(ctx, ctx->merged_wave_info, 8, 8);
-       LLVMValueRef vtx_in_wave = si_unpack_param(ctx, ctx->merged_wave_info, 0, 8);
-       LLVMValueRef is_gs_thread = LLVMBuildICmp(builder, LLVMIntULT,
-                                                 ac_get_thread_id(&ctx->ac), prims_in_wave, "");
-       LLVMValueRef is_es_thread = LLVMBuildICmp(builder, LLVMIntULT,
-                                                 ac_get_thread_id(&ctx->ac), vtx_in_wave, "");
+       LLVMValueRef is_gs_thread = si_is_gs_thread(ctx);
+       LLVMValueRef is_es_thread = si_is_es_thread(ctx);
         LLVMValueRef vtxindex[] = {
                 si_unpack_param(ctx, ctx->gs_vtx01_offset, 0, 16),
                 si_unpack_param(ctx, ctx->gs_vtx01_offset, 16, 16),
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c

index 2cd0e45350110fcb833ecbf370c52c9ef09f75c6..03fc1bef2bf56f7f317fb47b85a200f53e4bc7f9 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5993,6 +5993,22 @@ static bool si_vs_needs_prolog(const struct si_shader_selector *sel,
         return sel->vs_needs_prolog || key->ls_vgpr_fix;
  }
  
+LLVMValueRef si_is_es_thread(struct si_shader_context *ctx)
+{
+       /* True if the thread ID is below (unsigned) the vertex count in merged_wave_info bits [7:0]. */
+       return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
+                            ac_get_thread_id(&ctx->ac),
+                            si_unpack_param(ctx, ctx->merged_wave_info, 0, 8), "");
+}
+
+LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)
+{
+       /* True if the thread ID is below (unsigned) the patch/primitive count in merged_wave_info bits [15:8]. */
+       return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
+                            ac_get_thread_id(&ctx->ac),
+                            si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), "");
+}
+
  static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                                  struct nir_shader *nir, bool free_nir)
  {
@@ -6160,7 +6176,7 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                 } else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
                            ctx->type == PIPE_SHADER_GEOMETRY ||
                            (shader->key.as_ngg && !shader->key.as_es)) {
-                       LLVMValueRef num_threads;
+                       LLVMValueRef thread_enabled;
                         bool nested_barrier;
  
                         if (!shader->is_monolithic ||
@@ -6177,21 +6193,15 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                                         nested_barrier = true;
                                 }
  
-                               /* Number of patches / primitives */
-                               num_threads = si_unpack_param(ctx, ctx->merged_wave_info, 8, 8);
+                               thread_enabled = si_is_gs_thread(ctx);
                         } else {
-                               /* Number of vertices */
-                               num_threads = si_unpack_param(ctx, ctx->merged_wave_info, 0, 8);
+                               thread_enabled = si_is_es_thread(ctx);
                                 nested_barrier = false;
                         }
  
-                       LLVMValueRef ena =
-                               LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
-                                           ac_get_thread_id(&ctx->ac), num_threads, "");
-
                         ctx->merged_wrap_if_entry_block = LLVMGetInsertBlock(ctx->ac.builder);
                         ctx->merged_wrap_if_label = 11500;
-                       ac_build_ifcc(&ctx->ac, ena, ctx->merged_wrap_if_label);
+                       ac_build_ifcc(&ctx->ac, thread_enabled, ctx->merged_wrap_if_label);
  
                         if (nested_barrier) {
                                 /* Execute a barrier before the second shader in
@@ -7423,12 +7433,7 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
         LLVMBasicBlockRef if_entry_block = NULL;
  
         if (key->vs_prolog.is_monolithic && key->vs_prolog.as_ngg) {
-               LLVMValueRef num_threads;
-               LLVMValueRef ena;
-
-               num_threads = si_unpack_param(ctx, merged_wave_info, 0, 8);
-               ena = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
-                                       ac_get_thread_id(&ctx->ac), num_threads, "");
+               LLVMValueRef ena = si_is_es_thread(ctx);
                 if_entry_block = LLVMGetInsertBlock(ctx->ac.builder);
                 ac_build_ifcc(&ctx->ac, ena, 11501);
                 wrapped = true;
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h

index 1169e60db7f7567b87fd50eb424e60dfe4713c53..ed46d8efe4b4c8dffc4d3525bd250bf997568241 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -375,6 +375,8 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir);
  LLVMValueRef si_unpack_param(struct si_shader_context *ctx,
                              struct ac_arg param, unsigned rshift,
                              unsigned bitwidth);
+LLVMValueRef si_is_es_thread(struct si_shader_context *ctx);
+LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx);
  
  void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
                              unsigned max_outputs,
author	Marek Olšák <marek.olsak@amd.com>
	Thu, 5 Dec 2019 01:27:46 +0000 (20:27 -0500)
committer	Marge Bot <eric+marge@anholt.net>
	Mon, 16 Dec 2019 20:06:07 +0000 (20:06 +0000)
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c		patch \| blob \| history
src/gallium/drivers/radeonsi/si_shader.c		patch \| blob \| history
src/gallium/drivers/radeonsi/si_shader_internal.h		patch \| blob \| history