aco: Extract merged_wave_info_to_mask to its own function.
author Timur Kristóf <timur.kristof@gmail.com>
Tue, 31 Mar 2020 08:49:52 +0000 (10:49 +0200)
committer Marge Bot <eric+marge@anholt.net>
Tue, 7 Apr 2020 11:29:35 +0000 (11:29 +0000)
Currently we only use this at the beginning of merged shader parts,
but we are going to need to use it with some NGG code as well.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3576>

src/amd/compiler/aco_instruction_selection.cpp

index 8bc0e58cf48ae49be837248b9a0be764f3c81dc3..6128878c83ef67022fc50d01c83252cb3e0d1313 100644 (file)
@@ -10254,6 +10254,31 @@ void cleanup_cfg(Program *program)
    }
 }
 
+Temp merged_wave_info_to_mask(isel_context *ctx, unsigned i)
+{
+   Builder bld(ctx->program, ctx->block);
+
+   /* The s_bfm only cares about s0.u[5:0] so we need neither s_bfe nor s_and here */
+   Temp count = i == 0
+                ? get_arg(ctx, ctx->args->merged_wave_info)
+                : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
+                           get_arg(ctx, ctx->args->merged_wave_info), Operand(i * 8u));
+
+   Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand(0u));
+   Temp cond;
+
+   if (ctx->program->wave_size == 64) {
+      /* Special case for 64 active invocations, because 64 doesn't work with s_bfm */
+      Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, Operand(6u /* log2(64) */));
+      cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), mask, bld.scc(active_64));
+   } else {
+      /* We use s_bfm_b64 (not _b32) which works with 32, but we need to extract the lower half of the register */
+      cond = emit_extract_vector(ctx, mask, 0, bld.lm);
+   }
+
+   return cond;
+}
+
 void select_program(Program *program,
                     unsigned shader_count,
                     struct nir_shader *const *shaders,
@@ -10291,25 +10316,7 @@ void select_program(Program *program,
       bool check_merged_wave_info = ctx.tcs_in_out_eq ? i == 0 : (shader_count >= 2 && !empty_shader);
       bool endif_merged_wave_info = ctx.tcs_in_out_eq ? i == 1 : check_merged_wave_info;
       if (check_merged_wave_info) {
-         Builder bld(ctx.program, ctx.block);
-
-         /* The s_bfm only cares about s0.u[5:0] so we don't need either s_bfe nor s_and here */
-         Temp count = i == 0 ? get_arg(&ctx, args->merged_wave_info)
-                             : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
-                                        get_arg(&ctx, args->merged_wave_info), Operand(i * 8u));
-
-         Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand(0u));
-         Temp cond;
-
-         if (ctx.program->wave_size == 64) {
-            /* Special case for 64 active invocations, because 64 doesn't work with s_bfm */
-            Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, Operand(6u /* log2(64) */));
-            cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), mask, bld.scc(active_64));
-         } else {
-            /* We use s_bfm_b64 (not _b32) which works with 32, but we need to extract the lower half of the register */
-            cond = emit_extract_vector(&ctx, mask, 0, bld.lm);
-         }
-
+         Temp cond = merged_wave_info_to_mask(&ctx, i);
          begin_divergent_if_then(&ctx, &ic_merged_wave_info, cond);
       }