From d345bfe1958db162b1ddde85eccd3248f884f231 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Tue, 31 Mar 2020 10:49:52 +0200 Subject: [PATCH] aco: Extract merged_wave_info_to_mask to its own function. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Currently we only use this at the beginning of merged shader parts, but we are going to need to use it with some NGG code as well. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann Part-of: --- .../compiler/aco_instruction_selection.cpp | 45 +++++++++++-------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 8bc0e58cf48..6128878c83e 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -10254,6 +10254,31 @@ void cleanup_cfg(Program *program) } } +Temp merged_wave_info_to_mask(isel_context *ctx, unsigned i) +{ + Builder bld(ctx->program, ctx->block); + + /* The s_bfm only cares about s0.u[5:0] so we don't need either s_bfe nor s_and here */ + Temp count = i == 0 + ? get_arg(ctx, ctx->args->merged_wave_info) + : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), + get_arg(ctx, ctx->args->merged_wave_info), Operand(i * 8u)); + + Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand(0u)); + Temp cond; + + if (ctx->program->wave_size == 64) { + /* Special case for 64 active invocations, because 64 doesn't work with s_bfm */ + Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, Operand(6u /* log2(64) */)); + cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), mask, bld.scc(active_64)); + } else { + /* We use s_bfm_b64 (not _b32) which works with 32, but we need to extract the lower half of the register */ + cond = emit_extract_vector(ctx, mask, 0, bld.lm); + } + + return cond; +} + void select_program(Program *program, unsigned shader_count, struct nir_shader *const *shaders, @@ -10291,25 +10316,7 @@ void select_program(Program *program, bool check_merged_wave_info = ctx.tcs_in_out_eq ? i == 0 : (shader_count >= 2 && !empty_shader); bool endif_merged_wave_info = ctx.tcs_in_out_eq ? i == 1 : check_merged_wave_info; if (check_merged_wave_info) { - Builder bld(ctx.program, ctx.block); - - /* The s_bfm only cares about s0.u[5:0] so we don't need either s_bfe nor s_and here */ - Temp count = i == 0 ? get_arg(&ctx, args->merged_wave_info) - : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), - get_arg(&ctx, args->merged_wave_info), Operand(i * 8u)); - - Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand(0u)); - Temp cond; - - if (ctx.program->wave_size == 64) { - /* Special case for 64 active invocations, because 64 doesn't work with s_bfm */ - Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, Operand(6u /* log2(64) */)); - cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), mask, bld.scc(active_64)); - } else { - /* We use s_bfm_b64 (not _b32) which works with 32, but we need to extract the lower half of the register */ - cond = emit_extract_vector(&ctx, mask, 0, bld.lm); - } - + Temp cond = merged_wave_info_to_mask(&ctx, i); begin_divergent_if_then(&ctx, &ic_merged_wave_info, cond); } -- 2.30.2