From 9a089baff1af757b1c0f033f4bb16cb2c8864271 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 9 Jan 2020 16:51:34 +0000 Subject: [PATCH] aco: optimize boolean phis with uniform selections MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Even though the boolean can be divergent, the control flow can be (at least partially) uniform. For example, we don't have to create any s_andn2_b64/s_and_b64/s_or_b64 instructions with this code: a = ... loop { b = bool_phi a, c if (uniform) break c = ... } d = phi c fossil-db (Navi): Totals from 5506 (4.05% of 135946) affected shaders: SGPRs: 605720 -> 604024 (-0.28%) SpillSGPRs: 52025 -> 51733 (-0.56%) CodeSize: 65221188 -> 64957808 (-0.40%); split: -0.41%, +0.00% Instrs: 12637881 -> 12584610 (-0.42%); split: -0.42%, +0.00% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_lower_phis.cpp | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_lower_phis.cpp b/src/amd/compiler/aco_lower_phis.cpp index a7d2b6dce72..ad0c620a69f 100644 --- a/src/amd/compiler/aco_lower_phis.cpp +++ b/src/amd/compiler/aco_lower_phis.cpp @@ -35,6 +35,9 @@ namespace aco { struct ssa_state { + bool checked_preds_for_uniform; + bool all_preds_uniform; + bool needs_init; uint64_t cur_undef_operands; @@ -152,6 +155,19 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block, { Builder bld(program); + if (!state->checked_preds_for_uniform) { + state->all_preds_uniform = !(block->kind & block_kind_merge); + for (unsigned pred : block->logical_preds) + state->all_preds_uniform = state->all_preds_uniform && (program->blocks[pred].kind & block_kind_uniform); + state->checked_preds_for_uniform = true; + } + + if (state->all_preds_uniform) { + assert(block->logical_preds.size() == block->linear_preds.size()); + phi->opcode = aco_opcode::p_linear_phi; + return; + } + 
state->latest.resize(program->blocks.size()); uint64_t undef_operands = 0; @@ -180,14 +196,23 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block, state->writes[block->logical_preds[i]] = program->allocateId(); } + bool uniform_merge = block->kind & block_kind_loop_header; + for (unsigned i = 0; i < phi->operands.size(); i++) { Block *pred = &program->blocks[block->logical_preds[i]]; + bool need_get_ssa = !uniform_merge; + if (block->kind & block_kind_loop_header && !(pred->kind & block_kind_uniform)) + uniform_merge = false; + if (phi->operands[i].isUndefined()) continue; - Operand cur = get_ssa(program, pred->index, state, true); + Operand cur(bld.lm); + if (need_get_ssa) + cur = get_ssa(program, pred->index, state, true); assert(cur.regClass() == bld.lm); + Temp new_cur = {state->writes.at(pred->index), program->lane_mask}; assert(new_cur.regClass() == bld.lm); @@ -241,6 +266,7 @@ void lower_phis(Program* program) ssa_state state; for (Block& block : program->blocks) { + state.checked_preds_for_uniform = false; state.needs_init = true; for (aco_ptr<Instruction>& phi : block.instructions) { if (phi->opcode == aco_opcode::p_phi) { -- 2.30.2