From f622e80494f209a0c51cc804bb900eefe8f2fdaa Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 6 Jan 2020 16:50:41 +0000 Subject: [PATCH] aco: create better code for boolean phis with constant operands MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit fossil-db (Navi): Totals from 6394 (4.70% of 135946) affected shaders: SGPRs: 651408 -> 651344 (-0.01%) SpillSGPRs: 52102 -> 52019 (-0.16%) CodeSize: 68369664 -> 68229180 (-0.21%); split: -0.21%, +0.00% Instrs: 13236611 -> 13202126 (-0.26%); split: -0.26%, +0.00% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- .../compiler/aco_instruction_selection.cpp | 14 ++-- src/amd/compiler/aco_lower_phis.cpp | 68 ++++++++++++++----- 2 files changed, 62 insertions(+), 20 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 6fb6e664edd..f8e4ba7e769 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -8953,13 +8953,19 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr) } -Operand get_phi_operand(isel_context *ctx, nir_ssa_def *ssa, RegClass rc) +Operand get_phi_operand(isel_context *ctx, nir_ssa_def *ssa, RegClass rc, bool logical) { Temp tmp = get_ssa_temp(ctx, ssa); - if (ssa->parent_instr->type == nir_instr_type_ssa_undef) + if (ssa->parent_instr->type == nir_instr_type_ssa_undef) { return Operand(rc); - else + } else if (logical && ssa->bit_size == 1 && ssa->parent_instr->type == nir_instr_type_load_const) { + if (ctx->program->wave_size == 64) + return Operand(nir_instr_as_load_const(ssa->parent_instr)->value[0].b ? UINT64_MAX : 0u); + else + return Operand(nir_instr_as_load_const(ssa->parent_instr)->value[0].b ? UINT32_MAX : 0u); + } else { return Operand(tmp); + } } void visit_phi(isel_context *ctx, nir_phi_instr *instr) @@ -9002,7 +9008,7 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr) if (!(ctx->block->kind & block_kind_loop_header) && cur_pred_idx >= preds.size()) continue; cur_pred_idx++; - Operand op = get_phi_operand(ctx, src.second, dst.regClass()); + Operand op = get_phi_operand(ctx, src.second, dst.regClass(), logical); operands[num_operands++] = op; num_defined += !op.isUndefined(); } diff --git a/src/amd/compiler/aco_lower_phis.cpp b/src/amd/compiler/aco_lower_phis.cpp index b90d99ee424..a7d2b6dce72 100644 --- a/src/amd/compiler/aco_lower_phis.cpp +++ b/src/amd/compiler/aco_lower_phis.cpp @@ -96,9 +96,56 @@ void insert_before_logical_end(Block *block, aco_ptr instr) if (it == block->instructions.crend()) { assert(block->instructions.back()->format == Format::PSEUDO_BRANCH); block->instructions.insert(std::prev(block->instructions.end()), std::move(instr)); - } - else + } else { block->instructions.insert(std::prev(it.base()), std::move(instr)); + } +} + +void build_merge_code(Program *program, Block *block, Definition dst, Operand prev, Operand cur) +{ + Builder bld(program); + + auto IsLogicalEnd = [] (const aco_ptr& instr) -> bool { + return instr->opcode == aco_opcode::p_logical_end; + }; + auto it = std::find_if(block->instructions.rbegin(), block->instructions.rend(), IsLogicalEnd); + assert(it != block->instructions.rend()); + bld.reset(&block->instructions, std::prev(it.base())); + + if (prev.isUndefined()) { + bld.sop1(Builder::s_mov, dst, cur); + return; + } + + bool prev_is_constant = prev.isConstant() && prev.constantValue64(true) + 1u < 2u; + bool cur_is_constant = cur.isConstant() && cur.constantValue64(true) + 1u < 2u; + + if (!prev_is_constant) { + if (!cur_is_constant) { + Temp tmp1 = bld.tmp(bld.lm), tmp2 = bld.tmp(bld.lm); + bld.sop2(Builder::s_andn2, Definition(tmp1), bld.def(s1, scc), prev, Operand(exec, bld.lm)); + bld.sop2(Builder::s_and, Definition(tmp2), bld.def(s1, scc), cur, Operand(exec, bld.lm)); + bld.sop2(Builder::s_or, dst, bld.def(s1, scc), tmp1, tmp2); + } else if (cur.constantValue64(true)) { + bld.sop2(Builder::s_or, dst, bld.def(s1, scc), prev, Operand(exec, bld.lm)); + } else { + bld.sop2(Builder::s_andn2, dst, bld.def(s1, scc), prev, Operand(exec, bld.lm)); + } + } else if (prev.constantValue64(true)) { + if (!cur_is_constant) + bld.sop2(Builder::s_orn2, dst, bld.def(s1, scc), cur, Operand(exec, bld.lm)); + else if (cur.constantValue64(true)) + bld.sop1(Builder::s_mov, dst, program->wave_size == 64 ? Operand(UINT64_MAX) : Operand(UINT32_MAX)); + else + bld.sop1(Builder::s_not, dst, bld.def(s1, scc), Operand(exec, bld.lm)); + } else { + if (!cur_is_constant) + bld.sop2(Builder::s_and, dst, bld.def(s1, scc), cur, Operand(exec, bld.lm)); + else if (cur.constantValue64(true)) + bld.sop1(Builder::s_mov, dst, Operand(exec, bld.lm)); + else + bld.sop1(Builder::s_mov, dst, program->wave_size == 64 ? Operand((uint64_t)0u) : Operand(0u)); + } } void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block, aco_ptr& phi) @@ -144,20 +191,9 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block, Temp new_cur = {state->writes.at(pred->index), program->lane_mask}; assert(new_cur.regClass() == bld.lm); - if (cur.isUndefined()) { - insert_before_logical_end(pred, bld.sop1(aco_opcode::s_mov_b64, Definition(new_cur), phi->operands[i]).get_ptr()); - } else { - Temp tmp1 = bld.tmp(bld.lm), tmp2 = bld.tmp(bld.lm); - insert_before_logical_end(pred, - bld.sop2(Builder::s_andn2, Definition(tmp1), bld.def(s1, scc), - cur, Operand(exec, bld.lm)).get_ptr()); - insert_before_logical_end(pred, - bld.sop2(Builder::s_and, Definition(tmp2), bld.def(s1, scc), - phi->operands[i].getTemp(), Operand(exec, bld.lm)).get_ptr()); - insert_before_logical_end(pred, - bld.sop2(Builder::s_or, Definition(new_cur), bld.def(s1, scc), - tmp1, tmp2).get_ptr()); - } + if (i == 1 && (block->kind & block_kind_merge) && phi->operands[0].isConstant()) + cur = phi->operands[0]; + build_merge_code(program, pred, Definition(new_cur), cur, phi->operands[i]); } unsigned num_preds = block->linear_preds.size(); -- 2.30.2