From 3cb98ed939e0607b6342301e20f42e66d36879a6 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 20 Nov 2019 16:31:43 +0000 Subject: [PATCH] aco: combine two sgprs into a VALU if they're the same MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This was supposed to be done before, but it wasn't done correctly or everywhere. pipeline-db (Navi): Totals from affected shaders: SGPRS: 784680 -> 786128 (0.18 %) VGPRS: 574012 -> 573892 (-0.02 %) Spilled SGPRs: 461 -> 461 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Code Size: 45477088 -> 45478172 (0.00 %) bytes Max Waves: 81294 -> 81277 (-0.02 %) Instructions: 8657970 -> 8622483 (-0.41 %) pipeline-db (Vega): Totals from affected shaders: SGPRS: 780664 -> 782072 (0.18 %) VGPRS: 573880 -> 573760 (-0.02 %) Spilled SGPRs: 629 -> 629 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Code Size: 45445244 -> 45448340 (0.01 %) bytes Max Waves: 81178 -> 81161 (-0.02 %) Instructions: 8649902 -> 8614918 (-0.40 %) Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_optimizer.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index e7a2da1bcb1..a589fbbe73e 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -1207,13 +1207,16 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr& instr) Temp op1 = op_instr[i]->operands[1].getTemp(); if (original_temp_id(ctx, op0) != original_temp_id(ctx, op1)) return false; - /* shouldn't happen yet, but best to be safe */ - if (op1.type() != RegType::vgpr) - return false; op[i] = op1; } + if (op[1].type() == RegType::sgpr) + std::swap(op[0], op[1]); + //TODO: we can use two different SGPRs on GFX10 + if (op[0].type() == RegType::sgpr && op[1].type() == RegType::sgpr) + return false; + ctx.uses[op[0].id()]++; ctx.uses[op[1].id()]++; decrease_uses(ctx, op_instr[0]); @@ -1919,7 +1922,7 @@ void 
apply_sgprs(opt_ctx &ctx, aco_ptr& instr) /* we can have two sgprs on one instruction if it is the same sgpr! */ } else if (sgpr_info_id != 0 && - sgpr_ssa_id == sgpr_info_id && + sgpr_ssa_id == ctx.info[sgpr_info_id].temp.id() && ctx.uses[sgpr_info_id] == 1 && can_use_VOP3(instr)) { to_VOP3(ctx, instr); @@ -2133,6 +2136,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr bool clamp = false; bool need_vop3 = false; int num_sgpr = 0; + unsigned cur_sgpr = 0; op[0] = mul_instr->operands[0]; op[1] = mul_instr->operands[1]; op[2] = instr->operands[add_op_idx]; @@ -2140,8 +2144,10 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr { if (op[i].isLiteral()) return; - if (op[i].isTemp() && op[i].getTemp().type() == RegType::sgpr) + if (op[i].isTemp() && op[i].getTemp().type() == RegType::sgpr && op[i].tempId() != cur_sgpr) { num_sgpr++; + cur_sgpr = op[i].tempId(); + } if (!(i == 0 || (op[i].isTemp() && op[i].getTemp().type() == RegType::vgpr))) need_vop3 = true; } -- 2.30.2