From 1d6f667193d3b29d27d6721f694af290510a4e60 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 7 May 2020 18:15:59 +0100 Subject: [PATCH] aco: coalesce copies more aggressively when lowering to hw Helps some Detroit: Become Human shaders. Totals from affected shaders: (VEGA) Code Size: 9880420 -> 9879088 (-0.01 %) bytes Instructions: 1918553 -> 1918220 (-0.02 %) Copies: 177783 -> 177450 (-0.19 %) Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_lower_to_hw_instr.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index b0b8701720b..323dfdadf98 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1223,6 +1223,26 @@ void handle_operands(std::map& copy_map, lower_context* it->second.bytes = 8; } + /* try to coalesce copies */ + if (it->second.bytes < 8 && !it->second.op.isConstant() && + it->first.reg_b % util_next_power_of_two(it->second.bytes + 1) == 0 && + it->second.op.physReg().reg_b % util_next_power_of_two(it->second.bytes + 1) == 0) { + // TODO try more relaxed alignment for subdword copies + PhysReg other_def_reg = it->first; + other_def_reg.reg_b += it->second.bytes; + PhysReg other_op_reg = it->second.op.physReg(); + other_op_reg.reg_b += it->second.bytes; + std::map::iterator other = copy_map.find(other_def_reg); + if (other != copy_map.end() && + other->second.op.physReg() == other_op_reg && + it->second.bytes + other->second.bytes <= 8) { + it->second.bytes += other->second.bytes; + it->second.def = Definition(it->first, RegClass::get(it->second.def.regClass().type(), it->second.bytes)); + it->second.op = Operand(it->second.op.physReg(), RegClass::get(it->second.op.regClass().type(), it->second.bytes)); + copy_map.erase(other); + } + } + /* check if the definition reg is used by another copy operation */ for (std::pair& copy : copy_map) { if 
(copy.second.op.isConstant()) -- 2.30.2