From 62ea429a9935582636c87c38210408ccba0477da Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 12 May 2020 15:08:05 +0100 Subject: [PATCH] aco: prefer 4-byte aligned definitions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit shader-db (Navi, fp16 enabled): Totals from 42 (0.03% of 127638) affected shaders: CodeSize: 811984 -> 806224 (-0.71%) Instrs: 155733 -> 155939 (+0.13%); split: -0.04%, +0.18% Cycles: 1982568 -> 1984400 (+0.09%); split: -0.06%, +0.15% VMEM: 7187 -> 7121 (-0.92%); split: +0.86%, -1.78% SMEM: 1770 -> 1769 (-0.06%) VClause: 1475 -> 1476 (+0.07%) Copies: 12406 -> 12606 (+1.61%); split: -0.46%, +2.07% Branches: 5901 -> 5900 (-0.02%); split: -0.25%, +0.24% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 60 ++++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 6a1e2b78c56..a198c9312c4 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -567,38 +567,9 @@ std::pair get_reg_simple(ra_ctx& ctx, uint32_t lb = info.lb; uint32_t ub = info.ub; uint32_t size = info.size; - uint32_t stride = info.stride; + uint32_t stride = info.rc.is_subdword() ? DIV_ROUND_UP(info.stride, 4) : info.stride; RegClass rc = info.rc; - if (rc.is_subdword()) { - for (std::pair> entry : reg_file.subdword_regs) { - assert(reg_file[entry.first] == 0xF0000000); - if (lb > entry.first || entry.first >= ub) - continue; - - for (unsigned i = 0; i < 4; i+= stride) { - if (entry.second[i] != 0) - continue; - - bool reg_found = true; - for (unsigned j = 1; reg_found && i + j < 4 && j < rc.bytes(); j++) - reg_found &= entry.second[i + j] == 0; - - /* check neighboring reg if needed */ - reg_found &= ((int)i <= 4 - (int)rc.bytes() || reg_file[entry.first + 1] == 0); - if (reg_found) { - PhysReg res{entry.first}; - res.reg_b += i; - adjust_max_used_regs(ctx, rc, entry.first); - return {res, true}; - } - } - } - - stride = 1; /* stride in full registers */ - rc = info.rc = RegClass(RegType::vgpr, size); - } - if (stride == 1) { for (unsigned stride = 8; stride > 1; stride /= 2) { @@ -689,6 +660,35 @@ std::pair get_reg_simple(ra_ctx& ctx, reg_lo += stride; } + /* do this late because using the upper bytes of a register can require + * larger instruction encodings or copies + * TODO: don't do this in situations where it doesn't benefit */ + if (rc.is_subdword()) { + for (std::pair> entry : reg_file.subdword_regs) { + assert(reg_file[entry.first] == 0xF0000000); + if (lb > entry.first || entry.first >= ub) + continue; + + for (unsigned i = 0; i < 4; i+= info.stride) { + if (entry.second[i] != 0) + continue; + + bool reg_found = true; + for (unsigned j = 1; reg_found && i + j < 4 && j < rc.bytes(); j++) + reg_found &= entry.second[i + j] == 0; + + /* check neighboring reg if needed */ + reg_found &= ((int)i <= 4 - (int)rc.bytes() || reg_file[entry.first + 1] == 0); + if (reg_found) { + PhysReg res{entry.first}; + res.reg_b += i; + adjust_max_used_regs(ctx, rc, entry.first); + return {res, true}; + } + } + } + } + return {{}, false}; } -- 2.30.2