From 7cad27831d27b668f592a7b8fb2b09b6f443cd8f Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Wed, 24 Jun 2020 13:23:12 +0100
Subject: [PATCH] aco: ignore blocked registers when checking edges in get_reg_impl()
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

If the only two registers available are consecutive and used by killed
operands, both of them will be blocked and fail the edge check.

Totals from 903 (0.66% of 135946) affected shaders:
VGPRs: 30892 -> 30884 (-0.03%)
CodeSize: 1584468 -> 1584044 (-0.03%); split: -0.05%, +0.02%
MaxWaves: 14374 -> 14378 (+0.03%)
Instrs: 306482 -> 306399 (-0.03%); split: -0.06%, +0.03%

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_register_allocation.cpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 9af9bdfe235..a59a9af7add 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -185,6 +185,13 @@ public:
       return false;
    }
 
+   bool is_empty_or_blocked(PhysReg start) {
+      if (regs[start] == 0xF0000000) {
+         return subdword_regs[start][start.byte()] + 1 <= 1;
+      }
+      return regs[start] + 1 <= 1;
+   }
+
    void clear(PhysReg start, RegClass rc) {
       if (rc.is_subdword())
          fill_subdword(start, rc.bytes(), 0);
@@ -947,9 +954,11 @@ std::pair get_reg_impl(ra_ctx& ctx,
    unsigned reg_hi = lb + size - 1;
    for (reg_lo = lb, reg_hi = lb + size - 1; reg_hi < ub; reg_lo += stride, reg_hi += stride) {
       /* first check the edges: this is what we have to fix to allow for num_moves > size */
-      if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file.get_id(PhysReg(reg_lo)) == reg_file.get_id(PhysReg(reg_lo).advance(-1)))
+      if (reg_lo > lb && !reg_file.is_empty_or_blocked(PhysReg(reg_lo)) &&
+          reg_file.get_id(PhysReg(reg_lo)) == reg_file.get_id(PhysReg(reg_lo).advance(-1)))
          continue;
-      if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file.get_id(PhysReg(reg_hi).advance(3)) == reg_file.get_id(PhysReg(reg_hi).advance(4)))
+      if (reg_hi < ub - 1 && !reg_file.is_empty_or_blocked(PhysReg(reg_hi).advance(3)) &&
+          reg_file.get_id(PhysReg(reg_hi).advance(3)) == reg_file.get_id(PhysReg(reg_hi).advance(4)))
          continue;
 
       /* second, check that we have at most k=num_moves elements in the window
-- 
2.30.2