From 897a47d84771fb367fa82fd9656b3de20197952c Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 18 Jun 2020 13:30:50 +0100 Subject: [PATCH] aco: fix edge check with sub-dword temporaries MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fixes RA failure for a parallel-rdp shader on pitcairn. fossil-db (Navi): Totals from 2 (0.00% of 128733) affected shaders: CodeSize: 203656 -> 205724 (+1.02%) Instrs: 32267 -> 32529 (+0.81%) Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_ir.h | 2 +- src/amd/compiler/aco_register_allocation.cpp | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 3db6b4b6d43..a165a1681ad 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -307,7 +307,7 @@ struct PhysReg { constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; } constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; } constexpr bool operator <(PhysReg other) const { return reg_b < other.reg_b; } - constexpr PhysReg advance(unsigned bytes) const { PhysReg res = *this; res.reg_b += bytes; return res; } + constexpr PhysReg advance(int bytes) const { PhysReg res = *this; res.reg_b += bytes; return res; } uint16_t reg_b = 0; }; diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 985aae4cafd..096fe80f6ea 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -214,6 +214,10 @@ public: clear(def.physReg(), def.regClass()); } + unsigned get_id(PhysReg reg) { + return regs[reg] == 0xF0000000 ? subdword_regs[reg][reg.byte()] : regs[reg]; + } + private: void fill(PhysReg start, unsigned size, uint32_t val) { for (unsigned i = 0; i < size; i++) @@ -942,9 +946,9 @@ std::pair get_reg_impl(ra_ctx& ctx, unsigned reg_hi = lb + size - 1; for (reg_lo = lb, reg_hi = lb + size - 1; reg_hi < ub; reg_lo += stride, reg_hi += stride) { /* first check the edges: this is what we have to fix to allow for num_moves > size */ - if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file[reg_lo] == reg_file[reg_lo - 1]) + if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file.get_id(PhysReg(reg_lo)) == reg_file.get_id(PhysReg(reg_lo).advance(-1))) continue; - if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file[reg_hi] == reg_file[reg_hi + 1]) + if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file.get_id(PhysReg(reg_hi).advance(3)) == reg_file.get_id(PhysReg(reg_hi).advance(4))) continue; /* second, check that we have at most k=num_moves elements in the window @@ -1300,9 +1304,9 @@ PhysReg get_reg_create_vector(ra_ctx& ctx, // TODO: this can be improved */ if (reg_lo < lb || reg_hi >= ub || reg_lo % stride != 0) continue; - if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file[reg_lo] == reg_file[reg_lo - 1]) + if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file.get_id(PhysReg(reg_lo)) == reg_file.get_id(PhysReg(reg_lo).advance(-1))) continue; - if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file[reg_hi] == reg_file[reg_hi + 1]) + if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file.get_id(PhysReg(reg_hi).advance(3)) == reg_file.get_id(PhysReg(reg_hi).advance(4))) continue; /* count variables to be moved and check war_hint */ -- 2.30.2