From edf863d1d29f7afbca2d53dca963e8fa0362b8a6 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 15 May 2020 15:25:44 +0100 Subject: [PATCH] aco: use Info::definition_size instead of definition's regclass MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit 16-bit abs/neg creates v_xor_b32/v_and_b32 with v2b definitions. These instructions never do partial writes without SDWA. No shader-db changes. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 7 ++++--- src/amd/compiler/aco_validate.cpp | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index a198c9312c4..74b52301ad3 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -411,7 +411,7 @@ std::pair<unsigned, unsigned> get_subdword_definition_info(Program *program, con else if (instr->format == Format::PSEUDO) return std::make_pair(4, rc.size() * 4u); - bool can_do_partial = chip >= GFX10; + unsigned bytes_written = chip >= GFX10 ? rc.bytes() : 4u; switch (instr->opcode) { case aco_opcode::v_mad_f16: case aco_opcode::v_mad_u16: @@ -419,11 +419,12 @@ std::pair<unsigned, unsigned> get_subdword_definition_info(Program *program, con case aco_opcode::v_fma_f16: case aco_opcode::v_div_fixup_f16: case aco_opcode::v_interp_p2_f16: - can_do_partial = chip >= GFX9; + bytes_written = chip >= GFX9 ? rc.bytes() : 4u; break; default: break; } + bytes_written = MAX2(bytes_written, instr_info.definition_size[(int)instr->opcode] / 8u); if (can_use_SDWA(chip, instr)) { return std::make_pair(rc.bytes(), rc.bytes()); @@ -450,7 +451,7 @@ std::pair<unsigned, unsigned> get_subdword_definition_info(Program *program, con break; } - return std::make_pair(4u, can_do_partial ? 
rc.bytes() : 4u); + return std::make_pair(4u, bytes_written); } void add_subdword_definition(Program *program, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg, bool is_partial) diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 6d7c0c3e794..d4ba88e014a 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -568,7 +568,7 @@ unsigned get_subdword_bytes_written(Program *program, const aco_ptr<Instruction> break; } - return chip >= GFX10 ? def.bytes() : 4; + return MAX2(chip >= GFX10 ? def.bytes() : 4, instr_info.definition_size[(int)instr->opcode] / 8u); } } /* end namespace */ -- 2.30.2