aco: use Info::definition_size instead of definition's regclass
author: Rhys Perry <pendingchaos02@gmail.com>
Fri, 15 May 2020 14:25:44 +0000 (15:25 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Wed, 10 Jun 2020 15:05:11 +0000 (15:05 +0000)
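16-bit abs/neg create v_xor_b32/v_and_b32 instructions with v2b definitions.
Without SDWA, these instructions never do partial writes, so the number of
bytes written must be derived from the opcode's Info::definition_size rather
than from the definition's register class.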

No shader-db changes.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5040>

src/amd/compiler/aco_register_allocation.cpp
src/amd/compiler/aco_validate.cpp

index a198c9312c4a61656222f09c99a51974c93e5ada..74b52301ad395516730d65b6970054fc75a2aff7 100644 (file)
@@ -411,7 +411,7 @@ std::pair<unsigned, unsigned> get_subdword_definition_info(Program *program, con
    else if (instr->format == Format::PSEUDO)
       return std::make_pair(4, rc.size() * 4u);
 
-   bool can_do_partial = chip >= GFX10;
+   unsigned bytes_written = chip >= GFX10 ? rc.bytes() : 4u;
    switch (instr->opcode) {
    case aco_opcode::v_mad_f16:
    case aco_opcode::v_mad_u16:
@@ -419,11 +419,12 @@ std::pair<unsigned, unsigned> get_subdword_definition_info(Program *program, con
    case aco_opcode::v_fma_f16:
    case aco_opcode::v_div_fixup_f16:
    case aco_opcode::v_interp_p2_f16:
-      can_do_partial = chip >= GFX9;
+      bytes_written = chip >= GFX9 ? rc.bytes() : 4u;
       break;
    default:
       break;
    }
+   bytes_written = MAX2(bytes_written, instr_info.definition_size[(int)instr->opcode] / 8u);
 
    if (can_use_SDWA(chip, instr)) {
       return std::make_pair(rc.bytes(), rc.bytes());
@@ -450,7 +451,7 @@ std::pair<unsigned, unsigned> get_subdword_definition_info(Program *program, con
       break;
    }
 
-   return std::make_pair(4u, can_do_partial ? rc.bytes() : 4u);
+   return std::make_pair(4u, bytes_written);
 }
 
 void add_subdword_definition(Program *program, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg, bool is_partial)
index 6d7c0c3e7948e92dad550846232167c1bf9cf882..d4ba88e014a351c24ee6f6dd949823337e7486c1 100644 (file)
@@ -568,7 +568,7 @@ unsigned get_subdword_bytes_written(Program *program, const aco_ptr<Instruction>
       break;
    }
 
-   return chip >= GFX10 ? def.bytes() : 4;
+   return MAX2(chip >= GFX10 ? def.bytes() : 4, instr_info.definition_size[(int)instr->opcode] / 8u);
 }
 
 } /* end namespace */