aco: align swap operations to 4 bytes on GFX6/7
author Daniel Schürmann <daniel@schuermann.dev>
Tue, 23 Jun 2020 10:55:34 +0000 (11:55 +0100)
committer Marge Bot <eric+marge@anholt.net>
Wed, 24 Jun 2020 10:52:28 +0000 (10:52 +0000)
GFX6/7 can only swap full registers, so on those chips the size of a swap operation is rounded up to a multiple of 4 bytes.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5207>

src/amd/compiler/aco_lower_to_hw_instr.cpp

index 10ba9b82d3aac80d8e0486c971aaf6af43fcb5a4..b208bb6fd342da42420e4ba66a2b77bea9bd5a1e 100644 (file)
@@ -1159,7 +1159,7 @@ void do_swap(lower_context *ctx, Builder& bld, const copy_operation& copy, bool
       Definition op_as_def = Definition(op.physReg(), op.regClass());
       if (ctx->program->chip_class >= GFX9 && def.regClass() == v1) {
          bld.vop1(aco_opcode::v_swap_b32, def, op_as_def, op, def_as_op);
-      } else if (def.regClass() == v1 || (def.regClass().is_subdword() && ctx->program->chip_class < GFX8)) {
+      } else if (def.regClass() == v1) {
          assert(def.physReg().byte() == 0 && op.physReg().byte() == 0);
          bld.vop2(aco_opcode::v_xor_b32, op_as_def, op, def_as_op);
          bld.vop2(aco_opcode::v_xor_b32, def, op, def_as_op);
@@ -1561,6 +1561,10 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
          swap.bytes = offset;
       }
 
+      /* GFX6-7 can only swap full registers */
+      if (ctx->program->chip_class <= GFX7)
+         swap.bytes = align(swap.bytes, 4);
+
       do_swap(ctx, bld, swap, preserve_scc, pi);
 
       /* remove from map */