From 76b5d72921ced04fb8796b7a23a468fc1a4735e3 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Daniel=20Sch=C3=BCrmann?=
Date: Tue, 23 Jun 2020 11:55:34 +0100
Subject: [PATCH] aco: align swap operations to 4 bytes on GFX6/7

GFX6/7 can only swap full registers

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_lower_to_hw_instr.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 10ba9b82d3a..b208bb6fd34 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -1159,7 +1159,7 @@ void do_swap(lower_context *ctx, Builder& bld, const copy_operation& copy, bool
       Definition op_as_def = Definition(op.physReg(), op.regClass());
       if (ctx->program->chip_class >= GFX9 && def.regClass() == v1) {
          bld.vop1(aco_opcode::v_swap_b32, def, op_as_def, op, def_as_op);
-      } else if (def.regClass() == v1 || (def.regClass().is_subdword() && ctx->program->chip_class < GFX8)) {
+      } else if (def.regClass() == v1) {
          assert(def.physReg().byte() == 0 && op.physReg().byte() == 0);
          bld.vop2(aco_opcode::v_xor_b32, op_as_def, op, def_as_op);
          bld.vop2(aco_opcode::v_xor_b32, def, op, def_as_op);
@@ -1561,6 +1561,10 @@ void handle_operands(std::map& copy_map, lower_context*
          swap.bytes = offset;
       }
 
+      /* GFX6-7 can only swap full registers */
+      if (ctx->program->chip_class <= GFX7)
+         swap.bytes = align(swap.bytes, 4);
+
       do_swap(ctx, bld, swap, preserve_scc, pi);
 
       /* remove from map */
-- 
2.30.2