From f13049f48a068b435f3dfb24c9af801475f16fdb Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 17 Apr 2020 14:33:34 +0100 Subject: [PATCH] aco: implement 64-bit sgpr swaps MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit In our pipeline-db, helps almost exclusively Detroit: Become Human. Totals from 6726 (5.36% of 125503) affected shaders: CodeSize: 74680952 -> 74102228 (-0.77%) Instrs: 14551507 -> 14406001 (-1.00%) Cycles: 1748272436 -> 1690173104 (-3.32%) VMEM: 964671 -> 964058 (-0.06%) Copies: 1993312 -> 1847806 (-7.30%) Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_lower_to_hw_instr.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 4a9cc9c9c62..da381dc7ced 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -818,7 +818,7 @@ void do_swap(lower_context *ctx, Builder& bld, const copy_operation& copy, bool for (; offset < copy.bytes;) { Definition def; Operand op; - split_copy(offset, &def, &op, copy, true, 4); + split_copy(offset, &def, &op, copy, true, 8); assert(op.regClass() == def.regClass()); Operand def_as_op = Operand(def.physReg(), def.regClass()); @@ -852,6 +852,15 @@ void do_swap(lower_context *ctx, Builder& bld, const copy_operation& copy, bool bld.sop2(aco_opcode::s_xor_b32, op_as_def, Definition(scc, s1), op, def_as_op); } ctx->program->statistics[statistic_copies] += 3; + } else if (def.regClass() == s2) { + if (preserve_scc) + bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), Operand(scc, s1)); + bld.sop2(aco_opcode::s_xor_b64, op_as_def, Definition(scc, s1), op, def_as_op); + bld.sop2(aco_opcode::s_xor_b64, def, Definition(scc, s1), op, def_as_op); + bld.sop2(aco_opcode::s_xor_b64, op_as_def, Definition(scc, s1), op, def_as_op); + if (preserve_scc) + bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(pi->scratch_sgpr, s1), Operand(0u)); + ctx->program->statistics[statistic_copies] += 3; } else if (ctx->program->chip_class >= GFX9 && def.bytes() == 2 && def.physReg().reg() == op.physReg().reg()) { aco_ptr vop3p{create_instruction(aco_opcode::v_pk_add_u16, Format::VOP3P, 2, 1)}; vop3p->operands[0] = Operand(PhysReg{op.physReg().reg()}, v1); -- 2.30.2