From 28d36d26c2212276e1238fad8f0b12caab97fee8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Fri, 10 Apr 2020 11:52:13 +0100 Subject: [PATCH] aco: fix p_extract_vector optimization in presence of unequally sized vector operands Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_optimizer.cpp | 49 ++++++++++++++++-------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 5e80d9afe7f..2e83b4e9d74 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -954,33 +954,38 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) case aco_opcode::p_extract_vector: { /* mov */ if (!ctx.info[instr->operands[0].tempId()].is_vec()) break; + + /* check if we index directly into a vector element */ Instruction* vec = ctx.info[instr->operands[0].tempId()].instr; - if (vec->definitions[0].getTemp().size() == vec->operands.size() && /* TODO: what about 64bit or other combinations? */ - vec->operands[0].size() == instr->definitions[0].size()) { - - /* convert this extract into a mov instruction */ - Operand vec_op = vec->operands[instr->operands[1].constantValue()]; - bool is_vgpr = instr->definitions[0].getTemp().type() == RegType::vgpr; - aco_opcode opcode = is_vgpr ? aco_opcode::v_mov_b32 : aco_opcode::s_mov_b32; - Format format = is_vgpr ? Format::VOP1 : Format::SOP1; - instr->opcode = opcode; - instr->format = format; - while (instr->operands.size() > 1) - instr->operands.pop_back(); - instr->operands[0] = vec_op; + const unsigned index = instr->operands[1].constantValue(); + const unsigned dst_offset = index * instr->definitions[0].bytes(); + unsigned offset = 0; - if (vec_op.isConstant()) { - if (vec_op.isLiteral()) - ctx.info[instr->definitions[0].tempId()].set_literal(vec_op.constantValue()); - else if (vec_op.size() == 1) - ctx.info[instr->definitions[0].tempId()].set_constant(vec_op.constantValue()); - else if (vec_op.size() == 2) - ctx.info[instr->definitions[0].tempId()].set_constant_64bit(vec_op.constantValue()); + for (const Operand& op : vec->operands) { + if (offset < dst_offset) { + offset += op.bytes(); + continue; + } else if (offset != dst_offset || op.bytes() != instr->definitions[0].bytes()) { + break; + } + /* convert this extract into a copy instruction */ + instr->opcode = aco_opcode::p_parallelcopy; + instr->operands.pop_back(); + instr->operands[0] = op; + + if (op.isConstant()) { + if (op.isLiteral()) + ctx.info[instr->definitions[0].tempId()].set_literal(op.constantValue()); + else if (op.size() == 1) + ctx.info[instr->definitions[0].tempId()].set_constant(op.constantValue()); + else if (op.size() == 2) + ctx.info[instr->definitions[0].tempId()].set_constant_64bit(op.constantValue()); } else { - assert(vec_op.isTemp()); - ctx.info[instr->definitions[0].tempId()].set_temp(vec_op.getTemp()); + assert(op.isTemp()); + ctx.info[instr->definitions[0].tempId()].set_temp(op.getTemp()); } + break; } break; } -- 2.30.2