From 41ac44e1b3ad19b9a889fc3d1052611ae309ffb0 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 10 Apr 2020 13:09:54 +0100 Subject: [PATCH] aco: improve vector optimization with sub-dword vectors MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_optimizer.cpp | 33 ++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 2e83b4e9d74..491df9404ba 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -925,7 +925,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) } if (instr->operands.size() == 1 && instr->operands[0].isTemp()) ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); - else if (instr->definitions[0].getTemp().size() == instr->operands.size()) + else ctx.info[instr->definitions[0].tempId()].set_vec(instr.get()); break; } @@ -933,10 +933,17 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) if (!ctx.info[instr->operands[0].tempId()].is_vec()) break; Instruction* vec = ctx.info[instr->operands[0].tempId()].instr; - if (instr->definitions.size() != vec->operands.size()) - break; - for (unsigned i = 0; i < instr->definitions.size(); i++) { - Operand vec_op = vec->operands[i]; + unsigned split_offset = 0; + unsigned vec_offset = 0; + unsigned vec_index = 0; + for (unsigned i = 0; i < instr->definitions.size(); split_offset += instr->definitions[i++].bytes()) { + while (vec_offset < split_offset && vec_index < vec->operands.size()) + vec_offset += vec->operands[vec_index++].bytes(); + + if (vec_offset != split_offset || vec->operands[vec_index].bytes() != instr->definitions[i].bytes()) + continue; + + Operand vec_op = vec->operands[vec_index]; if (vec_op.isConstant()) { if (vec_op.isLiteral()) ctx.info[instr->definitions[i].tempId()].set_literal(vec_op.constantValue()); @@ -2557,10 +2564,12 @@ void select_instruction(opt_ctx &ctx, aco_ptr& instr) if (instr->opcode == aco_opcode::p_split_vector) { unsigned num_used = 0; unsigned idx = 0; - for (unsigned i = 0; i < instr->definitions.size(); i++) { + unsigned split_offset = 0; + for (unsigned i = 0, offset = 0; i < instr->definitions.size(); offset += instr->definitions[i++].bytes()) { if (ctx.uses[instr->definitions[i].tempId()]) { num_used++; idx = i; + split_offset = offset; } } bool done = false; @@ -2571,13 +2580,13 @@ void select_instruction(opt_ctx &ctx, aco_ptr& instr) unsigned off = 0; Operand op; for (Operand& vec_op : vec->operands) { - if (off == idx * instr->definitions[0].size()) { + if (off == split_offset) { op = vec_op; break; } - off += vec_op.size(); + off += vec_op.bytes(); } - if (off != instr->operands[0].size()) { + if (off != instr->operands[0].bytes() && op.bytes() == instr->definitions[idx].bytes()) { ctx.uses[instr->operands[0].tempId()]--; for (Operand& vec_op : vec->operands) { if (vec_op.isTemp()) @@ -2595,10 +2604,12 @@ void select_instruction(opt_ctx &ctx, aco_ptr& instr) } } - if (!done && num_used == 1) { + if (!done && num_used == 1 && + instr->operands[0].bytes() % instr->definitions[idx].bytes() == 0 && + split_offset % instr->definitions[idx].bytes() == 0) { aco_ptr extract{create_instruction(aco_opcode::p_extract_vector, Format::PSEUDO, 2, 1)}; extract->operands[0] = instr->operands[0]; - extract->operands[1] = Operand((uint32_t) idx); + extract->operands[1] = Operand((uint32_t) split_offset / instr->definitions[idx].bytes()); extract->definitions[0] = instr->definitions[idx]; instr.reset(extract.release()); } -- 2.30.2