From 5718347c2b42ee25e5377d40024aaaa929889c44 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 2 Apr 2020 17:50:46 +0100 Subject: [PATCH] aco: implement vec2/3/4 with subdword operands MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Rhys Perry Reviewed-By: Timur Kristóf Part-of: --- .../compiler/aco_instruction_selection.cpp | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index c74c4344f79..31dc06b1a87 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -941,14 +941,38 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) case nir_op_vec3: case nir_op_vec4: { std::array elems; - aco_ptr vec{create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, instr->dest.dest.ssa.num_components, 1)}; - for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; ++i) { + unsigned num = instr->dest.dest.ssa.num_components; + for (unsigned i = 0; i < num; ++i) elems[i] = get_alu_src(ctx, instr->src[i]); - vec->operands[i] = Operand{elems[i]}; + + if (instr->dest.dest.ssa.bit_size >= 32 || dst.type() == RegType::vgpr) { + aco_ptr vec{create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, instr->dest.dest.ssa.num_components, 1)}; + for (unsigned i = 0; i < num; ++i) + vec->operands[i] = Operand{elems[i]}; + vec->definitions[0] = Definition(dst); + ctx->block->instructions.emplace_back(std::move(vec)); + ctx->allocated_vec.emplace(dst.id(), elems); + } else { + // TODO: that is a bit suboptimal.. + Temp mask = bld.copy(bld.def(s1), Operand((1u << instr->dest.dest.ssa.bit_size) - 1)); + for (unsigned i = 0; i < num - 1; ++i) + if (((i+1) * instr->dest.dest.ssa.bit_size) % 32) + elems[i] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), elems[i], mask); + for (unsigned i = 0; i < num; ++i) { + unsigned bit = i * instr->dest.dest.ssa.bit_size; + if (bit % 32 == 0) { + elems[bit / 32] = elems[i]; + } else { + elems[i] = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), + elems[i], Operand((i * instr->dest.dest.ssa.bit_size) % 32)); + elems[bit / 32] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), elems[bit / 32], elems[i]); + } + } + if (dst.size() == 1) + bld.copy(Definition(dst), elems[0]); + else + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), elems[0], elems[1]); } - vec->definitions[0] = Definition(dst); - ctx->block->instructions.emplace_back(std::move(vec)); - ctx->allocated_vec.emplace(dst.id(), elems); break; } case nir_op_mov: { -- 2.30.2