From: Daniel Schürmann Date: Thu, 2 Apr 2020 16:50:46 +0000 (+0100) Subject: aco: implement vec2/3/4 with subdword operands X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5718347c2b42ee25e5377d40024aaaa929889c44;p=mesa.git aco: implement vec2/3/4 with subdword operands Reviewed-by: Rhys Perry Reviewed-By: Timur Kristóf Part-of: --- diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index c74c4344f79..31dc06b1a87 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -941,14 +941,38 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) case nir_op_vec3: case nir_op_vec4: { std::array elems; - aco_ptr vec{create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, instr->dest.dest.ssa.num_components, 1)}; - for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; ++i) { + unsigned num = instr->dest.dest.ssa.num_components; + for (unsigned i = 0; i < num; ++i) elems[i] = get_alu_src(ctx, instr->src[i]); - vec->operands[i] = Operand{elems[i]}; + + if (instr->dest.dest.ssa.bit_size >= 32 || dst.type() == RegType::vgpr) { + aco_ptr vec{create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, instr->dest.dest.ssa.num_components, 1)}; + for (unsigned i = 0; i < num; ++i) + vec->operands[i] = Operand{elems[i]}; + vec->definitions[0] = Definition(dst); + ctx->block->instructions.emplace_back(std::move(vec)); + ctx->allocated_vec.emplace(dst.id(), elems); + } else { + // TODO: that is a bit suboptimal.. + Temp mask = bld.copy(bld.def(s1), Operand((1u << instr->dest.dest.ssa.bit_size) - 1)); + for (unsigned i = 0; i < num - 1; ++i) + if (((i+1) * instr->dest.dest.ssa.bit_size) % 32) + elems[i] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), elems[i], mask); + for (unsigned i = 0; i < num; ++i) { + unsigned bit = i * instr->dest.dest.ssa.bit_size; + if (bit % 32 == 0) { + elems[bit / 32] = elems[i]; + } else { + elems[i] = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), + elems[i], Operand((i * instr->dest.dest.ssa.bit_size) % 32)); + elems[bit / 32] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), elems[bit / 32], elems[i]); + } + } + if (dst.size() == 1) + bld.copy(Definition(dst), elems[0]); + else + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), elems[0], elems[1]); } - vec->definitions[0] = Definition(dst); - ctx->block->instructions.emplace_back(std::move(vec)); - ctx->allocated_vec.emplace(dst.id(), elems); break; } case nir_op_mov: {