From a6a731bea5e835a49c0ff12b7d729ba761d3296f Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 2 Jul 2020 12:15:28 +0100
Subject: [PATCH] aco: implement <32-bit masked_swizzle_amd
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This is needed since we will be lowering some 8/16-bit shuffles to
masked_swizzle_amd.

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 3b82e46e33f..138cf7de909 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7951,10 +7951,20 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
       }
       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
       uint32_t mask = nir_intrinsic_swizzle_mask(instr);
-      if (dst.regClass() == v1) {
-         emit_wqm(ctx,
-                  emit_masked_swizzle(ctx, bld, src, mask),
-                  dst);
+      if (instr->dest.ssa.bit_size == 1) {
+         assert(src.regClass() == bld.lm);
+         src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand((uint32_t)-1), src);
+         src = emit_masked_swizzle(ctx, bld, src, mask);
+         Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), src);
+         emit_wqm(ctx, tmp, dst);
+      } else if (dst.regClass() == v1b) {
+         Temp tmp = emit_wqm(ctx, emit_masked_swizzle(ctx, bld, src, mask));
+         emit_extract_vector(ctx, tmp, 0, dst);
+      } else if (dst.regClass() == v2b) {
+         Temp tmp = emit_wqm(ctx, emit_masked_swizzle(ctx, bld, src, mask));
+         emit_extract_vector(ctx, tmp, 0, dst);
+      } else if (dst.regClass() == v1) {
+         emit_wqm(ctx, emit_masked_swizzle(ctx, bld, src, mask), dst);
       } else if (dst.regClass() == v2) {
          Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
          bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
-- 
2.30.2