From 14d748eb28efa57507a3a84b7ef157b27ab27752 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 21 Aug 2020 13:12:38 +0100 Subject: [PATCH] aco: fix sgpr ubfe/ibfe if the offset is too large MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If the offset is large enough, it could affect the width. I'm also not sure if the hardware masks the offset by 0x1f. Found by inspection. No fossil-db changes. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- .../compiler/aco_instruction_selection.cpp | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 2beb0251d8a..f156acc535a 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2765,24 +2765,25 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) unreachable("Unsupported BFE bit size"); if (dst.type() == RegType::sgpr) { - Operand extract; nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src); nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src); if (const_offset && const_bits) { - uint32_t const_extract = (const_bits->u32 << 16) | const_offset->u32; - extract = Operand(const_extract); + uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f); + aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32; + bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand(extract)); + } else if (instr->op == nir_op_ubfe) { + Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset); + Temp masked = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask); + bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset); } else { - Operand width; - if (const_bits) { - width = Operand(const_bits->u32 << 16); - } else { - width = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u)); - } - extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), offset, width); - } + Operand bits_op = const_bits ? Operand(const_bits->u32 << 16) : + bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u)); + Operand offset_op = const_offset ? Operand(const_offset->u32 & 0x1fu) : + bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), offset, Operand(0x1fu)); - aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32; - bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, extract); + Temp extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), bits_op, offset_op); + bld.sop2(aco_opcode::s_bfe_i32, Definition(dst), bld.def(s1, scc), base, extract); + } } else { aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::v_bfe_u32 : aco_opcode::v_bfe_i32; -- 2.30.2