From: Rhys Perry
Date: Fri, 21 Aug 2020 12:12:38 +0000 (+0100)
Subject: aco: fix sgpr ubfe/ibfe if the offset is too large
X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=14d748eb28efa57507a3a84b7ef157b27ab27752

aco: fix sgpr ubfe/ibfe if the offset is too large

If the offset is large enough, it could affect the width. I'm also not
sure if the hardware masks the offset by 0x1f.

Found by inspection. No fossil-db changes.

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 2beb0251d8a..f156acc535a 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2765,24 +2765,25 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          unreachable("Unsupported BFE bit size");
 
       if (dst.type() == RegType::sgpr) {
-         Operand extract;
          nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src);
          nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src);
          if (const_offset && const_bits) {
-            uint32_t const_extract = (const_bits->u32 << 16) | const_offset->u32;
-            extract = Operand(const_extract);
+            uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f);
+            aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
+            bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand(extract));
+         } else if (instr->op == nir_op_ubfe) {
+            Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset);
+            Temp masked = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask);
+            bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset);
          } else {
-            Operand width;
-            if (const_bits) {
-               width = Operand(const_bits->u32 << 16);
-            } else {
-               width = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u));
-            }
-            extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), offset, width);
-         }
+            Operand bits_op = const_bits ? Operand(const_bits->u32 << 16) :
+                              bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u));
+            Operand offset_op = const_offset ? Operand(const_offset->u32 & 0x1fu) :
+                                bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), offset, Operand(0x1fu));
 
-         aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
-         bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, extract);
+            Temp extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), bits_op, offset_op);
+            bld.sop2(aco_opcode::s_bfe_i32, Definition(dst), bld.def(s1, scc), base, extract);
+         }
 
       } else {
          aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::v_bfe_u32 : aco_opcode::v_bfe_i32;