From: Rhys Perry
Date: Tue, 21 Apr 2020 18:37:25 +0000 (+0100)
Subject: aco: improve code for 32-bit isign
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0d9fe0405f5520716703d61544c6d899f051aa8b;p=mesa.git

aco: improve code for 32-bit isign

No shader-db changes on Navi.

Signed-off-by: Rhys Perry
Reviewed-by: Timur Kristóf
Part-of:
---

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index a76f1bb25af..acf8b36912a 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1130,9 +1130,8 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
    case nir_op_isign: {
       Temp src = get_alu_src(ctx, instr->src[0]);
       if (dst.regClass() == s1) {
-         Temp tmp = bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), src, Operand(31u));
-         Temp gtz = bld.sopc(aco_opcode::s_cmp_gt_i32, bld.def(s1, scc), src, Operand(0u));
-         bld.sop2(aco_opcode::s_add_i32, Definition(dst), bld.def(s1, scc), gtz, tmp);
+         Temp tmp = bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), src, Operand((uint32_t)-1));
+         bld.sop2(aco_opcode::s_min_i32, Definition(dst), bld.def(s1, scc), tmp, Operand(1u));
       } else if (dst.regClass() == s2) {
          Temp neg = bld.sop2(aco_opcode::s_ashr_i64, bld.def(s2), bld.def(s1, scc), src, Operand(63u));
          Temp neqz;
@@ -1143,9 +1142,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          /* SCC gets zero-extended to 64 bit */
          bld.sop2(aco_opcode::s_or_b64, Definition(dst), bld.def(s1, scc), neg, bld.scc(neqz));
       } else if (dst.regClass() == v1) {
-         Temp tmp = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand(31u), src);
-         Temp gtz = bld.vopc(aco_opcode::v_cmp_ge_i32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
-         bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand(1u), tmp, gtz);
+         bld.vop3(aco_opcode::v_med3_i32, Definition(dst), Operand((uint32_t)-1), src, Operand(1u));
       } else if (dst.regClass() == v2) {
          Temp upper = emit_extract_vector(ctx, src, 1, v1);
          Temp neg = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand(31u), upper);