From 338d03090f9d20510eba0672d90943b8ecff64bc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Mon, 18 Nov 2019 16:11:19 +0100 Subject: [PATCH] aco: Allow optimizing vote_all and nir_op_iand. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit By adding an extra instruction, we can replace the operands of the s_cselect_b64, which allows it to get picked up by the optimizer when it looks for uniform booleans. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/aco_instruction_selection.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 8af2e5eadf0..996b06d8534 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5300,7 +5300,8 @@ Temp emit_boolean_reduce(isel_context *ctx, nir_op op, unsigned cluster_size, Te } else if (op == nir_op_iand && cluster_size == ctx->program->wave_size) { //subgroupAnd(val) -> (exec & ~val) == 0 Temp tmp = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src).def(1).getTemp(); - return bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(0u), Operand(-1u), bld.scc(tmp)); + Temp all = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), bld.scc(tmp), Operand(0u)); + return bool_to_vector_condition(ctx, all); } else if (op == nir_op_ior && cluster_size == ctx->program->wave_size) { //subgroupOr(val) -> (val & exec) != 0 Temp tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm)).def(1).getTemp(); @@ -5905,8 +5906,8 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr) assert(dst.regClass() == bld.lm); Temp tmp = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src).def(1).getTemp(); - Temp val = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(0u), Operand(-1u), bld.scc(tmp)); - emit_wqm(ctx, val, dst); + Temp all = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), bld.scc(tmp), Operand(0u)); + bool_to_vector_condition(ctx, emit_wqm(ctx, all), dst); break; } case nir_intrinsic_vote_any: { @@ -5915,9 +5916,8 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr) assert(src.regClass() == bld.lm); assert(dst.regClass() == bld.lm); - Temp tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src).def(1).getTemp(); - Temp val = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), Operand(0u), bld.scc(tmp)); - emit_wqm(ctx, val, dst); + Temp tmp = bool_to_scalar_condition(ctx, src); + bool_to_vector_condition(ctx, emit_wqm(ctx, tmp), dst); break; } case nir_intrinsic_reduce: -- 2.30.2