From 21db083504fde3100f0b528f683a087357f5aaff Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Thu, 28 Nov 2019 10:41:19 +0100 Subject: [PATCH] aco/wave32: Allow setting the subgroup ballot size to 64-bit. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Previously, it would only work when the ballot size was set to the lane mask. This patch makes it possible to set the ballot size to either 32-bit or 64-bit for wave32 mode. Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann --- src/amd/compiler/aco_instruction_selection.cpp | 11 ++++++++--- src/amd/compiler/aco_instruction_selection_setup.cpp | 1 - 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 70cee225670..84c88e4eaa5 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -5709,18 +5709,23 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr) Temp src = get_ssa_temp(ctx, instr->src[0].ssa); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); Definition tmp = bld.def(dst.regClass()); + Definition lanemask_tmp = dst.size() == bld.lm.size() ? 
tmp : bld.def(src.regClass()); if (instr->src[0].ssa->bit_size == 1) { assert(src.regClass() == bld.lm); - bld.sop2(Builder::s_and, tmp, bld.def(s1, scc), Operand(exec, bld.lm), src); + bld.sop2(Builder::s_and, lanemask_tmp, bld.def(s1, scc), Operand(exec, bld.lm), src); } else if (instr->src[0].ssa->bit_size == 32 && src.regClass() == v1) { - bld.vopc(aco_opcode::v_cmp_lg_u32, tmp, Operand(0u), src); + bld.vopc(aco_opcode::v_cmp_lg_u32, lanemask_tmp, Operand(0u), src); } else if (instr->src[0].ssa->bit_size == 64 && src.regClass() == v2) { - bld.vopc(aco_opcode::v_cmp_lg_u64, tmp, Operand(0u), src); + bld.vopc(aco_opcode::v_cmp_lg_u64, lanemask_tmp, Operand(0u), src); } else { fprintf(stderr, "Unimplemented NIR instr bit size: "); nir_print_instr(&instr->instr, stderr); fprintf(stderr, "\n"); } + if (dst.size() != bld.lm.size()) { + /* Wave32 with ballot size set to 64 */ + bld.pseudo(aco_opcode::p_create_vector, Definition(tmp), lanemask_tmp.getTemp(), Operand(0u)); + } emit_wqm(ctx, tmp.getTemp(), dst); break; } diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index a7446c6c058..47f5778822f 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -294,7 +294,6 @@ void init_context(isel_context *ctx, nir_shader *shader) break; case nir_intrinsic_ballot: type = RegType::sgpr; - size = lane_mask_size; break; case nir_intrinsic_load_sample_id: case nir_intrinsic_load_sample_mask_in: -- 2.30.2