From fdf9b674ee3a17c98fd266750dec3475910542f6 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 22 Apr 2020 21:35:48 -0500 Subject: [PATCH] nir/lower_subgroups: Mask off unused bits in ballot ops Thanks to VK_EXT_subgroup_size_control, we can end up with gl_SubgroupSize being as low as 8 on Intel. Fixes: d10de253097 "anv: Implement VK_EXT_subgroup_size_control" Reviewed-by: Caio Marcelo de Oliveira Filho Part-of: --- src/compiler/nir/nir_lower_subgroups.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c index f5eebb85144..2a7361b7472 100644 --- a/src/compiler/nir/nir_lower_subgroups.c +++ b/src/compiler/nir/nir_lower_subgroups.c @@ -446,6 +446,32 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options) assert(intrin->src[0].is_ssa); nir_ssa_def *int_val = ballot_type_to_uint(b, intrin->src[0].ssa, options->ballot_bit_size); + + if (intrin->intrinsic != nir_intrinsic_ballot_bitfield_extract && + intrin->intrinsic != nir_intrinsic_ballot_find_lsb) { + /* For OpGroupNonUniformBallotFindMSB, the SPIR-V Spec says: + * + * "Find the most significant bit set to 1 in Value, considering + * only the bits in Value required to represent all bits of the + * group’s invocations. If none of the considered bits is set to + * 1, the result is undefined." + * + * It has similar text for the other three. This means that, in case + * the subgroup size is less than 32, we have to mask off the unused + * bits. If the subgroup size is fixed and greater than or equal to + * 32, the mask will be 0xffffffff and nir_opt_algebraic will delete + * the iand. + * + * We only have to worry about this for BitCount and FindMSB because + * FindLSB counts from the bottom and BitfieldExtract selects + * individual bits. In either case, if run outside the range of + * valid bits, we hit the undefined results case and we can return + * anything we want. + */ + int_val = nir_iand(b, int_val, + build_subgroup_mask(b, options->ballot_bit_size, options)); + } + switch (intrin->intrinsic) { case nir_intrinsic_ballot_bitfield_extract: assert(intrin->src[1].is_ssa); -- 2.30.2