From 7ba645d5cb3974d90db0dc7f78d935c7be29448d Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Tue, 23 Jun 2020 17:37:37 +0100
Subject: [PATCH] nir/lower_subgroups: add lower_shuffle_to_swizzle_amd
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

masked_swizzle_amd can be much faster than shuffle.

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/compiler/nir/nir.h                 |  1 +
 src/compiler/nir/nir_lower_subgroups.c | 37 ++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9f2a90831f0..e0caedaf5f4 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4153,6 +4153,7 @@ typedef struct nir_lower_subgroups_options {
    bool lower_subgroup_masks:1;
    bool lower_shuffle:1;
    bool lower_shuffle_to_32bit:1;
+   bool lower_shuffle_to_swizzle_amd:1;
    bool lower_quad:1;
    bool lower_quad_broadcast_dynamic:1;
    bool lower_quad_broadcast_dynamic_to_const:1;
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
index 5244d916fd9..541544e2474 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -222,10 +222,47 @@ lower_vote_eq_to_ballot(nir_builder *b, nir_intrinsic_instr *intrin,
                   nir_imm_intN_t(b, 0, options->ballot_bit_size));
 }
 
+static nir_ssa_def *
+lower_shuffle_to_swizzle(nir_builder *b, nir_intrinsic_instr *intrin,
+                         const nir_lower_subgroups_options *options)
+{
+   unsigned mask = nir_src_as_uint(intrin->src[1]);
+
+   if (mask >= 32)
+      return NULL;
+
+   nir_intrinsic_instr *swizzle = nir_intrinsic_instr_create(
+      b->shader, nir_intrinsic_masked_swizzle_amd);
+   swizzle->num_components = intrin->num_components;
+   nir_src_copy(&swizzle->src[0], &intrin->src[0], swizzle);
+   nir_intrinsic_set_swizzle_mask(swizzle, (mask << 10) | 0x1f);
+   nir_ssa_dest_init(&swizzle->instr, &swizzle->dest,
+                     intrin->dest.ssa.num_components,
+                     intrin->dest.ssa.bit_size, NULL);
+
+   if (options->lower_to_scalar && swizzle->num_components > 1) {
+      return lower_subgroup_op_to_scalar(b, swizzle, options->lower_shuffle_to_32bit);
+   } else if (options->lower_shuffle_to_32bit && swizzle->src[0].ssa->bit_size == 64) {
+      return lower_subgroup_op_to_32bit(b, swizzle);
+   } else {
+      nir_builder_instr_insert(b, &swizzle->instr);
+      return &swizzle->dest.ssa;
+   }
+}
+
 static nir_ssa_def *
 lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
               const nir_lower_subgroups_options *options)
 {
+   if (intrin->intrinsic == nir_intrinsic_shuffle_xor &&
+       options->lower_shuffle_to_swizzle_amd &&
+       nir_src_is_const(intrin->src[1])) {
+      nir_ssa_def *result =
+         lower_shuffle_to_swizzle(b, intrin, options);
+      if (result)
+         return result;
+   }
+
    nir_ssa_def *index = nir_load_subgroup_invocation(b);
    bool is_shuffle = false;
    switch (intrin->intrinsic) {
-- 
2.30.2