nir/lower_subgroups: add lower_shuffle_to_swizzle_amd
author Rhys Perry <pendingchaos02@gmail.com>
Tue, 23 Jun 2020 16:37:37 +0000 (17:37 +0100)
committer Marge Bot <eric+marge@anholt.net>
Mon, 13 Jul 2020 14:11:50 +0000 (14:11 +0000)
masked_swizzle_amd can be much faster than shuffle.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5695>

src/compiler/nir/nir.h
src/compiler/nir/nir_lower_subgroups.c

index 9f2a90831f0462568d94d065ac053472a7fe094f..e0caedaf5f491a74ee3160672e8c8069bcd74fc8 100644 (file)
@@ -4153,6 +4153,7 @@ typedef struct nir_lower_subgroups_options {
    bool lower_subgroup_masks:1;
    bool lower_shuffle:1;
    bool lower_shuffle_to_32bit:1;
+   bool lower_shuffle_to_swizzle_amd:1;
    bool lower_quad:1;
    bool lower_quad_broadcast_dynamic:1;
    bool lower_quad_broadcast_dynamic_to_const:1;
index 5244d916fd9d3aa7a1c0ea58d831c404c8346724..541544e2474cdb0295c6181ae6d2b410b492c234 100644 (file)
@@ -222,10 +222,47 @@ lower_vote_eq_to_ballot(nir_builder *b, nir_intrinsic_instr *intrin,
                   nir_imm_intN_t(b, 0, options->ballot_bit_size));
 }
 
+static nir_ssa_def * /* returns the lowered value, or NULL when this lowering does not apply */
+lower_shuffle_to_swizzle(nir_builder *b, nir_intrinsic_instr *intrin,
+                         const nir_lower_subgroups_options *options)
+{ /* Rewrite a shuffle whose src[1] is a constant into a masked_swizzle_amd intrinsic. */
+   unsigned mask = nir_src_as_uint(intrin->src[1]); /* the caller only gets here when src[1] is constant */
+
+   if (mask >= 32) /* NOTE(review): presumably the swizzle's xor field is only 5 bits wide -- confirm */
+      return NULL; /* the caller then continues with its generic shuffle lowering */
+
+   nir_intrinsic_instr *swizzle = nir_intrinsic_instr_create(
+      b->shader, nir_intrinsic_masked_swizzle_amd);
+   swizzle->num_components = intrin->num_components; /* same vector width as the shuffle being replaced */
+   nir_src_copy(&swizzle->src[0], &intrin->src[0], swizzle); /* the data operand is carried over unchanged */
+   nir_intrinsic_set_swizzle_mask(swizzle, (mask << 10) | 0x1f); /* NOTE(review): looks like and=0x1f in bits 0-4, or=0, xor=mask from bit 10 -- confirm against the intrinsic's definition */
+   nir_ssa_dest_init(&swizzle->instr, &swizzle->dest,
+                     intrin->dest.ssa.num_components,
+                     intrin->dest.ssa.bit_size, NULL); /* destination mirrors the original's component count and bit size */
+
+   if (options->lower_to_scalar && swizzle->num_components > 1) { /* vector result and scalarization requested */
+      return lower_subgroup_op_to_scalar(b, swizzle, options->lower_shuffle_to_32bit); /* swizzle is handed over without being inserted here */
+   } else if (options->lower_shuffle_to_32bit && swizzle->src[0].ssa->bit_size == 64) { /* 64-bit data and 32-bit lowering requested */
+      return lower_subgroup_op_to_32bit(b, swizzle); /* swizzle is handed over without being inserted here */
+   } else {
+      nir_builder_instr_insert(b, &swizzle->instr); /* no further lowering requested: emit the swizzle as built */
+      return &swizzle->dest.ssa;
+   }
+}
+
 static nir_ssa_def *
 lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
               const nir_lower_subgroups_options *options)
 {
+   if (intrin->intrinsic == nir_intrinsic_shuffle_xor &&
+       options->lower_shuffle_to_swizzle_amd &&
+       nir_src_is_const(intrin->src[1])) {
+      nir_ssa_def *result =
+         lower_shuffle_to_swizzle(b, intrin, options);
+      if (result)
+         return result;
+   }
+
    nir_ssa_def *index = nir_load_subgroup_invocation(b);
    bool is_shuffle = false;
    switch (intrin->intrinsic) {