nir: Add ability to lower non-const quad broadcasts to const ones.
author Timur Kristóf <timur.kristof@gmail.com>
Wed, 11 Mar 2020 14:01:56 +0000 (15:01 +0100)
committer Marge Bot <eric+marge@anholt.net>
Thu, 12 Mar 2020 13:16:07 +0000 (13:16 +0000)
Some hardware doesn't support subgroup shuffle, so on such hardware
it makes no sense to lower quad broadcasts to shuffles. Instead, let's
lower them to four const quad broadcasts (one per lane index) and
select between their results with a chain of bcsel instructions.
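
As an illustrative sketch (C-like pseudocode; the variable names are
invented here, not taken from the pass), a dynamic quad broadcast such as

   x = subgroupQuadBroadcast(v, lane);   /* lane is not a constant */

is turned into the equivalent of

   q0 = subgroupQuadBroadcast(v, 0);
   q1 = subgroupQuadBroadcast(v, 1);
   q2 = subgroupQuadBroadcast(v, 2);
   q3 = subgroupQuadBroadcast(v, 3);
   x  = lane == 3 ? q3 : lane == 2 ? q2 : lane == 1 ? q1 : q0;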

Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4147>

src/compiler/nir/nir.h
src/compiler/nir/nir_lower_subgroups.c

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a151f7b8e74e875c23e31591c565520d433460af..c41e78538f08f6198767ffb3a29ec471445f99fd 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3896,6 +3896,7 @@ typedef struct nir_lower_subgroups_options {
    bool lower_shuffle_to_32bit:1;
    bool lower_quad:1;
    bool lower_quad_broadcast_dynamic:1;
+   bool lower_quad_broadcast_dynamic_to_const:1;
 } nir_lower_subgroups_options;
 
 bool nir_lower_subgroups(nir_shader *shader,
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
index 4462c708ec8a1f91dc6128dcc3492db28c1b9b51..f5eebb851446dce901085dd9f5c110e5688e5bd5 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -301,6 +301,46 @@ build_subgroup_mask(nir_builder *b, unsigned bit_size,
                                   nir_load_subgroup_size(b)));
 }
 
+static nir_ssa_def *
+lower_dynamic_quad_broadcast(nir_builder *b, nir_intrinsic_instr *intrin,
+                             const nir_lower_subgroups_options *options)
+{
+   if (!options->lower_quad_broadcast_dynamic_to_const)
+      return lower_shuffle(b, intrin, options->lower_to_scalar, false);
+
+   nir_ssa_def *dst = NULL;
+
+   for (unsigned i = 0; i < 4; ++i) {
+      nir_intrinsic_instr *qbcst =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_quad_broadcast);
+
+      qbcst->num_components = intrin->num_components;
+      qbcst->src[1] = nir_src_for_ssa(nir_imm_int(b, i));
+      nir_src_copy(&qbcst->src[0], &intrin->src[0], qbcst);
+      nir_ssa_dest_init(&qbcst->instr, &qbcst->dest,
+                        intrin->dest.ssa.num_components,
+                        intrin->dest.ssa.bit_size, NULL);
+
+      nir_ssa_def *qbcst_dst = NULL;
+
+      if (options->lower_to_scalar && qbcst->num_components > 1) {
+         qbcst_dst = lower_subgroup_op_to_scalar(b, qbcst, false);
+      } else {
+         nir_builder_instr_insert(b, &qbcst->instr);
+         qbcst_dst = &qbcst->dest.ssa;
+      }
+
+      if (i)
+         dst = nir_bcsel(b, nir_ieq(b, intrin->src[1].ssa,
+                                    nir_imm_int(b, i)),
+                         qbcst_dst, dst);
+      else
+         dst = qbcst_dst;
+   }
+
+   return dst;
+}
+
 static nir_ssa_def *
 lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
 {
@@ -477,7 +517,7 @@ lower_subgroups_instr(nir_builder *b, nir_instr *instr, void *_options)
           (options->lower_quad_broadcast_dynamic &&
            intrin->intrinsic == nir_intrinsic_quad_broadcast &&
            !nir_src_is_const(intrin->src[1])))
-         return lower_shuffle(b, intrin, options->lower_to_scalar, false);
+         return lower_dynamic_quad_broadcast(b, intrin, options);
       else if (options->lower_to_scalar && intrin->num_components > 1)
          return lower_subgroup_op_to_scalar(b, intrin, false);
       break;