nir: Reduce destination size of ballot intrinsic when possible
author Matt Turner <mattst88@gmail.com>
Fri, 30 Jun 2017 22:07:10 +0000 (15:07 -0700)
committer Matt Turner <mattst88@gmail.com>
Thu, 20 Jul 2017 23:56:49 +0000 (16:56 -0700)
Some hardware, like i965, doesn't support group sizes greater than 32.
In that case, we can reduce the destination size of the ballot
intrinsic, which will simplify our code generation.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/compiler/nir/nir.h
src/compiler/nir/nir_opt_intrinsics.c
src/intel/compiler/brw_compiler.c

index 5ddab57776f0eb4e2a71cf5c40a7cbbddb2fd8b7..78684fd50c843a68ea65fc5af0fcb0c78b2b1d05 100644 (file)
@@ -1843,6 +1843,8 @@ typedef struct nir_shader_compiler_options {
     */
    bool use_interpolated_input_intrinsics;
 
+   unsigned max_subgroup_size;
+
    unsigned max_unroll_iterations;
 } nir_shader_compiler_options;
 
index 4f36166510b85f48ab57a7e375d8a646eef7358b..f12dc8779cba162d156a4785e74fe0c2b38ac0a7 100644 (file)
@@ -62,6 +62,24 @@ opt_intrinsics_impl(nir_function_impl *impl)
             replacement = nir_imm_int(&b, NIR_TRUE);
             break;
          }
+         case nir_intrinsic_ballot: {
+            assert(b.shader->options->max_subgroup_size != 0);
+            if (b.shader->options->max_subgroup_size > 32 ||
+                intrin->dest.ssa.bit_size <= 32)
+               continue;
+
+            nir_intrinsic_instr *ballot =
+               nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot);
+            nir_ssa_dest_init(&ballot->instr, &ballot->dest, 1, 32, NULL);
+            nir_src_copy(&ballot->src[0], &intrin->src[0], ballot);
+
+            nir_builder_instr_insert(&b, &ballot->instr);
+
+            replacement = nir_pack_64_2x32_split(&b,
+                                                 &ballot->dest.ssa,
+                                                 nir_imm_int(&b, 0));
+            break;
+         }
          case nir_intrinsic_load_subgroup_eq_mask:
          case nir_intrinsic_load_subgroup_ge_mask:
          case nir_intrinsic_load_subgroup_gt_mask:
index 39a8237ff07162f75ab91bd67c3e07876dc7ec29..e86ab0fc687b7789b42cac8c5fbc2db25ed66950 100644 (file)
@@ -58,6 +58,7 @@ static const struct nir_shader_compiler_options scalar_nir_options = {
    .lower_unpack_unorm_2x16 = true,
    .lower_unpack_unorm_4x8 = true,
    .lower_subgroup_masks = true,
+   .max_subgroup_size = 64, /* FIXME */
    .max_unroll_iterations = 32,
 };