From 1038d385a9b5817132d16f9f5877743d0bb8cca0 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 30 Jun 2017 15:07:10 -0700 Subject: [PATCH] nir: Reduce destination size of ballot intrinsic when possible Some hardware, like i965, doesn't support group sizes greater than 32. In that case, we can reduce the destination size of the ballot intrinsic, which will simplify our code generation. Reviewed-by: Connor Abbott Reviewed-by: Kenneth Graunke --- src/compiler/nir/nir.h | 2 ++ src/compiler/nir/nir_opt_intrinsics.c | 18 ++++++++++++++++++ src/intel/compiler/brw_compiler.c | 1 + 3 files changed, 21 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 5ddab57776f..78684fd50c8 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1843,6 +1843,8 @@ typedef struct nir_shader_compiler_options { */ bool use_interpolated_input_intrinsics; + unsigned max_subgroup_size; + unsigned max_unroll_iterations; } nir_shader_compiler_options; diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c index 4f36166510b..f12dc8779cb 100644 --- a/src/compiler/nir/nir_opt_intrinsics.c +++ b/src/compiler/nir/nir_opt_intrinsics.c @@ -62,6 +62,24 @@ opt_intrinsics_impl(nir_function_impl *impl) replacement = nir_imm_int(&b, NIR_TRUE); break; } + case nir_intrinsic_ballot: { + assert(b.shader->options->max_subgroup_size != 0); + if (b.shader->options->max_subgroup_size > 32 || + intrin->dest.ssa.bit_size <= 32) + continue; + + nir_intrinsic_instr *ballot = + nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot); + nir_ssa_dest_init(&ballot->instr, &ballot->dest, 1, 32, NULL); + nir_src_copy(&ballot->src[0], &intrin->src[0], ballot); + + nir_builder_instr_insert(&b, &ballot->instr); + + replacement = nir_pack_64_2x32_split(&b, + &ballot->dest.ssa, + nir_imm_int(&b, 0)); + break; + } case nir_intrinsic_load_subgroup_eq_mask: case nir_intrinsic_load_subgroup_ge_mask: case nir_intrinsic_load_subgroup_gt_mask: diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 39a8237ff07..e86ab0fc687 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -58,6 +58,7 @@ static const struct nir_shader_compiler_options scalar_nir_options = { .lower_unpack_unorm_2x16 = true, .lower_unpack_unorm_4x8 = true, .lower_subgroup_masks = true, + .max_subgroup_size = 64, /* FIXME */ .max_unroll_iterations = 32, }; -- 2.30.2