From 28da82f9783091cb3e79586962f98a5bc165cff8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 22 Aug 2017 13:23:59 -0700 Subject: [PATCH] nir: Add a new subgroups lowering pass This commit pulls nir_lower_read_invocations_to_scalar along with most of the guts of nir_opt_intrinsics (which mostly does subgroup lowering) into a new nir_lower_subgroups pass. There are various other bits of subgroup lowering that we're going to want to do so it makes a bit more sense to keep it all together in one pass. We also move it in i965 to happen after nir_lower_system_values to ensure that because we want to handle the subgroup mask system value intrinsics here. Reviewed-by: Iago Toral Quiroga --- src/compiler/Makefile.sources | 2 +- src/compiler/nir/meson.build | 2 +- src/compiler/nir/nir.h | 12 +- .../nir/nir_lower_read_invocation_to_scalar.c | 112 ----------- src/compiler/nir/nir_lower_subgroups.c | 184 ++++++++++++++++++ src/compiler/nir/nir_opt_intrinsics.c | 71 +------ src/intel/compiler/brw_compiler.c | 3 - src/intel/compiler/brw_nir.c | 8 +- 8 files changed, 208 insertions(+), 186 deletions(-) delete mode 100644 src/compiler/nir/nir_lower_read_invocation_to_scalar.c create mode 100644 src/compiler/nir/nir_lower_subgroups.c diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index b80468c7a44..abcd8c6641f 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -233,11 +233,11 @@ NIR_FILES = \ nir/nir_lower_passthrough_edgeflags.c \ nir/nir_lower_patch_vertices.c \ nir/nir_lower_phis_to_scalar.c \ - nir/nir_lower_read_invocation_to_scalar.c \ nir/nir_lower_regs_to_ssa.c \ nir/nir_lower_returns.c \ nir/nir_lower_samplers.c \ nir/nir_lower_samplers_as_deref.c \ + nir/nir_lower_subgroups.c \ nir/nir_lower_system_values.c \ nir/nir_lower_tex.c \ nir/nir_lower_to_source_mods.c \ diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index bb9ac084480..e5c8326aa06 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -120,11 +120,11 @@ files_libnir = files( 'nir_lower_passthrough_edgeflags.c', 'nir_lower_patch_vertices.c', 'nir_lower_phis_to_scalar.c', - 'nir_lower_read_invocation_to_scalar.c', 'nir_lower_regs_to_ssa.c', 'nir_lower_returns.c', 'nir_lower_samplers.c', 'nir_lower_samplers_as_deref.c', + 'nir_lower_subgroups.c', 'nir_lower_system_values.c', 'nir_lower_tex.c', 'nir_lower_to_source_mods.c', diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 0174c30504e..1f4ec2cb866 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1835,9 +1835,6 @@ typedef struct nir_shader_compiler_options { bool lower_extract_byte; bool lower_extract_word; - bool lower_vote_trivial; - bool lower_subgroup_masks; - /** * Does the driver support real 32-bit integers? (Otherwise, integers * are simulated by floats.) @@ -2479,6 +2476,15 @@ bool nir_lower_samplers(nir_shader *shader, bool nir_lower_samplers_as_deref(nir_shader *shader, const struct gl_shader_program *shader_program); +typedef struct nir_lower_subgroups_options { + bool lower_to_scalar:1; + bool lower_vote_trivial:1; + bool lower_subgroup_masks:1; +} nir_lower_subgroups_options; + +bool nir_lower_subgroups(nir_shader *shader, + const nir_lower_subgroups_options *options); + bool nir_lower_system_values(nir_shader *shader); typedef struct nir_lower_tex_options { diff --git a/src/compiler/nir/nir_lower_read_invocation_to_scalar.c b/src/compiler/nir/nir_lower_read_invocation_to_scalar.c deleted file mode 100644 index 69e7c0a73a6..00000000000 --- a/src/compiler/nir/nir_lower_read_invocation_to_scalar.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "nir.h" -#include "nir_builder.h" - -/** @file nir_lower_read_invocation_to_scalar.c - * - * Replaces nir_intrinsic_read_invocation/nir_intrinsic_read_first_invocation - * operations with num_components != 1 with individual per-channel operations. - */ - -static void -lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin) -{ - b->cursor = nir_before_instr(&intrin->instr); - - nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0], intrin->num_components); - nir_ssa_def *reads[4]; - - for (unsigned i = 0; i < intrin->num_components; i++) { - nir_intrinsic_instr *chan_intrin = - nir_intrinsic_instr_create(b->shader, intrin->intrinsic); - nir_ssa_dest_init(&chan_intrin->instr, &chan_intrin->dest, - 1, intrin->dest.ssa.bit_size, NULL); - chan_intrin->num_components = 1; - - /* value */ - chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value, i)); - /* invocation */ - if (intrin->intrinsic == nir_intrinsic_read_invocation) - nir_src_copy(&chan_intrin->src[1], &intrin->src[1], chan_intrin); - - nir_builder_instr_insert(b, &chan_intrin->instr); - - reads[i] = &chan_intrin->dest.ssa; - } - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(nir_vec(b, reads, - intrin->num_components))); - nir_instr_remove(&intrin->instr); -} - -static bool -nir_lower_read_invocation_to_scalar_impl(nir_function_impl *impl) -{ - bool progress = false; - nir_builder b; - nir_builder_init(&b, impl); - - nir_foreach_block(block, impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - if (intrin->num_components == 1) - continue; - - switch (intrin->intrinsic) { - case nir_intrinsic_read_invocation: - case nir_intrinsic_read_first_invocation: - lower_read_invocation_to_scalar(&b, intrin); - progress = true; - break; - default: - break; - } - } - } - - if (progress) { - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - } - return progress; -} - -bool -nir_lower_read_invocation_to_scalar(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(function, shader) { - if (function->impl) - progress |= nir_lower_read_invocation_to_scalar_impl(function->impl); - } - - return progress; -} diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c new file mode 100644 index 00000000000..0d11dc9c23a --- /dev/null +++ b/src/compiler/nir/nir_lower_subgroups.c @@ -0,0 +1,184 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" + +/** + * \file nir_opt_intrinsics.c + */ + +static nir_ssa_def * +lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin) +{ + /* This is safe to call on scalar things but it would be silly */ + assert(intrin->dest.ssa.num_components > 1); + + nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0], + intrin->num_components); + nir_ssa_def *reads[4]; + + for (unsigned i = 0; i < intrin->num_components; i++) { + nir_intrinsic_instr *chan_intrin = + nir_intrinsic_instr_create(b->shader, intrin->intrinsic); + nir_ssa_dest_init(&chan_intrin->instr, &chan_intrin->dest, + 1, intrin->dest.ssa.bit_size, NULL); + chan_intrin->num_components = 1; + + /* value */ + chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value, i)); + /* invocation */ + if (intrin->intrinsic == nir_intrinsic_read_invocation) + nir_src_copy(&chan_intrin->src[1], &intrin->src[1], chan_intrin); + + nir_builder_instr_insert(b, &chan_intrin->instr); + + reads[i] = &chan_intrin->dest.ssa; + } + + return nir_vec(b, reads, intrin->num_components); +} + +static nir_ssa_def * +high_subgroup_mask(nir_builder *b, + nir_ssa_def *count, + uint64_t base_mask) +{ + /* group_mask could probably be calculated more efficiently but we want to + * be sure not to shift by 64 if the subgroup size is 64 because the GLSL + * shift operator is undefined in that case. In any case if we were worried + * about efficency this should probably be done further down because the + * subgroup size is likely to be known at compile time. + */ + nir_ssa_def *subgroup_size = nir_load_subgroup_size(b); + nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull); + nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size); + nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift); + nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count); + + return nir_iand(b, higher_bits, group_mask); +} + +static nir_ssa_def * +lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin, + const nir_lower_subgroups_options *options) +{ + switch (intrin->intrinsic) { + case nir_intrinsic_vote_any: + case nir_intrinsic_vote_all: + if (options->lower_vote_trivial) + return nir_ssa_for_src(b, intrin->src[0], 1); + break; + + case nir_intrinsic_vote_eq: + if (options->lower_vote_trivial) + return nir_imm_int(b, NIR_TRUE); + break; + + case nir_intrinsic_read_invocation: + case nir_intrinsic_read_first_invocation: + if (options->lower_to_scalar && intrin->num_components > 1) + return lower_read_invocation_to_scalar(b, intrin); + break; + + case nir_intrinsic_load_subgroup_eq_mask: + case nir_intrinsic_load_subgroup_ge_mask: + case nir_intrinsic_load_subgroup_gt_mask: + case nir_intrinsic_load_subgroup_le_mask: + case nir_intrinsic_load_subgroup_lt_mask: { + if (!options->lower_subgroup_masks) + return NULL; + + nir_ssa_def *count = nir_load_subgroup_invocation(b); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_subgroup_eq_mask: + return nir_ishl(b, nir_imm_int64(b, 1ull), count); + case nir_intrinsic_load_subgroup_ge_mask: + return high_subgroup_mask(b, count, ~0ull); + case nir_intrinsic_load_subgroup_gt_mask: + return high_subgroup_mask(b, count, ~1ull); + case nir_intrinsic_load_subgroup_le_mask: + return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), count)); + case nir_intrinsic_load_subgroup_lt_mask: + return nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), count)); + default: + unreachable("you seriously can't tell this is unreachable?"); + } + break; + } + default: + break; + } + + return NULL; +} + +static bool +lower_subgroups_impl(nir_function_impl *impl, + const nir_lower_subgroups_options *options) +{ + nir_builder b; + nir_builder_init(&b, impl); + bool progress = false; + + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + b.cursor = nir_before_instr(instr); + + nir_ssa_def *lower = lower_subgroups_intrin(&b, intrin, options); + if (!lower) + continue; + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(lower)); + nir_instr_remove(instr); + progress = true; + } + } + + return progress; +} + +bool +nir_lower_subgroups(nir_shader *shader, + const nir_lower_subgroups_options *options) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + if (lower_subgroups_impl(function->impl, options)) { + progress = true; + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } + + return progress; +} diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c index d5fdc516007..98c8b1a01e8 100644 --- a/src/compiler/nir/nir_opt_intrinsics.c +++ b/src/compiler/nir/nir_opt_intrinsics.c @@ -28,26 +28,6 @@ * \file nir_opt_intrinsics.c */ -static nir_ssa_def * -high_subgroup_mask(nir_builder *b, - nir_ssa_def *count, - uint64_t base_mask) -{ - /* group_mask could probably be calculated more efficiently but we want to - * be sure not to shift by 64 if the subgroup size is 64 because the GLSL - * shift operator is undefined in that case. In any case if we were worried - * about efficency this should probably be done further down because the - * subgroup size is likely to be known at compile time. - */ - nir_ssa_def *subgroup_size = nir_load_subgroup_size(b); - nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull); - nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size); - nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift); - nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count); - - return nir_iand(b, higher_bits, group_mask); -} - static bool opt_intrinsics_impl(nir_function_impl *impl) { @@ -66,22 +46,14 @@ opt_intrinsics_impl(nir_function_impl *impl) switch (intrin->intrinsic) { case nir_intrinsic_vote_any: - case nir_intrinsic_vote_all: { - nir_const_value *val = nir_src_as_const_value(intrin->src[0]); - if (!val && !b.shader->options->lower_vote_trivial) - continue; - - replacement = nir_ssa_for_src(&b, intrin->src[0], 1); + case nir_intrinsic_vote_all: + if (nir_src_as_const_value(intrin->src[0])) + replacement = nir_ssa_for_src(&b, intrin->src[0], 1); break; - } - case nir_intrinsic_vote_eq: { - nir_const_value *val = nir_src_as_const_value(intrin->src[0]); - if (!val && !b.shader->options->lower_vote_trivial) - continue; - - replacement = nir_imm_int(&b, NIR_TRUE); + case nir_intrinsic_vote_eq: + if (nir_src_as_const_value(intrin->src[0])) + replacement = nir_imm_int(&b, NIR_TRUE); break; - } case nir_intrinsic_ballot: { assert(b.shader->options->max_subgroup_size != 0); if (b.shader->options->max_subgroup_size > 32 || @@ -100,37 +72,6 @@ opt_intrinsics_impl(nir_function_impl *impl) nir_imm_int(&b, 0)); break; } - case nir_intrinsic_load_subgroup_eq_mask: - case nir_intrinsic_load_subgroup_ge_mask: - case nir_intrinsic_load_subgroup_gt_mask: - case nir_intrinsic_load_subgroup_le_mask: - case nir_intrinsic_load_subgroup_lt_mask: { - if (!b.shader->options->lower_subgroup_masks) - break; - - nir_ssa_def *count = nir_load_subgroup_invocation(&b); - - switch (intrin->intrinsic) { - case nir_intrinsic_load_subgroup_eq_mask: - replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count); - break; - case nir_intrinsic_load_subgroup_ge_mask: - replacement = high_subgroup_mask(&b, count, ~0ull); - break; - case nir_intrinsic_load_subgroup_gt_mask: - replacement = high_subgroup_mask(&b, count, ~1ull); - break; - case nir_intrinsic_load_subgroup_le_mask: - replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~1ull), count)); - break; - case nir_intrinsic_load_subgroup_lt_mask: - replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~0ull), count)); - break; - default: - unreachable("you seriously can't tell this is unreachable?"); - } - break; - } default: break; } diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index f6120cbf0ac..8c709b55a10 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -57,7 +57,6 @@ static const struct nir_shader_compiler_options scalar_nir_options = { .lower_unpack_snorm_4x8 = true, .lower_unpack_unorm_2x16 = true, .lower_unpack_unorm_4x8 = true, - .lower_subgroup_masks = true, .max_subgroup_size = 32, .max_unroll_iterations = 32, }; @@ -80,7 +79,6 @@ static const struct nir_shader_compiler_options vector_nir_options = { .lower_unpack_unorm_2x16 = true, .lower_extract_byte = true, .lower_extract_word = true, - .lower_vote_trivial = true, .max_unroll_iterations = 32, }; @@ -99,7 +97,6 @@ static const struct nir_shader_compiler_options vector_nir_options_gen6 = { .lower_unpack_unorm_2x16 = true, .lower_extract_byte = true, .lower_extract_word = true, - .lower_vote_trivial = true, .max_unroll_iterations = 32, }; diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index e5ff6deb2f7..f599f748a8a 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -620,7 +620,6 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) OPT(nir_lower_tex, &tex_options); OPT(nir_normalize_cubemap_coords); - OPT(nir_lower_read_invocation_to_scalar); OPT(nir_lower_global_vars_to_local); @@ -637,6 +636,13 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) OPT(nir_lower_system_values); + const nir_lower_subgroups_options subgroups_options = { + .lower_to_scalar = true, + .lower_subgroup_masks = true, + .lower_vote_trivial = !is_scalar, + }; + OPT(nir_lower_subgroups, &subgroups_options); + OPT(nir_lower_clip_cull_distance_arrays); nir_variable_mode indirect_mask = 0; -- 2.30.2