From 1c9906d5ff22234b4e2c5c1c81a42a8766706b34 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 22 Jul 2020 16:57:22 -0700 Subject: [PATCH] nir: Add a pass to cut the trailing ends of vectors. Ideally we'd also handle unused middles of vectors and reswizzle ALU-only uses of it so we could write fewer channels, but that's future work. Reviewed-by: Rhys Perry Reviewed-by: Kenneth Graunke Part-of: --- src/compiler/Makefile.sources | 1 + src/compiler/nir/meson.build | 1 + src/compiler/nir/nir.h | 1 + src/compiler/nir/nir_opt_shrink_vectors.c | 210 ++++++++++++++++++++++ 4 files changed, 213 insertions(+) create mode 100644 src/compiler/nir/nir_opt_shrink_vectors.c diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index 34e61c280a4..b15907d1aee 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -327,6 +327,7 @@ NIR_FILES = \ nir/nir_opt_rematerialize_compares.c \ nir/nir_opt_remove_phis.c \ nir/nir_opt_shrink_load.c \ + nir/nir_opt_shrink_vectors.c \ nir/nir_opt_sink.c \ nir/nir_opt_trivial_continues.c \ nir/nir_opt_undef.c \ diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build index d0970011ec5..c326dd5ec56 100644 --- a/src/compiler/nir/meson.build +++ b/src/compiler/nir/meson.build @@ -208,6 +208,7 @@ files_libnir = files( 'nir_opt_rematerialize_compares.c', 'nir_opt_remove_phis.c', 'nir_opt_shrink_load.c', + 'nir_opt_shrink_vectors.c', 'nir_opt_sink.c', 'nir_opt_trivial_continues.c', 'nir_opt_undef.c', diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 7533cfe4dda..a3b2c84dcab 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4619,6 +4619,7 @@ bool nir_opt_remove_phis(nir_shader *shader); bool nir_opt_remove_phis_block(nir_block *block); bool nir_opt_shrink_load(nir_shader *shader); +bool nir_opt_shrink_vectors(nir_shader *shader); bool nir_opt_trivial_continues(nir_shader *shader); diff --git a/src/compiler/nir/nir_opt_shrink_vectors.c 
b/src/compiler/nir/nir_opt_shrink_vectors.c new file mode 100644 index 00000000000..8b01f9f4817 --- /dev/null +++ b/src/compiler/nir/nir_opt_shrink_vectors.c @@ -0,0 +1,210 @@ +/* + * Copyright © 2020 Google LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file + * + * Trims off the unused trailing components of SSA defs. + * + * Due to various optimization passes (or frontend implementations, + * particularly prog_to_nir), we may have instructions generating vectors + * whose components don't get read by any instruction. While it can be tricky + * to eliminate either unused low components of a writemask (you might need to + * increment some offset from a load_uniform, for example) or channels in the + * middle of a partially set writemask (you might need to reswizzle ALU ops + * using the value), it is trivial to just drop the trailing components. 
+ * + * This pass is probably only of use to vector backends -- scalar backends + * typically get unused def channel trimming by scalarizing and dead code + * elimination. + */ + +#include "nir.h" +#include "nir_builder.h" +/* Drops unread trailing components of def; returns true if it shrank. */ +static bool +shrink_dest_to_read_mask(nir_ssa_def *def) +{ + /* early out if there's nothing to do. */ + if (def->num_components == 1) + return false; + + unsigned mask = nir_ssa_def_components_read(def); + int last_bit = util_last_bit(mask); + + /* If nothing was read, leave it up to DCE. */ + if (!mask) + return false; + /* last_bit is presumably 1 + index of the highest read component. */ + if (def->num_components > last_bit) { + def->num_components = last_bit; + return true; + } + + return false; +} +/* Trims unread trailing channels from the result of an ALU instruction. */ +static bool +opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr) +{ + nir_ssa_def *def = &instr->dest.dest.ssa; + /* NOTE(review): output_size == 0 presumably means a per-component op. */ + if (nir_op_infos[instr->op].output_size == 0) { + if (shrink_dest_to_read_mask(def)) { + instr->dest.write_mask &= + BITFIELD_MASK(def->num_components); + + return true; + } + } else { + /* Otherwise only vec2/vec3/vec4 are handled; other ops are skipped. */ + switch (instr->op) { + case nir_op_vec4: + case nir_op_vec3: + case nir_op_vec2: { + unsigned mask = nir_ssa_def_components_read(def); + + /* If nothing was read, leave it up to DCE. 
*/ + if (mask == 0) + return false; + /* Build a narrower vec from the leading sources and redirect uses. */ + int last_bit = util_last_bit(mask); + if (last_bit < def->num_components) { + nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS] = { 0 }; + for (int i = 0; i < last_bit; i++) + srcs[i] = nir_ssa_for_alu_src(b, instr, i); + /* The old vec is left in place; this code does not remove it. */ + nir_ssa_def *new_vec = nir_vec(b, srcs, last_bit); + nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_vec)); + return true; + } + break; + } + + default: + break; + } + } + + return false; +} +/* Trims the result of the load intrinsics listed in the switch below. */ +static bool +opt_shrink_vectors_intrinsic(nir_intrinsic_instr *instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_input: + case nir_intrinsic_load_input_vertex: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_interpolated_input: + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_push_constant: + case nir_intrinsic_load_constant: + case nir_intrinsic_load_global: + case nir_intrinsic_load_kernel_input: + case nir_intrinsic_load_scratch: + break; + default: + return false; + } + + assert(nir_intrinsic_infos[instr->intrinsic].has_dest); + /* Must be a vectorized intrinsic that we can resize. 
*/ + assert(instr->num_components != 0); + /* Keep the component count of the intrinsic in sync with its dest. */ + if (shrink_dest_to_read_mask(&instr->dest.ssa)) { + instr->num_components = instr->dest.ssa.num_components; + return true; + } + + return false; +} +/* Trims unread trailing components from a load_const def. */ +static bool +opt_shrink_vectors_load_const(nir_load_const_instr *instr) +{ + return shrink_dest_to_read_mask(&instr->def); +} +/* Trims unread trailing components from an ssa_undef def. */ +static bool +opt_shrink_vectors_ssa_undef(nir_ssa_undef_instr *instr) +{ + return shrink_dest_to_read_mask(&instr->def); +} +/* Dispatches on the instruction type; returns the result of the helper. */ +static bool +opt_shrink_vectors_instr(nir_builder *b, nir_instr *instr) +{ + b->cursor = nir_before_instr(instr); + + switch (instr->type) { + case nir_instr_type_alu: + return opt_shrink_vectors_alu(b, nir_instr_as_alu(instr)); + + case nir_instr_type_intrinsic: + return opt_shrink_vectors_intrinsic(nir_instr_as_intrinsic(instr)); + + case nir_instr_type_load_const: + return opt_shrink_vectors_load_const(nir_instr_as_load_const(instr)); + + case nir_instr_type_ssa_undef: + return opt_shrink_vectors_ssa_undef(nir_instr_as_ssa_undef(instr)); + + default: + return false; + } + /* Not reached: every case of the switch above returns. */ + return true; +} +/* Runs the pass on every function that has an impl; returns progress. */ +bool +nir_opt_shrink_vectors(nir_shader *shader) +{ + bool progress = false; + /* NOTE(review): progress accumulates across functions, not per impl. */ + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + nir_builder b; + nir_builder_init(&b, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + progress |= opt_shrink_vectors_instr(&b, instr); + } + } + /* With no progress so far, all metadata is preserved. */ + if (progress) { + nir_metadata_preserve(function->impl, + nir_metadata_block_index | + nir_metadata_dominance); + } else { + nir_metadata_preserve(function->impl, nir_metadata_all); + } + } + + return progress; +} -- 2.30.2