mesa.git: src/compiler/nir/nir_opt_shrink_vectors.c
/*
 * Copyright © 2020 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file
 *
 * Trims off the unused trailing components of SSA defs.
 *
 * Due to various optimization passes (or frontend implementations,
 * particularly prog_to_nir), we may have instructions generating vectors
 * whose components don't get read by any instruction. While it can be tricky
 * to eliminate either unused low components of a writemask (you might need to
 * increment some offset from a load_uniform, for example) or channels in the
 * middle of a partially set writemask (you might need to reswizzle ALU ops
 * using the value), it is trivial to just drop the trailing components.
 *
 * This pass is probably only of use to vector backends -- scalar backends
 * typically get unused def channel trimming by scalarizing and dead code
 * elimination.
 */
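
/*
 * For illustration, a hypothetical before/after (shown in rough nir_print
 * syntax; the values and SSA names are made up):
 *
 *    vec4 32 ssa_1 = load_const (1.0, 2.0, 3.0, 4.0)
 *    vec1 32 ssa_2 = fadd ssa_1.x, ssa_1.y
 *
 * Only .x and .y of ssa_1 are ever read, so the pass shrinks it to
 *
 *    vec2 32 ssa_1 = load_const (1.0, 2.0)
 *    vec1 32 ssa_2 = fadd ssa_1.x, ssa_1.y
 */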

#include "nir.h"
#include "nir_builder.h"

static bool
shrink_dest_to_read_mask(nir_ssa_def *def)
{
   /* Early out if there's nothing to do. */
   if (def->num_components == 1)
      return false;

   unsigned mask = nir_ssa_def_components_read(def);

   /* If nothing was read, leave it up to DCE. */
   if (!mask)
      return false;

   int last_bit = util_last_bit(mask);
   if (def->num_components > last_bit) {
      def->num_components = last_bit;
      return true;
   }

   return false;
}
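
/*
 * A worked example of the mask math above (hypothetical values): for a vec4
 * def whose uses only read .x and .z, nir_ssa_def_components_read() returns
 * 0b0101, util_last_bit() returns 3, and the def is shrunk from 4 components
 * to 3. The unread .y channel in the middle is kept, since only trailing
 * components can be dropped without reswizzling every user.
 */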

static bool
opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
{
   nir_ssa_def *def = &instr->dest.dest.ssa;

   if (nir_op_infos[instr->op].output_size == 0) {
      if (shrink_dest_to_read_mask(def)) {
         instr->dest.write_mask &=
            BITFIELD_MASK(def->num_components);

         return true;
      }
   } else {
      switch (instr->op) {
      case nir_op_vec4:
      case nir_op_vec3:
      case nir_op_vec2: {
         unsigned mask = nir_ssa_def_components_read(def);

         /* If nothing was read, leave it up to DCE. */
         if (mask == 0)
            return false;

         int last_bit = util_last_bit(mask);
         if (last_bit < def->num_components) {
            nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS] = { 0 };
            for (int i = 0; i < last_bit; i++)
               srcs[i] = nir_ssa_for_alu_src(b, instr, i);

            nir_ssa_def *new_vec = nir_vec(b, srcs, last_bit);
            nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_vec));
            return true;
         }
         break;
      }

      default:
         break;
      }
   }

   return false;
}
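
/*
 * A sketch of the vecN case above on hypothetical IR: if only .xy of
 *
 *    vec4 32 ssa_5 = vec4 ssa_1, ssa_2, ssa_3, ssa_4
 *
 * are ever read, the builder emits "vec2 32 ssa_6 = vec2 ssa_1, ssa_2" just
 * before the vec4 (the cursor is set in opt_shrink_vectors_instr below) and
 * rewrites all uses of ssa_5 to ssa_6; the now-dead vec4 is left for DCE.
 */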

static bool
opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_input_vertex:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_shared:
   case nir_intrinsic_store_global:
   case nir_intrinsic_store_scratch:
      break;
   default:
      return false;
   }

   /* Must be a vectorized intrinsic that we can resize. */
   assert(instr->num_components != 0);

   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
      /* Loads: trim the dest to the used channels. */
      if (shrink_dest_to_read_mask(&instr->dest.ssa)) {
         instr->num_components = instr->dest.ssa.num_components;
         return true;
      }
   } else {
      /* Stores: trim the num_components stored according to the write
       * mask.
       */
      unsigned write_mask = nir_intrinsic_write_mask(instr);
      unsigned last_bit = util_last_bit(write_mask);
      if (last_bit < instr->num_components && instr->src[0].is_ssa) {
         nir_ssa_def *def = nir_channels(b, instr->src[0].ssa,
                                         BITFIELD_MASK(last_bit));
         nir_instr_rewrite_src(&instr->instr,
                               &instr->src[0],
                               nir_src_for_ssa(def));
         instr->num_components = last_bit;

         return true;
      }
   }

   return false;
}
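
/*
 * An example of the store path (hypothetical numbers): a store_ssbo of a
 * vec4 with write_mask 0x3 only writes .xy, so util_last_bit() is 2, the
 * stored source is replaced with a vec2 built by nir_channels(), and
 * num_components drops from 4 to 2. A write_mask of 0x9 (.x and .w) is left
 * alone: the last component is still written, and only trailing channels
 * can be trimmed.
 */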

static bool
opt_shrink_vectors_load_const(nir_load_const_instr *instr)
{
   return shrink_dest_to_read_mask(&instr->def);
}

static bool
opt_shrink_vectors_ssa_undef(nir_ssa_undef_instr *instr)
{
   return shrink_dest_to_read_mask(&instr->def);
}

static bool
opt_shrink_vectors_instr(nir_builder *b, nir_instr *instr)
{
   b->cursor = nir_before_instr(instr);

   switch (instr->type) {
   case nir_instr_type_alu:
      return opt_shrink_vectors_alu(b, nir_instr_as_alu(instr));

   case nir_instr_type_intrinsic:
      return opt_shrink_vectors_intrinsic(b, nir_instr_as_intrinsic(instr));

   case nir_instr_type_load_const:
      return opt_shrink_vectors_load_const(nir_instr_as_load_const(instr));

   case nir_instr_type_ssa_undef:
      return opt_shrink_vectors_ssa_undef(nir_instr_as_ssa_undef(instr));

   default:
      return false;
   }
}

bool
nir_opt_shrink_vectors(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            progress |= opt_shrink_vectors_instr(&b, instr);
         }
      }

      if (progress) {
         nir_metadata_preserve(function->impl,
                               nir_metadata_block_index |
                               nir_metadata_dominance);
      } else {
         nir_metadata_preserve(function->impl, nir_metadata_all);
      }
   }

   return progress;
}
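
/*
 * A minimal usage sketch (hypothetical driver code, not part of this file):
 * a vector backend would typically run this pass in its optimization loop
 * and iterate to a fixed point together with DCE, so that the producers of
 * dropped trailing channels get cleaned up as well:
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       progress |= nir_opt_shrink_vectors(shader);
 *       progress |= nir_opt_dce(shader);
 *       progress |= nir_opt_cse(shader);
 *    } while (progress);
 */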