 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */
/*
 * Implements a pass that tries to move uses of vecN sources to their
 * destinations.  This is kind of like an inverse copy-propagation pass.
 * For instance, if you have
 *
 *    ssa_1 = vec4(a, b, c, d)
 *    ssa_2 = fadd(a, b)
 *
 * This will be turned into
 *
 *    ssa_1 = vec4(a, b, c, d)
 *    ssa_2 = fadd(ssa_1.x, ssa_1.y)
 *
 * While this is "worse" because it adds a bunch of unneeded dependencies, it
 * actually makes it much easier for vec4-based backends to coalesce the MOVs
 * that result from the vec4 operation because it doesn't have to worry about
 * quite as many reads.
 */
49 /* Returns true if the given SSA def dominates the instruction. An SSA def is
50 * considered to *not* dominate the instruction that defines it.
53 ssa_def_dominates_instr(nir_ssa_def
*def
, nir_instr
*instr
)
55 if (instr
->index
<= def
->parent_instr
->index
) {
57 } else if (def
->parent_instr
->block
== instr
->block
) {
58 return def
->parent_instr
->index
< instr
->index
;
60 return nir_block_dominates(def
->parent_instr
->block
, instr
->block
);
65 move_vec_src_uses_to_dest_block(nir_block
*block
)
67 bool progress
= false;
69 nir_foreach_instr(instr
, block
) {
70 if (instr
->type
!= nir_instr_type_alu
)
73 nir_alu_instr
*vec
= nir_instr_as_alu(instr
);
81 continue; /* The loop */
84 /* Can't handle non-SSA vec operations */
85 if (!vec
->dest
.dest
.is_ssa
)
88 /* Can't handle saturation */
89 if (vec
->dest
.saturate
)
92 /* First, mark all of the sources we are going to consider for rewriting
95 int srcs_remaining
= 0;
96 for (unsigned i
= 0; i
< nir_op_infos
[vec
->op
].num_inputs
; i
++) {
97 /* We can't rewrite a source if it's not in SSA form */
98 if (!vec
->src
[i
].src
.is_ssa
)
101 /* We can't rewrite a source if it has modifiers */
102 if (vec
->src
[i
].abs
|| vec
->src
[i
].negate
)
105 srcs_remaining
|= 1 << i
;
108 /* We can't actually do anything with this instruction */
109 if (srcs_remaining
== 0)
112 for (unsigned i
; i
= ffs(srcs_remaining
) - 1, srcs_remaining
;) {
113 int8_t swizzle
[4] = { -1, -1, -1, -1 };
115 for (unsigned j
= i
; j
< nir_op_infos
[vec
->op
].num_inputs
; j
++) {
116 if (vec
->src
[j
].src
.ssa
!= vec
->src
[i
].src
.ssa
)
119 /* Mark the given channel as having been handled */
120 srcs_remaining
&= ~(1 << j
);
122 /* Mark the appropriate channel as coming from src j */
123 swizzle
[vec
->src
[j
].swizzle
[0]] = j
;
126 nir_foreach_use_safe(use
, vec
->src
[i
].src
.ssa
) {
127 if (use
->parent_instr
== &vec
->instr
)
130 /* We need to dominate the use if we are going to rewrite it */
131 if (!ssa_def_dominates_instr(&vec
->dest
.dest
.ssa
, use
->parent_instr
))
134 /* For now, we'll just rewrite ALU instructions */
135 if (use
->parent_instr
->type
!= nir_instr_type_alu
)
140 nir_alu_instr
*use_alu
= nir_instr_as_alu(use
->parent_instr
);
142 /* Figure out which source we're actually looking at */
143 nir_alu_src
*use_alu_src
= exec_node_data(nir_alu_src
, use
, src
);
144 unsigned src_idx
= use_alu_src
- use_alu
->src
;
145 assert(src_idx
< nir_op_infos
[use_alu
->op
].num_inputs
);
147 bool can_reswizzle
= true;
148 for (unsigned j
= 0; j
< 4; j
++) {
149 if (!nir_alu_instr_channel_used(use_alu
, src_idx
, j
))
152 if (swizzle
[use_alu_src
->swizzle
[j
]] == -1) {
153 can_reswizzle
= false;
161 /* At this point, we have determined that the given use can be
162 * reswizzled to actually use the destination of the vecN operation.
163 * Go ahead and rewrite it as needed.
165 nir_instr_rewrite_src(use
->parent_instr
, use
,
166 nir_src_for_ssa(&vec
->dest
.dest
.ssa
));
167 for (unsigned j
= 0; j
< 4; j
++) {
168 if (!nir_alu_instr_channel_used(use_alu
, src_idx
, j
))
171 use_alu_src
->swizzle
[j
] = swizzle
[use_alu_src
->swizzle
[j
]];
182 nir_move_vec_src_uses_to_dest_impl(nir_shader
*shader
, nir_function_impl
*impl
)
184 bool progress
= false;
186 nir_metadata_require(impl
, nir_metadata_dominance
);
188 nir_index_instrs(impl
);
190 nir_foreach_block(block
, impl
) {
191 progress
|= move_vec_src_uses_to_dest_block(block
);
194 nir_metadata_preserve(impl
, nir_metadata_block_index
|
195 nir_metadata_dominance
);
201 nir_move_vec_src_uses_to_dest(nir_shader
*shader
)
203 bool progress
= false;
205 nir_foreach_function(function
, shader
) {
207 progress
|= nir_move_vec_src_uses_to_dest_impl(shader
,