src/compiler/nir/nir_move_vec_src_uses_to_dest.c

   1 /*
   2  * Copyright © 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Jason Ekstrand (jason@jlekstrand.net)
  25  *
  26  */
  27
  28 #include "nir.h"
  29
  30 /*
  31  * Implements a pass that tries to move uses vecN sources to their
  32  * destinations.  This is kind of like an inverse copy-propagation pass.
  33  * For instance, if you have
  34  *
  35  * ssa_1 = vec4(a, b, c, d)
  36  * ssa_2 = fadd(a, b)
  37  *
  38  * This will be turned into
  39  *
  40  * ssa_1 = vec4(a, b, c, d)
  41  * ssa_2 = fadd(ssa_1.x, ssa_1.y)
  42  *
  43  * While this is "worse" because it adds a bunch of unneeded dependencies, it
  44  * actually makes it much easier for vec4-based backends to coalesce the MOV's
  45  * that result from the vec4 operation because it doesn't have to worry about
  46  * quite as many reads.
  47  */
  48
  49 /* Returns true if the given SSA def dominates the instruction.  An SSA def is
  50  * considered to *not* dominate the instruction that defines it.
  51  */
  52 static bool
  53 ssa_def_dominates_instr(nir_ssa_def *def, nir_instr *instr)
  54 {
  55    if (instr->index <= def->parent_instr->index) {
  56       return false;
  57    } else if (def->parent_instr->block == instr->block) {
  58       return def->parent_instr->index < instr->index;
  59    } else {
  60       return nir_block_dominates(def->parent_instr->block, instr->block);
  61    }
  62 }
  63
  64 static bool
  65 move_vec_src_uses_to_dest_block(nir_block *block, void *shader)
  66 {
  67    nir_foreach_instr(block, instr) {
  68       if (instr->type != nir_instr_type_alu)
  69          continue;
  70
  71       nir_alu_instr *vec = nir_instr_as_alu(instr);
  72
  73       switch (vec->op) {
  74       case nir_op_vec2:
  75       case nir_op_vec3:
  76       case nir_op_vec4:
  77          break;
  78       default:
  79          continue; /* The loop */
  80       }
  81
  82       /* Can't handle non-SSA vec operations */
  83       if (!vec->dest.dest.is_ssa)
  84          continue;
  85
  86       /* Can't handle saturation */
  87       if (vec->dest.saturate)
  88          continue;
  89
  90       /* First, mark all of the sources we are going to consider for rewriting
  91        * to the destination
  92        */
  93       int srcs_remaining = 0;
  94       for (unsigned i = 0; i < nir_op_infos[vec->op].num_inputs; i++) {
  95          /* We can't rewrite a source if it's not in SSA form */
  96          if (!vec->src[i].src.is_ssa)
  97             continue;
  98
  99          /* We can't rewrite a source if it has modifiers */
 100          if (vec->src[i].abs || vec->src[i].negate)
 101             continue;
 102
 103          srcs_remaining |= 1 << i;
 104       }
 105
 106       /* We can't actually do anything with this instruction */
 107       if (srcs_remaining == 0)
 108          continue;
 109
 110       for (unsigned i; i = ffs(srcs_remaining) - 1, srcs_remaining;) {
 111          int8_t swizzle[4] = { -1, -1, -1, -1 };
 112
 113          for (unsigned j = i; j < nir_op_infos[vec->op].num_inputs; j++) {
 114             if (vec->src[j].src.ssa != vec->src[i].src.ssa)
 115                continue;
 116
 117             /* Mark the given chanle as having been handled */
 118             srcs_remaining &= ~(1 << j);
 119
 120             /* Mark the appropreate channel as coming from src j */
 121             swizzle[vec->src[j].swizzle[0]] = j;
 122          }
 123
 124          nir_foreach_use_safe(vec->src[i].src.ssa, use) {
 125             if (use->parent_instr == &vec->instr)
 126                continue;
 127
 128             /* We need to dominate the use if we are going to rewrite it */
 129             if (!ssa_def_dominates_instr(&vec->dest.dest.ssa, use->parent_instr))
 130                continue;
 131
 132             /* For now, we'll just rewrite ALU instructions */
 133             if (use->parent_instr->type != nir_instr_type_alu)
 134                continue;
 135
 136             assert(use->is_ssa);
 137
 138             nir_alu_instr *use_alu = nir_instr_as_alu(use->parent_instr);
 139
 140             /* Figure out which source we're actually looking at */
 141             nir_alu_src *use_alu_src = exec_node_data(nir_alu_src, use, src);
 142             unsigned src_idx = use_alu_src - use_alu->src;
 143             assert(src_idx < nir_op_infos[use_alu->op].num_inputs);
 144
 145             bool can_reswizzle = true;
 146             for (unsigned j = 0; j < 4; j++) {
 147                if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
 148                   continue;
 149
 150                if (swizzle[use_alu_src->swizzle[j]] == -1) {
 151                   can_reswizzle = false;
 152                   break;
 153                }
 154             }
 155
 156             if (!can_reswizzle)
 157                continue;
 158
 159             /* At this point, we have determined that the given use can be
 160              * reswizzled to actually use the destination of the vecN operation.
 161              * Go ahead and rewrite it as needed.
 162              */
 163             nir_instr_rewrite_src(use->parent_instr, use,
 164                                   nir_src_for_ssa(&vec->dest.dest.ssa));
 165             for (unsigned j = 0; j < 4; j++) {
 166                if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
 167                   continue;
 168
 169                use_alu_src->swizzle[j] = swizzle[use_alu_src->swizzle[j]];
 170             }
 171          }
 172       }
 173    }
 174
 175    return true;
 176 }
 177
 178 static void
 179 nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl)
 180 {
 181    nir_metadata_require(impl, nir_metadata_dominance);
 182
 183    nir_index_instrs(impl);
 184    nir_foreach_block(impl, move_vec_src_uses_to_dest_block, shader);
 185
 186    nir_metadata_preserve(impl, nir_metadata_block_index |
 187                                nir_metadata_dominance);
 188 }
 189
 190 void
 191 nir_move_vec_src_uses_to_dest(nir_shader *shader)
 192 {
 193    nir_foreach_function(shader, function) {
 194       if (function->impl)
 195          nir_move_vec_src_uses_to_dest_impl(shader, function->impl);
 196    }
 197 }