nir: Don't try to scalarize unpack ops.
[mesa.git] / src / glsl / nir / nir_lower_alu_to_scalar.c
/*
 * Copyright © 2014-2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"

/** @file nir_lower_alu_to_scalar.c
 *
 * Replaces nir_alu_instr operations that use more than one channel of their
 * arguments with sequences of individual per-channel operations.
 */
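
/* For example, an instruction such as (NIR-style pseudocode, for
 * illustration only; actual SSA names will differ):
 *
 *    vec4 ssa_2 = fadd ssa_0, ssa_1
 *
 * is replaced with:
 *
 *    ssa_3 = fadd ssa_0.x, ssa_1.x
 *    ssa_4 = fadd ssa_0.y, ssa_1.y
 *    ssa_5 = fadd ssa_0.z, ssa_1.z
 *    ssa_6 = fadd ssa_0.w, ssa_1.w
 *    vec4 ssa_7 = vec4 ssa_3, ssa_4, ssa_5, ssa_6
 */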

static void
nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)
{
   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
   instr->dest.write_mask = (1 << num_components) - 1;
}

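/* Lowers a reduction operation such as fdot2/3/4 or ball/bany to a chain of
 * scalar instructions: chan_op is applied to each channel, and the results
 * are folded together pairwise with merge_op.  Roughly (illustrative
 * pseudocode):
 *
 *    ssa_2 = fdot3 ssa_0, ssa_1
 *
 * becomes:
 *
 *    ssa_3 = fmul ssa_0.x, ssa_1.x
 *    ssa_4 = fmul ssa_0.y, ssa_1.y
 *    ssa_5 = fadd ssa_3, ssa_4
 *    ssa_6 = fmul ssa_0.z, ssa_1.z
 *    ssa_7 = fadd ssa_5, ssa_6
 */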
static void
lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
                void *mem_ctx)
{
   unsigned num_components = nir_op_infos[instr->op].input_sizes[0];

   nir_ssa_def *last = NULL;
   for (unsigned i = 0; i < num_components; i++) {
      nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op);
      nir_alu_ssa_dest_init(chan, 1);
      nir_alu_src_copy(&chan->src[0], &instr->src[0], mem_ctx);
      chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
      if (nir_op_infos[chan_op].num_inputs > 1) {
         assert(nir_op_infos[chan_op].num_inputs == 2);
         nir_alu_src_copy(&chan->src[1], &instr->src[1], mem_ctx);
         chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
      }

      nir_instr_insert_before(&instr->instr, &chan->instr);

      if (i == 0) {
         last = &chan->dest.dest.ssa;
      } else {
         nir_alu_instr *merge = nir_alu_instr_create(mem_ctx, merge_op);
         nir_alu_ssa_dest_init(merge, 1);
         merge->dest.write_mask = 1;
         merge->src[0].src = nir_src_for_ssa(last);
         merge->src[1].src = nir_src_for_ssa(&chan->dest.dest.ssa);
         nir_instr_insert_before(&instr->instr, &merge->instr);
         last = &merge->dest.dest.ssa;
      }
   }

   assert(instr->dest.write_mask == 1);
   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last),
                            mem_ctx);
   nir_instr_remove(&instr->instr);
}

static void
lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
{
   unsigned num_src = nir_op_infos[instr->op].num_inputs;
   unsigned i, chan;

   assert(instr->dest.dest.is_ssa);
   assert(instr->dest.write_mask != 0);

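/* Each LOWER_REDUCTION(name, chan, merge) invocation below expands to cases
 * for name##2, name##3, and name##4 (e.g. nir_op_fdot2/3/4), all of which
 * are handed to lower_reduction() with the given per-channel and merge
 * opcodes.
 */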
#define LOWER_REDUCTION(name, chan, merge) \
   case name##2: \
   case name##3: \
   case name##4: \
      lower_reduction(instr, chan, merge, mem_ctx); \
      break;

   switch (instr->op) {
   case nir_op_vec4:
   case nir_op_vec3:
   case nir_op_vec2:
      /* We don't need to scalarize these ops; they're the ones generated to
       * group up outputs into a value that can be SSAed.
       */
      return;

   case nir_op_unpack_unorm_4x8:
   case nir_op_unpack_snorm_4x8:
   case nir_op_unpack_unorm_2x16:
   case nir_op_unpack_snorm_2x16:
      /* There is no scalar version of these ops, unless we were to break
       * them down into bit shifts and math (which is definitely not
       * intended).
       */
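      /* (For reference: unpack_unorm_4x8, for instance, turns one 32-bit
       * word into a vec4 whose channels are the four bytes scaled to
       * [0, 1], so every output channel reads different bits of the same
       * scalar source.)
       */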
      return;

   case nir_op_unpack_half_2x16:
      /* We could split this into unpack_half_2x16_split_[xy], but should
       * we?
       */
      return;

   LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
   LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
   LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
   LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior);
   LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior);
   LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand);
   LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for);
   LOWER_REDUCTION(nir_op_ball, nir_op_imov, nir_op_iand);
   LOWER_REDUCTION(nir_op_bany, nir_op_imov, nir_op_ior);
   LOWER_REDUCTION(nir_op_fall, nir_op_fmov, nir_op_fand);
   LOWER_REDUCTION(nir_op_fany, nir_op_fmov, nir_op_for);

   default:
      break;
   }

   if (instr->dest.dest.ssa.num_components == 1)
      return;

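   /* Emit one scalar copy of this instruction per written channel, then
    * gather the per-channel results back into a vecN that replaces the
    * original destination.
    */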
   unsigned num_components = instr->dest.dest.ssa.num_components;
   static const nir_op nir_op_map[] = {nir_op_vec2, nir_op_vec3, nir_op_vec4};
   nir_alu_instr *vec_instr =
      nir_alu_instr_create(mem_ctx, nir_op_map[num_components - 2]);
   nir_alu_ssa_dest_init(vec_instr, num_components);

   for (chan = 0; chan < 4; chan++) {
      if (!(instr->dest.write_mask & (1 << chan)))
         continue;

      nir_alu_instr *lower = nir_alu_instr_create(mem_ctx, instr->op);
      for (i = 0; i < num_src; i++) {
         /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar
          * args (input_sizes[] == 1).
          */
         assert(nir_op_infos[instr->op].input_sizes[i] < 2);
         unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
                              0 : chan);

         nir_alu_src_copy(&lower->src[i], &instr->src[i], mem_ctx);
         for (int j = 0; j < 4; j++)
            lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
      }

      nir_alu_ssa_dest_init(lower, 1);
      lower->dest.saturate = instr->dest.saturate;
      vec_instr->src[chan].src = nir_src_for_ssa(&lower->dest.dest.ssa);

      nir_instr_insert_before(&instr->instr, &lower->instr);
   }

   nir_instr_insert_before(&instr->instr, &vec_instr->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
                            nir_src_for_ssa(&vec_instr->dest.dest.ssa),
                            mem_ctx);

   nir_instr_remove(&instr->instr);
}

static bool
lower_alu_to_scalar_block(nir_block *block, void *data)
{
   nir_foreach_instr_safe(block, instr) {
      if (instr->type == nir_instr_type_alu)
         lower_alu_instr_scalar(nir_instr_as_alu(instr), data);
   }

   return true;
}

static void
nir_lower_alu_to_scalar_impl(nir_function_impl *impl)
{
   nir_foreach_block(impl, lower_alu_to_scalar_block, ralloc_parent(impl));
}

void
nir_lower_alu_to_scalar(nir_shader *shader)
{
   nir_foreach_overload(shader, overload) {
      if (overload->impl)
         nir_lower_alu_to_scalar_impl(overload->impl);
   }
}
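
/* Typical usage is a single call from a scalar backend's NIR lowering
 * sequence (a sketch; exactly where this runs varies per driver):
 *
 *    nir_lower_alu_to_scalar(shader);
 *
 * Afterward, the only vector ALU instructions remaining are vec2/vec3/vec4
 * and the unpack ops skipped above.
 */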