src/compiler/nir/nir_opt_idiv_const.c

   1 /*
   2  * Copyright © 2018 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "nir.h"
  25 #include "nir_builder.h"
  26 #include "util/fast_idiv_by_const.h"
  27 #include "util/u_math.h"
  28
  29 static nir_ssa_def *
  30 build_udiv(nir_builder *b, nir_ssa_def *n, uint64_t d)
  31 {
  32    if (d == 0) {
  33       return nir_imm_intN_t(b, 0, n->bit_size);
  34    } else if (util_is_power_of_two_or_zero64(d)) {
  35       return nir_ushr(b, n, nir_imm_int(b, util_logbase2_64(d)));
  36    } else {
  37       struct util_fast_udiv_info m =
  38          util_compute_fast_udiv_info(d, n->bit_size, n->bit_size);
  39
  40       if (m.pre_shift)
  41          n = nir_ushr(b, n, nir_imm_int(b, m.pre_shift));
  42       if (m.increment)
  43          n = nir_uadd_sat(b, n, nir_imm_intN_t(b, m.increment, n->bit_size));
  44       n = nir_umul_high(b, n, nir_imm_intN_t(b, m.multiplier, n->bit_size));
  45       if (m.post_shift)
  46          n = nir_ushr(b, n, nir_imm_int(b, m.post_shift));
  47
  48       return n;
  49    }
  50 }
  51
  52 static nir_ssa_def *
  53 build_umod(nir_builder *b, nir_ssa_def *n, uint64_t d)
  54 {
  55    if (d == 0) {
  56       return nir_imm_intN_t(b, 0, n->bit_size);
  57    } else if (util_is_power_of_two_or_zero64(d)) {
  58       return nir_iand(b, n, nir_imm_intN_t(b, d - 1, n->bit_size));
  59    } else {
  60       return nir_isub(b, n, nir_imul(b, build_udiv(b, n, d),
  61                                         nir_imm_intN_t(b, d, n->bit_size)));
  62    }
  63 }
  64
  65 static nir_ssa_def *
  66 build_idiv(nir_builder *b, nir_ssa_def *n, int64_t d)
  67 {
  68    uint64_t abs_d = d < 0 ? -d : d;
  69
  70    if (d == 0) {
  71       return nir_imm_intN_t(b, 0, n->bit_size);
  72    } else if (d == 1) {
  73       return n;
  74    } else if (d == -1) {
  75       return nir_ineg(b, n);
  76    } else if (util_is_power_of_two_or_zero64(abs_d)) {
  77       nir_ssa_def *uq = nir_ushr(b, nir_iabs(b, n),
  78                                     nir_imm_int(b, util_logbase2_64(abs_d)));
  79       nir_ssa_def *n_neg = nir_ilt(b, n, nir_imm_intN_t(b, 0, n->bit_size));
  80       nir_ssa_def *neg = d < 0 ? nir_inot(b, n_neg) : n_neg;
  81       return nir_bcsel(b, neg, nir_ineg(b, uq), uq);
  82    } else {
  83       struct util_fast_sdiv_info m =
  84          util_compute_fast_sdiv_info(d, n->bit_size);
  85
  86       nir_ssa_def *res =
  87          nir_imul_high(b, n, nir_imm_intN_t(b, m.multiplier, n->bit_size));
  88       if (d > 0 && m.multiplier < 0)
  89          res = nir_iadd(b, res, n);
  90       if (d < 0 && m.multiplier > 0)
  91          res = nir_isub(b, res, n);
  92       if (m.shift)
  93          res = nir_ishr(b, res, nir_imm_int(b, m.shift));
  94       res = nir_iadd(b, res, nir_ushr(b, res, nir_imm_int(b, n->bit_size - 1)));
  95
  96       return res;
  97    }
  98 }
  99
 100 static bool
 101 nir_opt_idiv_const_instr(nir_builder *b, nir_alu_instr *alu)
 102 {
 103    assert(alu->dest.dest.is_ssa);
 104    assert(alu->src[0].src.is_ssa && alu->src[1].src.is_ssa);
 105
 106    nir_const_value *const_denom = nir_src_as_const_value(alu->src[1].src);
 107    if (!const_denom)
 108       return false;
 109
 110    unsigned bit_size = alu->src[1].src.ssa->bit_size;
 111
 112    b->cursor = nir_before_instr(&alu->instr);
 113
 114    nir_ssa_def *q[4];
 115    for (unsigned comp = 0; comp < alu->dest.dest.ssa.num_components; comp++) {
 116       /* Get the numerator for the channel */
 117       nir_ssa_def *n = nir_channel(b, alu->src[0].src.ssa,
 118                                    alu->src[0].swizzle[comp]);
 119
 120       /* Get the denominator for the channel */
 121       int64_t d;
 122       switch (bit_size) {
 123       case 8:
 124          d = const_denom[alu->src[1].swizzle[comp]].i8;
 125          break;
 126       case 16:
 127          d = const_denom[alu->src[1].swizzle[comp]].i16;
 128          break;
 129       case 32:
 130          d = const_denom[alu->src[1].swizzle[comp]].i32;
 131          break;
 132       case 64:
 133          d = const_denom[alu->src[1].swizzle[comp]].i64;
 134          break;
 135       default:
 136          unreachable("Invalid bit size");
 137       }
 138
 139       nir_alu_type d_type = nir_op_infos[alu->op].input_types[1];
 140       if (nir_alu_type_get_base_type(d_type) == nir_type_uint) {
 141          /* The code above sign-extended.  If we're lowering an unsigned op,
 142           * we need to mask it off to the correct number of bits so that a
 143           * cast to uint64_t will do the right thing.
 144           */
 145          if (bit_size < 64)
 146             d &= (1ull << bit_size) - 1;
 147       }
 148
 149       switch (alu->op) {
 150       case nir_op_udiv:
 151          q[comp] = build_udiv(b, n, d);
 152          break;
 153       case nir_op_idiv:
 154          q[comp] = build_idiv(b, n, d);
 155          break;
 156       case nir_op_umod:
 157          q[comp] = build_umod(b, n, d);
 158          break;
 159       default:
 160          unreachable("Unknown integer division op");
 161       }
 162    }
 163
 164    nir_ssa_def *qvec = nir_vec(b, q, alu->dest.dest.ssa.num_components);
 165    nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(qvec));
 166    nir_instr_remove(&alu->instr);
 167
 168    return true;
 169 }
 170
 171 static bool
 172 nir_opt_idiv_const_impl(nir_function_impl *impl, unsigned min_bit_size)
 173 {
 174    bool progress = false;
 175
 176    nir_builder b;
 177    nir_builder_init(&b, impl);
 178
 179    nir_foreach_block(block, impl) {
 180       nir_foreach_instr_safe(instr, block) {
 181          if (instr->type != nir_instr_type_alu)
 182             continue;
 183
 184          nir_alu_instr *alu = nir_instr_as_alu(instr);
 185          if (alu->op != nir_op_udiv &&
 186              alu->op != nir_op_idiv &&
 187              alu->op != nir_op_umod)
 188             continue;
 189
 190          assert(alu->dest.dest.is_ssa);
 191          if (alu->dest.dest.ssa.bit_size < min_bit_size)
 192             continue;
 193
 194          progress |= nir_opt_idiv_const_instr(&b, alu);
 195       }
 196    }
 197
 198    if (progress) {
 199       nir_metadata_preserve(impl, nir_metadata_block_index |
 200                                   nir_metadata_dominance);
 201    }
 202
 203    return progress;
 204 }
 205
 206 bool
 207 nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size)
 208 {
 209    bool progress = false;
 210
 211    nir_foreach_function(function, shader) {
 212       if (function->impl)
 213          progress |= nir_opt_idiv_const_impl(function->impl, min_bit_size);
 214    }
 215
 216    return progress;
 217 }