src/compiler/nir/nir_opt_idiv_const.c

   1 /*
   2  * Copyright © 2018 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "nir.h"
  25 #include "nir_builder.h"
  26 #include "util/fast_idiv_by_const.h"
  27 #include "util/u_math.h"
  28
  29 static nir_ssa_def *
  30 build_udiv(nir_builder *b, nir_ssa_def *n, uint64_t d)
  31 {
  32    if (d == 0) {
  33       return nir_imm_intN_t(b, 0, n->bit_size);
  34    } else if (util_is_power_of_two_or_zero64(d)) {
  35       return nir_ushr(b, n, nir_imm_int(b, util_logbase2_64(d)));
  36    } else {
  37       struct util_fast_udiv_info m =
  38          util_compute_fast_udiv_info(d, n->bit_size, n->bit_size);
  39
  40       if (m.pre_shift)
  41          n = nir_ushr(b, n, nir_imm_int(b, m.pre_shift));
  42       if (m.increment)
  43          n = nir_uadd_sat(b, n, nir_imm_intN_t(b, m.increment, n->bit_size));
  44       n = nir_umul_high(b, n, nir_imm_intN_t(b, m.multiplier, n->bit_size));
  45       if (m.post_shift)
  46          n = nir_ushr(b, n, nir_imm_int(b, m.post_shift));
  47
  48       return n;
  49    }
  50 }
  51
  52 static nir_ssa_def *
  53 build_umod(nir_builder *b, nir_ssa_def *n, uint64_t d)
  54 {
  55    if (d == 0) {
  56       return nir_imm_intN_t(b, 0, n->bit_size);
  57    } else if (util_is_power_of_two_or_zero64(d)) {
  58       return nir_iand(b, n, nir_imm_intN_t(b, d - 1, n->bit_size));
  59    } else {
  60       return nir_isub(b, n, nir_imul(b, build_udiv(b, n, d),
  61                                         nir_imm_intN_t(b, d, n->bit_size)));
  62    }
  63 }
  64
  65 static nir_ssa_def *
  66 build_idiv(nir_builder *b, nir_ssa_def *n, int64_t d)
  67 {
  68    if (d == 0) {
  69       return nir_imm_intN_t(b, 0, n->bit_size);
  70    } else if (d == 1) {
  71       return n;
  72    } else if (d == -1) {
  73       return nir_ineg(b, n);
  74    } else if (util_is_power_of_two_or_zero64(d)) {
  75       uint64_t abs_d = d < 0 ? -d : d;
  76       nir_ssa_def *uq = nir_ishr(b, n, nir_imm_int(b, util_logbase2_64(abs_d)));
  77       nir_ssa_def *n_neg = nir_ilt(b, n, nir_imm_intN_t(b, 0, n->bit_size));
  78       nir_ssa_def *neg = d < 0 ? nir_inot(b, n_neg) : n_neg;
  79       return nir_bcsel(b, neg, nir_ineg(b, uq), uq);
  80    } else {
  81       struct util_fast_sdiv_info m =
  82          util_compute_fast_sdiv_info(d, n->bit_size);
  83
  84       nir_ssa_def *res =
  85          nir_imul_high(b, n, nir_imm_intN_t(b, m.multiplier, n->bit_size));
  86       if (d > 0 && m.multiplier < 0)
  87          res = nir_iadd(b, res, n);
  88       if (d < 0 && m.multiplier > 0)
  89          res = nir_isub(b, res, n);
  90       if (m.shift)
  91          res = nir_ishr(b, res, nir_imm_int(b, m.shift));
  92       res = nir_iadd(b, res, nir_ushr(b, res, nir_imm_int(b, n->bit_size - 1)));
  93
  94       return res;
  95    }
  96 }
  97
  98 static bool
  99 nir_opt_idiv_const_instr(nir_builder *b, nir_alu_instr *alu)
 100 {
 101    assert(alu->dest.dest.is_ssa);
 102    assert(alu->src[0].src.is_ssa && alu->src[1].src.is_ssa);
 103
 104    nir_const_value *const_denom = nir_src_as_const_value(alu->src[1].src);
 105    if (!const_denom)
 106       return false;
 107
 108    unsigned bit_size = alu->src[1].src.ssa->bit_size;
 109
 110    b->cursor = nir_before_instr(&alu->instr);
 111
 112    nir_ssa_def *q[4];
 113    for (unsigned comp = 0; comp < alu->dest.dest.ssa.num_components; comp++) {
 114       /* Get the numerator for the channel */
 115       nir_ssa_def *n = nir_channel(b, alu->src[0].src.ssa,
 116                                    alu->src[0].swizzle[comp]);
 117
 118       /* Get the denominator for the channel */
 119       int64_t d;
 120       switch (bit_size) {
 121       case 8:
 122          d = const_denom->i8[alu->src[1].swizzle[comp]];
 123          break;
 124       case 16:
 125          d = const_denom->i16[alu->src[1].swizzle[comp]];
 126          break;
 127       case 32:
 128          d = const_denom->i32[alu->src[1].swizzle[comp]];
 129          break;
 130       case 64:
 131          d = const_denom->i64[alu->src[1].swizzle[comp]];
 132          break;
 133       default:
 134          unreachable("Invalid bit size");
 135       }
 136
 137       nir_alu_type d_type = nir_op_infos[alu->op].input_types[1];
 138       if (nir_alu_type_get_base_type(d_type) == nir_type_uint) {
 139          /* The code above sign-extended.  If we're lowering an unsigned op,
 140           * we need to mask it off to the correct number of bits so that a
 141           * cast to uint64_t will do the right thing.
 142           */
 143          if (bit_size < 64)
 144             d &= (1ull << bit_size) - 1;
 145       }
 146
 147       switch (alu->op) {
 148       case nir_op_udiv:
 149          q[comp] = build_udiv(b, n, d);
 150          break;
 151       case nir_op_idiv:
 152          q[comp] = build_idiv(b, n, d);
 153          break;
 154       case nir_op_umod:
 155          q[comp] = build_umod(b, n, d);
 156          break;
 157       default:
 158          unreachable("Unknown integer division op");
 159       }
 160    }
 161
 162    nir_ssa_def *qvec = nir_vec(b, q, alu->dest.dest.ssa.num_components);
 163    nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(qvec));
 164    nir_instr_remove(&alu->instr);
 165
 166    return true;
 167 }
 168
 169 static bool
 170 nir_opt_idiv_const_impl(nir_function_impl *impl, unsigned min_bit_size)
 171 {
 172    bool progress = false;
 173
 174    nir_builder b;
 175    nir_builder_init(&b, impl);
 176
 177    nir_foreach_block(block, impl) {
 178       nir_foreach_instr_safe(instr, block) {
 179          if (instr->type != nir_instr_type_alu)
 180             continue;
 181
 182          nir_alu_instr *alu = nir_instr_as_alu(instr);
 183          if (alu->op != nir_op_udiv &&
 184              alu->op != nir_op_idiv &&
 185              alu->op != nir_op_umod)
 186             continue;
 187
 188          assert(alu->dest.dest.is_ssa);
 189          if (alu->dest.dest.ssa.bit_size < min_bit_size)
 190             continue;
 191
 192          progress |= nir_opt_idiv_const_instr(&b, alu);
 193       }
 194    }
 195
 196    if (progress) {
 197       nir_metadata_preserve(impl, nir_metadata_block_index |
 198                                   nir_metadata_dominance);
 199    }
 200
 201    return progress;
 202 }
 203
 204 bool
 205 nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size)
 206 {
 207    bool progress = false;
 208
 209    nir_foreach_function(function, shader) {
 210       if (function->impl)
 211          progress |= nir_opt_idiv_const_impl(function->impl, min_bit_size);
 212    }
 213
 214    return progress;
 215 }