src/compiler/nir/nir_lower_idiv.c

   1 /*
   2  * Copyright © 2015 Red Hat
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  * Authors:
  24  *    Rob Clark <robclark@freedesktop.org>
  25  */
  26
  27 #include "nir.h"
  28 #include "nir_builder.h"
  29
  30 /* Lowers idiv/udiv/umod
  31  * Based on NV50LegalizeSSA::handleDIV()
  32  *
  33  * Note that this is probably not enough precision for compute shaders.
  34  * Perhaps we want a second higher precision (looping) version of this?
  35  * Or perhaps we assume if you can do compute shaders you can also
  36  * branch out to a pre-optimized shader library routine..
  37  */
  38
  39 static bool
  40 convert_instr(nir_builder *bld, nir_alu_instr *alu)
  41 {
  42    nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r, *rt;
  43    nir_op op = alu->op;
  44    bool is_signed;
  45
  46    if ((op != nir_op_idiv) &&
  47        (op != nir_op_udiv) &&
  48        (op != nir_op_imod) &&
  49        (op != nir_op_umod) &&
  50        (op != nir_op_irem))
  51       return false;
  52
  53    is_signed = (op == nir_op_idiv ||
  54                 op == nir_op_imod ||
  55                 op == nir_op_irem);
  56
  57    bld->cursor = nir_before_instr(&alu->instr);
  58
  59    numer = nir_ssa_for_alu_src(bld, alu, 0);
  60    denom = nir_ssa_for_alu_src(bld, alu, 1);
  61
  62    if (is_signed) {
  63       af = nir_i2f32(bld, numer);
  64       bf = nir_i2f32(bld, denom);
  65       af = nir_fabs(bld, af);
  66       bf = nir_fabs(bld, bf);
  67       a  = nir_iabs(bld, numer);
  68       b  = nir_iabs(bld, denom);
  69    } else {
  70       af = nir_u2f32(bld, numer);
  71       bf = nir_u2f32(bld, denom);
  72       a  = numer;
  73       b  = denom;
  74    }
  75
  76    /* get first result: */
  77    bf = nir_frcp(bld, bf);
  78    bf = nir_isub(bld, bf, nir_imm_int(bld, 2));  /* yes, really */
  79    q  = nir_fmul(bld, af, bf);
  80
  81    if (is_signed) {
  82       q = nir_f2i32(bld, q);
  83    } else {
  84       q = nir_f2u32(bld, q);
  85    }
  86
  87    /* get error of first result: */
  88    r = nir_imul(bld, q, b);
  89    r = nir_isub(bld, a, r);
  90    r = nir_u2f32(bld, r);
  91    r = nir_fmul(bld, r, bf);
  92    r = nir_f2u32(bld, r);
  93
  94    /* add quotients: */
  95    q = nir_iadd(bld, q, r);
  96
  97    /* correction: if modulus >= divisor, add 1 */
  98    r = nir_imul(bld, q, b);
  99    r = nir_isub(bld, a, r);
 100    rt = nir_uge(bld, r, b);
 101
 102    if (op == nir_op_umod) {
 103       q = nir_bcsel(bld, rt, nir_isub(bld, r, b), r);
 104    } else {
 105       r = nir_b2i32(bld, rt);
 106
 107       q = nir_iadd(bld, q, r);
 108       if (is_signed)  {
 109          /* fix the sign: */
 110          r = nir_ixor(bld, numer, denom);
 111          r = nir_ilt(bld, r, nir_imm_int(bld, 0));
 112          b = nir_ineg(bld, q);
 113          q = nir_bcsel(bld, r, b, q);
 114
 115          if (op == nir_op_imod || op == nir_op_irem) {
 116             q = nir_imul(bld, q, denom);
 117             q = nir_isub(bld, numer, q);
 118             if (op == nir_op_imod) {
 119                q = nir_bcsel(bld, nir_ieq(bld, q, nir_imm_int(bld, 0)),
 120                              nir_imm_int(bld, 0),
 121                              nir_bcsel(bld, r, nir_iadd(bld, q, denom), q));
 122             }
 123          }
 124       }
 125    }
 126
 127    assert(alu->dest.dest.is_ssa);
 128    nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q));
 129
 130    return true;
 131 }
 132
 133 static bool
 134 convert_impl(nir_function_impl *impl)
 135 {
 136    nir_builder b;
 137    nir_builder_init(&b, impl);
 138    bool progress = false;
 139
 140    nir_foreach_block(block, impl) {
 141       nir_foreach_instr_safe(instr, block) {
 142          if (instr->type == nir_instr_type_alu)
 143             progress |= convert_instr(&b, nir_instr_as_alu(instr));
 144       }
 145    }
 146
 147    nir_metadata_preserve(impl, nir_metadata_block_index |
 148                                nir_metadata_dominance);
 149
 150    return progress;
 151 }
 152
 153 bool
 154 nir_lower_idiv(nir_shader *shader)
 155 {
 156    bool progress = false;
 157
 158    nir_foreach_function(function, shader) {
 159       if (function->impl)
 160          progress |= convert_impl(function->impl);
 161    }
 162
 163    return progress;
 164 }