src/compiler/nir/nir_lower_amul.c

   1 /*
   2  * Copyright © 2019 Google, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23
  24 #include "nir.h"
  25 #include "nir_vla.h"
  26
  27 /* Lowering for amul instructions, for drivers that support imul24.
  28  * This pass will analyze indirect derefs, and convert corresponding
  29  * amul instructions to either imul or imul24, depending on the
  30  * required range.
  31  *
  32  * 1) Analyze the uniform variables and build a table of UBOs and SSBOs
  33  *    that are either too large, or might be too large (unknown size)
  34  *    for imul24
  35  *
  36  * 2) Loop thru looking at all the intrinsics, finding dereferences of
  37  *    large variables, and recursively replacing all amul instructions
  38  *    used with imul
  39  *
  40  * 3) Finally loop again thru all instructions replacing any remaining
  41  *    amul with imul24.  At this point any remaining amul instructions
  42  *    are not involved in calculating an offset into a large variable,
  43  *    thanks to the 2nd step, so they can be safely replaced with imul24.
  44  *
  45  * Using two passes over all the instructions lets us handle the case
  46  * where, due to CSE, an amul is used to calculate an offset into both
  47  * a large and small variable.
  48  */
  49
  50 typedef struct {
  51    nir_shader *shader;
  52
  53    int (*type_size)(const struct glsl_type *, bool);
  54
  55    /* Tables of UBOs and SSBOs mapping driver_location/base whether
  56     * they are too large to use imul24:
  57     */
  58    bool *large_ubos;
  59    bool *large_ssbos;
  60
  61    /* for cases that we cannot determine UBO/SSBO index, track if *any*
  62     * UBO/SSBO is too large for imul24:
  63     */
  64    bool has_large_ubo;
  65    bool has_large_ssbo;
  66
  67    unsigned max_slot;
  68 } lower_state;
  69
  70 /* Lower 'amul's in offset src of large variables to 'imul': */
  71 static bool
  72 lower_large_src(nir_src *src, void *s)
  73 {
  74    lower_state *state = s;
  75
  76    assert(src->is_ssa);
  77
  78    nir_instr *parent = src->ssa->parent_instr;
  79
  80    /* No need to visit instructions we've already visited.. this also
  81     * avoids infinite recursion when phi's are involved:
  82     */
  83    if (parent->pass_flags)
  84       return false;
  85
  86    bool progress = nir_foreach_src(parent, lower_large_src, state);
  87
  88    if (parent->type == nir_instr_type_alu) {
  89       nir_alu_instr *alu = nir_instr_as_alu(parent);
  90       if (alu->op == nir_op_amul) {
  91          alu->op = nir_op_imul;
  92          progress = true;
  93       }
  94    }
  95
  96    parent->pass_flags = 1;
  97
  98    return progress;
  99 }
 100
 101 static bool
 102 large_ubo(lower_state *state, nir_src src)
 103 {
 104    if (!nir_src_is_const(src))
 105       return state->has_large_ubo;
 106    unsigned idx = nir_src_as_uint(src);
 107    assert(idx < state->shader->info.num_ubos);
 108    return state->large_ubos[idx];
 109 }
 110
 111 static bool
 112 large_ssbo(lower_state *state, nir_src src)
 113 {
 114    if (!nir_src_is_const(src))
 115       return state->has_large_ssbo;
 116    unsigned idx = nir_src_as_uint(src);
 117    assert(idx < state->shader->info.num_ssbos);
 118    return state->large_ssbos[idx];
 119 }
 120
 121 static bool
 122 lower_intrinsic(lower_state *state, nir_intrinsic_instr *intr)
 123 {
 124    switch (intr->intrinsic) {
 125    case nir_intrinsic_load_ubo:
 126       //# src[] = { buffer_index, offset }.
 127       if (large_ubo(state, intr->src[0]))
 128          return lower_large_src(&intr->src[1], state);
 129       return false;
 130
 131    case nir_intrinsic_load_ssbo:
 132       //# src[] = { buffer_index, offset }.
 133       if (large_ssbo(state, intr->src[0]))
 134          return lower_large_src(&intr->src[1], state);
 135       return false;
 136
 137    case nir_intrinsic_store_ssbo:
 138       //# src[] = { value, block_index, offset }
 139       if (large_ssbo(state, intr->src[1]))
 140          return lower_large_src(&intr->src[2], state);
 141       return false;
 142
 143    case nir_intrinsic_ssbo_atomic_add:
 144    case nir_intrinsic_ssbo_atomic_imin:
 145    case nir_intrinsic_ssbo_atomic_umin:
 146    case nir_intrinsic_ssbo_atomic_imax:
 147    case nir_intrinsic_ssbo_atomic_umax:
 148    case nir_intrinsic_ssbo_atomic_and:
 149    case nir_intrinsic_ssbo_atomic_or:
 150    case nir_intrinsic_ssbo_atomic_xor:
 151    case nir_intrinsic_ssbo_atomic_exchange:
 152    case nir_intrinsic_ssbo_atomic_comp_swap:
 153    case nir_intrinsic_ssbo_atomic_fadd:
 154    case nir_intrinsic_ssbo_atomic_fmin:
 155    case nir_intrinsic_ssbo_atomic_fmax:
 156    case nir_intrinsic_ssbo_atomic_fcomp_swap:
 157       /* 0: SSBO index
 158        * 1: offset
 159        */
 160       if (large_ssbo(state, intr->src[0]))
 161          return lower_large_src(&intr->src[1], state);
 162       return false;
 163
 164    case nir_intrinsic_global_atomic_add:
 165    case nir_intrinsic_global_atomic_imin:
 166    case nir_intrinsic_global_atomic_umin:
 167    case nir_intrinsic_global_atomic_imax:
 168    case nir_intrinsic_global_atomic_umax:
 169    case nir_intrinsic_global_atomic_and:
 170    case nir_intrinsic_global_atomic_or:
 171    case nir_intrinsic_global_atomic_xor:
 172    case nir_intrinsic_global_atomic_exchange:
 173    case nir_intrinsic_global_atomic_comp_swap:
 174    case nir_intrinsic_global_atomic_fadd:
 175    case nir_intrinsic_global_atomic_fmin:
 176    case nir_intrinsic_global_atomic_fmax:
 177    case nir_intrinsic_global_atomic_fcomp_swap:
 178       /* just assume we that 24b is not sufficient: */
 179       return lower_large_src(&intr->src[0], state);
 180
 181    /* These should all be small enough to unconditionally use imul24: */
 182    case nir_intrinsic_shared_atomic_add:
 183    case nir_intrinsic_shared_atomic_imin:
 184    case nir_intrinsic_shared_atomic_umin:
 185    case nir_intrinsic_shared_atomic_imax:
 186    case nir_intrinsic_shared_atomic_umax:
 187    case nir_intrinsic_shared_atomic_and:
 188    case nir_intrinsic_shared_atomic_or:
 189    case nir_intrinsic_shared_atomic_xor:
 190    case nir_intrinsic_shared_atomic_exchange:
 191    case nir_intrinsic_shared_atomic_comp_swap:
 192    case nir_intrinsic_shared_atomic_fadd:
 193    case nir_intrinsic_shared_atomic_fmin:
 194    case nir_intrinsic_shared_atomic_fmax:
 195    case nir_intrinsic_shared_atomic_fcomp_swap:
 196    case nir_intrinsic_load_uniform:
 197    case nir_intrinsic_load_input:
 198    case nir_intrinsic_load_output:
 199    case nir_intrinsic_store_output:
 200    default:
 201       return false;
 202    }
 203 }
 204
 205 static bool
 206 lower_instr(lower_state *state, nir_instr *instr)
 207 {
 208    bool progress = false;
 209
 210    if (instr->type == nir_instr_type_intrinsic) {
 211       progress |= lower_intrinsic(state, nir_instr_as_intrinsic(instr));
 212    }
 213
 214    return progress;
 215 }
 216
 217 static bool
 218 is_large(lower_state *state, nir_variable *var)
 219 {
 220    const struct glsl_type *type = glsl_without_array(var->type);
 221    unsigned size = state->type_size(type, false);
 222
 223    /* if size is not known (ie. VLA) then assume the worst: */
 224    if (!size)
 225       return true;
 226
 227    return size >= (1 << 23);
 228 }
 229
 230 bool
 231 nir_lower_amul(nir_shader *shader,
 232                int (*type_size)(const struct glsl_type *, bool))
 233 {
 234    assert(shader->options->has_imul24);
 235    assert(type_size);
 236
 237    NIR_VLA_FILL(bool, large_ubos, shader->info.num_ubos, 0);
 238    NIR_VLA_FILL(bool, large_ssbos, shader->info.num_ssbos, 0);
 239
 240    lower_state state = {
 241       .shader = shader,
 242       .type_size = type_size,
 243       .large_ubos = large_ubos,
 244       .large_ssbos = large_ssbos,
 245    };
 246
 247    /* Figure out which UBOs or SSBOs are large enough to be
 248     * disqualified from imul24:
 249     */
 250    nir_foreach_variable_in_shader (var, shader) {
 251       if (var->data.mode == nir_var_mem_ubo) {
 252          if (is_large(&state, var)) {
 253             state.has_large_ubo = true;
 254             unsigned size = MAX2(1, glsl_array_size(var->type));
 255             for (unsigned i = 0; i < size; i++)
 256                state.large_ubos[var->data.binding + i] = true;
 257          }
 258       } else if (var->data.mode == nir_var_mem_ssbo) {
 259          if (is_large(&state, var)) {
 260             state.has_large_ssbo = true;
 261             unsigned size = MAX2(1, glsl_array_size(var->type));
 262             for (unsigned i = 0; i < size; i++)
 263                state.large_ssbos[var->data.binding + i] = true;
 264          }
 265       }
 266    }
 267
 268    /* clear pass flags: */
 269    nir_foreach_function(function, shader) {
 270       nir_function_impl *impl = function->impl;
 271       if (!impl)
 272          continue;
 273
 274       nir_foreach_block(block, impl) {
 275          nir_foreach_instr(instr, block) {
 276             instr->pass_flags = 0;
 277          }
 278       }
 279    }
 280
 281    bool progress = false;
 282    nir_foreach_function(function, shader) {
 283       nir_function_impl *impl = function->impl;
 284
 285       if (!impl)
 286          continue;
 287
 288       nir_foreach_block(block, impl) {
 289          nir_foreach_instr(instr, block) {
 290             progress |= lower_instr(&state, instr);
 291          }
 292       }
 293    }
 294
 295    /* At this point, all 'amul's used in calculating an offset into
 296     * a large variable have been replaced with 'imul'.  So remaining
 297     * 'amul's can be replaced with 'imul24':
 298     */
 299    nir_foreach_function(function, shader) {
 300       nir_function_impl *impl = function->impl;
 301
 302       if (!impl)
 303          continue;
 304
 305       nir_foreach_block(block, impl) {
 306          nir_foreach_instr(instr, block) {
 307             if (instr->type != nir_instr_type_alu)
 308                continue;
 309
 310             nir_alu_instr *alu = nir_instr_as_alu(instr);
 311             if (alu->op != nir_op_amul)
 312                continue;
 313
 314             alu->op = nir_op_imul24;
 315             progress |= true;
 316          }
 317       }
 318
 319       nir_metadata_preserve(impl, nir_metadata_block_index |
 320                                   nir_metadata_dominance);
 321
 322    }
 323
 324    return progress;
 325 }