src/intel/compiler/brw_nir_lower_cs_intrinsics.c

   1 /*
   2  * Copyright (c) 2016 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "brw_nir.h"
  25 #include "compiler/nir/nir_builder.h"
  26
  27 struct lower_intrinsics_state {
  28    nir_shader *nir;
  29    nir_function_impl *impl;
  30    bool progress;
  31    nir_builder builder;
  32    unsigned local_workgroup_size;
  33 };
  34
  35 static bool
  36 lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
  37                                   nir_block *block)
  38 {
  39    bool progress = false;
  40    nir_builder *b = &state->builder;
  41    nir_shader *nir = state->nir;
  42
  43    /* Reuse calculated values inside the block. */
  44    nir_ssa_def *local_index = NULL;
  45    nir_ssa_def *local_id = NULL;
  46
  47    nir_foreach_instr_safe(instr, block) {
  48       if (instr->type != nir_instr_type_intrinsic)
  49          continue;
  50
  51       nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
  52
  53       b->cursor = nir_after_instr(&intrinsic->instr);
  54
  55       nir_ssa_def *sysval;
  56       switch (intrinsic->intrinsic) {
  57       case nir_intrinsic_load_local_invocation_index:
  58       case nir_intrinsic_load_local_invocation_id: {
  59          /* First time we are using those, so let's calculate them. */
  60          if (!local_index) {
  61             assert(!local_id);
  62
  63             nir_ssa_def *subgroup_id = nir_load_subgroup_id(b);
  64
  65             nir_ssa_def *thread_local_id =
  66                nir_imul(b, subgroup_id, nir_load_simd_width_intel(b));
  67             nir_ssa_def *channel = nir_load_subgroup_invocation(b);
  68             nir_ssa_def *linear = nir_iadd(b, channel, thread_local_id);
  69
  70             nir_ssa_def *size_x;
  71             nir_ssa_def *size_y;
  72             if (state->nir->info.cs.local_size_variable) {
  73                nir_ssa_def *size_xyz = nir_load_local_group_size(b);
  74                size_x = nir_channel(b, size_xyz, 0);
  75                size_y = nir_channel(b, size_xyz, 1);
  76             } else {
  77                size_x = nir_imm_int(b, nir->info.cs.local_size[0]);
  78                size_y = nir_imm_int(b, nir->info.cs.local_size[1]);
  79             }
  80
  81             /* The local invocation index and ID must respect the following
  82              *
  83              *    gl_LocalInvocationID.x =
  84              *       gl_LocalInvocationIndex % gl_WorkGroupSize.x;
  85              *    gl_LocalInvocationID.y =
  86              *       (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %
  87              *       gl_WorkGroupSize.y;
  88              *    gl_LocalInvocationID.z =
  89              *       (gl_LocalInvocationIndex /
  90              *        (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
  91              *       gl_WorkGroupSize.z;
  92              *
  93              * However, the final % gl_WorkGroupSize.z does nothing unless we
  94              * accidentally end up with a gl_LocalInvocationIndex that is too
  95              * large so it can safely be omitted.
  96              */
  97
  98             if (state->nir->info.cs.derivative_group != DERIVATIVE_GROUP_QUADS) {
  99                /* If we are not grouping in quads, just set the local invocatio
 100                 * index linearly, and calculate local invocation ID from that.
 101                 */
 102                local_index = linear;
 103
 104                nir_ssa_def *id_x, *id_y, *id_z;
 105                id_x = nir_umod(b, local_index, size_x);
 106                id_y = nir_umod(b, nir_udiv(b, local_index, size_x), size_y);
 107                id_z = nir_udiv(b, local_index, nir_imul(b, size_x, size_y));
 108                local_id = nir_vec3(b, id_x, id_y, id_z);
 109             } else {
 110                /* For quads, first we figure out the 2x2 grid the invocation
 111                 * belongs to -- treating extra Z layers as just more rows.
 112                 * Then map that into local invocation ID (trivial) and local
 113                 * invocation index.  Skipping Z simplify index calculation.
 114                 */
 115
 116                nir_ssa_def *one = nir_imm_int(b, 1);
 117                nir_ssa_def *double_size_x = nir_ishl(b, size_x, one);
 118
 119                /* ID within a pair of rows, where each group of 4 is 2x2 quad. */
 120                nir_ssa_def *row_pair_id = nir_umod(b, linear, double_size_x);
 121                nir_ssa_def *y_row_pairs = nir_udiv(b, linear, double_size_x);
 122
 123                nir_ssa_def *x =
 124                   nir_ior(b,
 125                           nir_iand(b, row_pair_id, one),
 126                           nir_iand(b, nir_ishr(b, row_pair_id, one),
 127                                    nir_imm_int(b, 0xfffffffe)));
 128                nir_ssa_def *y =
 129                   nir_ior(b,
 130                           nir_ishl(b, y_row_pairs, one),
 131                           nir_iand(b, nir_ishr(b, row_pair_id, one), one));
 132
 133                local_id = nir_vec3(b, x,
 134                                    nir_umod(b, y, size_y),
 135                                    nir_udiv(b, y, size_y));
 136                local_index = nir_iadd(b, x, nir_imul(b, y, size_x));
 137             }
 138          }
 139
 140          assert(local_id);
 141          assert(local_index);
 142          if (intrinsic->intrinsic == nir_intrinsic_load_local_invocation_id)
 143             sysval = local_id;
 144          else
 145             sysval = local_index;
 146          break;
 147       }
 148
 149       case nir_intrinsic_load_num_subgroups: {
 150          nir_ssa_def *size;
 151          if (state->nir->info.cs.local_size_variable) {
 152             nir_ssa_def *size_xyz = nir_load_local_group_size(b);
 153             nir_ssa_def *size_x = nir_channel(b, size_xyz, 0);
 154             nir_ssa_def *size_y = nir_channel(b, size_xyz, 1);
 155             nir_ssa_def *size_z = nir_channel(b, size_xyz, 2);
 156             size = nir_imul(b, nir_imul(b, size_x, size_y), size_z);
 157          } else {
 158             size = nir_imm_int(b, nir->info.cs.local_size[0] *
 159                                   nir->info.cs.local_size[1] *
 160                                   nir->info.cs.local_size[2]);
 161          }
 162
 163          /* Calculate the equivalent of DIV_ROUND_UP. */
 164          nir_ssa_def *simd_width = nir_load_simd_width_intel(b);
 165          sysval =
 166             nir_udiv(b, nir_iadd_imm(b, nir_iadd(b, size, simd_width), -1),
 167                         simd_width);
 168          break;
 169       }
 170
 171       default:
 172          continue;
 173       }
 174
 175       nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, nir_src_for_ssa(sysval));
 176       nir_instr_remove(&intrinsic->instr);
 177
 178       state->progress = true;
 179    }
 180
 181    return progress;
 182 }
 183
 184 static void
 185 lower_cs_intrinsics_convert_impl(struct lower_intrinsics_state *state)
 186 {
 187    nir_builder_init(&state->builder, state->impl);
 188
 189    nir_foreach_block(block, state->impl) {
 190       lower_cs_intrinsics_convert_block(state, block);
 191    }
 192
 193    nir_metadata_preserve(state->impl,
 194                          nir_metadata_block_index | nir_metadata_dominance);
 195 }
 196
 197 bool
 198 brw_nir_lower_cs_intrinsics(nir_shader *nir)
 199 {
 200    assert(nir->info.stage == MESA_SHADER_COMPUTE);
 201
 202    struct lower_intrinsics_state state = {
 203       .nir = nir,
 204    };
 205
 206    if (!nir->info.cs.local_size_variable) {
 207       state.local_workgroup_size = nir->info.cs.local_size[0] *
 208                                    nir->info.cs.local_size[1] *
 209                                    nir->info.cs.local_size[2];
 210    } else {
 211       state.local_workgroup_size = nir->info.cs.max_variable_local_size;
 212    }
 213
 214    /* Constraints from NV_compute_shader_derivatives. */
 215    if (nir->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS &&
 216        !nir->info.cs.local_size_variable) {
 217       assert(nir->info.cs.local_size[0] % 2 == 0);
 218       assert(nir->info.cs.local_size[1] % 2 == 0);
 219    } else if (nir->info.cs.derivative_group == DERIVATIVE_GROUP_LINEAR &&
 220               !nir->info.cs.local_size_variable) {
 221       assert(state.local_workgroup_size % 4 == 0);
 222    }
 223
 224    nir_foreach_function(function, nir) {
 225       if (function->impl) {
 226          state.impl = function->impl;
 227          lower_cs_intrinsics_convert_impl(&state);
 228       }
 229    }
 230
 231    return state.progress;
 232 }