anv: Move gen8+ push constant packet workaround.
[mesa.git] src/intel/vulkan/anv_nir_compute_push_layout.c
/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "compiler/brw_nir.h"
#include "util/mesa-sha1.h"

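/* Computes the push constant layout for a shader: finds the range of
 * anv_push_constants the shader actually reads, rewrites the NIR to load
 * relative to that range, and fills out map->push_ranges, promoting UBO
 * ranges to push constants on Haswell and later for non-compute stages.
 */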
void
anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
                            nir_shader *nir,
                            struct brw_stage_prog_data *prog_data,
                            struct anv_pipeline_bind_map *map,
                            void *mem_ctx)
{
   memset(map->push_ranges, 0, sizeof(map->push_ranges));

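   /* Walk the shader and compute the union [push_start, push_end) of the
    * byte ranges touched by every load_push_constant intrinsic.
    */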
   unsigned push_start = UINT_MAX, push_end = 0;
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_push_constant)
               continue;

            unsigned base = nir_intrinsic_base(intrin);
            unsigned range = nir_intrinsic_range(intrin);
            push_start = MIN2(push_start, base);
            push_end = MAX2(push_end, base + range);
         }
      }
   }

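   /* If the walk above found no load_push_constant, push_start is still
    * UINT_MAX and push_end is still 0, so this comparison is false.
    */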
   const bool has_push_intrinsic = push_start <= push_end;

   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      /* For compute shaders, we always have to have the subgroup ID. The
       * back-end compiler will "helpfully" add it for us in the last push
       * constant slot. Yes, there is an off-by-one error here, but that's
       * because the back-end will add it, so we want to claim one dword
       * less than the full amount including gl_SubgroupId.
       */
      assert(push_end <= offsetof(struct anv_push_constants, cs.subgroup_id));
      push_end = offsetof(struct anv_push_constants, cs.subgroup_id);
   }

   /* Align push_start down to a 32B boundary and make it no larger than
    * push_end (no push constants is indicated by push_start = UINT_MAX).
    */
   push_start = MIN2(push_start, push_end);
   push_start &= ~31u;

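   /* For example, a single load at base 36 with range 8 gives
    * push_start = 36, which "& ~31u" rounds down to 32.
    */

   /* Rewrite each load_push_constant as a load_uniform whose base is
    * relative to the start of the pushed range.
    */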
   if (has_push_intrinsic) {
      nir_foreach_function(function, nir) {
         if (!function->impl)
            continue;

         nir_foreach_block(block, function->impl) {
            nir_foreach_instr(instr, block) {
               if (instr->type != nir_instr_type_intrinsic)
                  continue;

               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
               if (intrin->intrinsic != nir_intrinsic_load_push_constant)
                  continue;

               intrin->intrinsic = nir_intrinsic_load_uniform;
               nir_intrinsic_set_base(intrin,
                                      nir_intrinsic_base(intrin) -
                                      push_start);
            }
         }
      }
   }

   /* For vec4, our push data size needs to be aligned to a vec4; for
    * scalar, it needs to be aligned to a DWORD.
    */
   const unsigned align =
      pdevice->compiler->scalar_stage[nir->info.stage] ? 4 : 16;
   nir->num_uniforms = ALIGN(push_end - push_start, align);
   prog_data->nr_params = nir->num_uniforms / 4;
   prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params);

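   /* The fields of anv_push_range are in units of 32-byte registers, the
    * granularity at which the hardware pushes constants.
    */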
   struct anv_push_range push_constant_range = {
      .set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
      .start = push_start / 32,
      .length = DIV_ROUND_UP(push_end - push_start, 32),
   };

   if ((pdevice->info.gen >= 8 || pdevice->info.is_haswell) &&
       nir->info.stage != MESA_SHADER_COMPUTE) {
      brw_nir_analyze_ubo_ranges(pdevice->compiler, nir, NULL,
                                 prog_data->ubo_ranges);

      /* We can push at most 64 registers worth of data. The back-end
       * compiler would do this fixup for us but we'd like to calculate
       * the push constant layout ourselves.
       */
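      /* For example, if push constants occupy 4 registers and the first
       * UBO range asks for 62, it is clamped to 60 and every later range
       * is clamped to zero.
       */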
      unsigned total_push_regs = push_constant_range.length;
      for (unsigned i = 0; i < 4; i++) {
         if (total_push_regs + prog_data->ubo_ranges[i].length > 64)
            prog_data->ubo_ranges[i].length = 64 - total_push_regs;
         total_push_regs += prog_data->ubo_ranges[i].length;
      }
      assert(total_push_regs <= 64);

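      /* Fill map->push_ranges with the push constant range first, if
       * present, followed by the promoted UBO ranges.
       */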
      int n = 0;

      if (push_constant_range.length > 0)
         map->push_ranges[n++] = push_constant_range;

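      /* Each promoted UBO range records where its block lives in the
       * descriptor set, including any dynamic offset index, so that the
       * command buffer code can resolve the actual address when it pushes
       * the range.
       */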
      for (int i = 0; i < 4; i++) {
         const struct brw_ubo_range *ubo_range = &prog_data->ubo_ranges[i];
         if (ubo_range->length == 0)
            continue;

         const struct anv_pipeline_binding *binding =
            &map->surface_to_descriptor[ubo_range->block];

         map->push_ranges[n++] = (struct anv_push_range) {
            .set = binding->set,
            .index = binding->index,
            .dynamic_offset_index = binding->dynamic_offset_index,
            .start = ubo_range->start,
            .length = ubo_range->length,
         };
      }
   } else {
      /* For Ivy Bridge, the push constant packets have a different rule
       * that would require us to iterate in the other direction and
       * possibly mess around with dynamic state base address. Don't
       * bother; just emit regular push constants at n = 0.
       *
       * In the compute case, we don't have multiple push ranges so it's
       * better to just provide one in push_ranges[0].
       */
      map->push_ranges[0] = push_constant_range;
   }

   /* Now that we're done computing the push constant portion of the
    * bind map, hash it. This lets us quickly determine if the actual
    * mapping has changed and not just a no-op pipeline change.
    */
   _mesa_sha1_compute(map->push_ranges,
                      sizeof(map->push_ranges),
                      map->push_sha1);
}

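/* Sanity-checks, after the back-end compiler has run, that the number of
 * registers prog_data says it will push still matches what the bind map
 * promises to push.
 */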
void
anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
                             struct anv_pipeline_bind_map *map)
{
#ifndef NDEBUG
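   /* prog_data->nr_params counts dwords, and eight dwords make one
    * 32-byte register, hence the divide by 8.
    */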
   unsigned prog_data_push_size = DIV_ROUND_UP(prog_data->nr_params, 8);
   for (unsigned i = 0; i < 4; i++)
      prog_data_push_size += prog_data->ubo_ranges[i].length;

   unsigned bind_map_push_size = 0;
   for (unsigned i = 0; i < 4; i++)
      bind_map_push_size += map->push_ranges[i].length;

   /* We could go through everything again but it should be enough to assert
    * that they push the same number of registers. This should alert us if
    * the back-end compiler decides to re-arrange stuff or shrink a range.
    */
   assert(prog_data_push_size == bind_map_push_size);
#endif
}