anv: Rework push constant handling
authorJason Ekstrand <jason@jlekstrand.net>
Thu, 7 Nov 2019 23:16:14 +0000 (17:16 -0600)
committerJason Ekstrand <jason@jlekstrand.net>
Mon, 18 Nov 2019 18:35:14 +0000 (18:35 +0000)
This substantially reworks both the state setup side of push constant
handling and the pipeline compile side.  The fundamental change here is
that we're no longer respecting the prog_data::param array and instead
are just instructing the back-end compiler to leave the array alone.
This makes the state setup side substantially simpler because we can now
just memcpy the whole block of push constants and don't have to
upload one DWORD at a time.

This also means that we can compute the full push constant layout
up-front and just trust the back-end compiler to not mess with it.
Maybe one day we'll decide that the back-end compiler can do useful
things there again but for now, this is functionally no different from
what we had before this commit and makes the NIR handling cleaner.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
src/intel/Makefile.sources
src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_nir.h
src/intel/vulkan/anv_nir_add_base_work_group_id.c
src/intel/vulkan/anv_nir_apply_pipeline_layout.c
src/intel/vulkan/anv_nir_compute_push_layout.c
src/intel/vulkan/anv_nir_lower_push_constants.c [deleted file]
src/intel/vulkan/anv_pipeline.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/meson.build

index b3861af32f55109829962b2a543197cb3159cc4e..252a0cb5320db93be555bc129812b287a9437a1a 100644 (file)
@@ -260,7 +260,6 @@ VULKAN_FILES := \
        vulkan/anv_nir_apply_pipeline_layout.c \
        vulkan/anv_nir_compute_push_layout.c \
        vulkan/anv_nir_lower_multiview.c \
-       vulkan/anv_nir_lower_push_constants.c \
        vulkan/anv_nir_lower_ycbcr_textures.c \
        vulkan/anv_pass.c \
        vulkan/anv_perf.c \
index 12ab3a1f7285ac9124343e9a43beb487bcf59236..f14225b963ae39f5f2f70ab695e718258da1ee97 100644 (file)
@@ -745,71 +745,18 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
    return state;
 }
 
-static uint32_t
-anv_push_constant_value(const struct anv_cmd_pipeline_state *state,
-                        const struct anv_push_constants *data, uint32_t param)
-{
-   if (BRW_PARAM_IS_BUILTIN(param)) {
-      switch (param) {
-      case BRW_PARAM_BUILTIN_ZERO:
-         return 0;
-      case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_X:
-         return data->cs.base_work_group_id[0];
-      case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Y:
-         return data->cs.base_work_group_id[1];
-      case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Z:
-         return data->cs.base_work_group_id[2];
-      default:
-         unreachable("Invalid param builtin");
-      }
-   } else if (ANV_PARAM_IS_PUSH(param)) {
-      uint32_t offset = ANV_PARAM_PUSH_OFFSET(param);
-      assert(offset % sizeof(uint32_t) == 0);
-      if (offset < sizeof(data->client_data))
-         return *(uint32_t *)((uint8_t *)data + offset);
-      else
-         return 0;
-   } else if (ANV_PARAM_IS_DYN_OFFSET(param)) {
-      unsigned idx = ANV_PARAM_DYN_OFFSET_IDX(param);
-      assert(idx < MAX_DYNAMIC_BUFFERS);
-      return data->dynamic_offsets[idx];
-   }
-
-   assert(!"Invalid param");
-   return 0;
-}
-
 struct anv_state
 anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
                               gl_shader_stage stage)
 {
-   struct anv_cmd_pipeline_state *pipeline_state = &cmd_buffer->state.gfx.base;
-   struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
-
-   /* If we don't have this stage, bail. */
-   if (!anv_pipeline_has_stage(pipeline, stage))
-      return (struct anv_state) { .offset = 0 };
-
    struct anv_push_constants *data =
       &cmd_buffer->state.push_constants[stage];
-   const struct brw_stage_prog_data *prog_data =
-      pipeline->shaders[stage]->prog_data;
-
-   /* If we don't actually have any push constants, bail. */
-   if (prog_data == NULL || prog_data->nr_params == 0)
-      return (struct anv_state) { .offset = 0 };
 
    struct anv_state state =
       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
-                                         prog_data->nr_params * sizeof(float),
+                                         sizeof(struct anv_push_constants),
                                          32 /* bottom 5 bits MBZ */);
-
-   /* Walk through the param array and fill the buffer with data */
-   uint32_t *u32_map = state.map;
-   for (unsigned i = 0; i < prog_data->nr_params; i++) {
-      u32_map[i] = anv_push_constant_value(pipeline_state, data,
-                                           prog_data->param[i]);
-   }
+   memcpy(state.map, data, sizeof(struct anv_push_constants));
 
    return state;
 }
@@ -817,14 +764,13 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
 struct anv_state
 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
 {
-   struct anv_cmd_pipeline_state *pipeline_state = &cmd_buffer->state.compute.base;
    struct anv_push_constants *data =
       &cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE];
    struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
-   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
+   const struct anv_push_range *range =
+      &pipeline->shaders[MESA_SHADER_COMPUTE]->bind_map.push_ranges[0];
 
-   /* If we don't actually have any push constants, bail. */
    if (cs_prog_data->push.total.size == 0)
       return (struct anv_state) { .offset = 0 };
 
@@ -837,33 +783,25 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
                                          aligned_total_push_constants_size,
                                          push_constant_alignment);
 
-   /* Walk through the param array and fill the buffer with data */
-   uint32_t *u32_map = state.map;
+   void *dst = state.map;
+   const void *src = (char *)data + (range->start * 32);
 
    if (cs_prog_data->push.cross_thread.size > 0) {
-      for (unsigned i = 0;
-           i < cs_prog_data->push.cross_thread.dwords;
-           i++) {
-         assert(prog_data->param[i] != BRW_PARAM_BUILTIN_SUBGROUP_ID);
-         u32_map[i] = anv_push_constant_value(pipeline_state, data,
-                                              prog_data->param[i]);
-      }
+      memcpy(dst, src, cs_prog_data->push.cross_thread.size);
+      dst += cs_prog_data->push.cross_thread.size;
+      src += cs_prog_data->push.cross_thread.size;
    }
 
    if (cs_prog_data->push.per_thread.size > 0) {
       for (unsigned t = 0; t < cs_prog_data->threads; t++) {
-         unsigned dst =
-            8 * (cs_prog_data->push.per_thread.regs * t +
-                 cs_prog_data->push.cross_thread.regs);
-         unsigned src = cs_prog_data->push.cross_thread.dwords;
-         for ( ; src < prog_data->nr_params; src++, dst++) {
-            if (prog_data->param[src] == BRW_PARAM_BUILTIN_SUBGROUP_ID) {
-               u32_map[dst] = t;
-            } else {
-               u32_map[dst] = anv_push_constant_value(pipeline_state, data,
-                                                      prog_data->param[src]);
-            }
-         }
+         memcpy(dst, src, cs_prog_data->push.per_thread.size);
+
+         uint32_t *subgroup_id = dst +
+            offsetof(struct anv_push_constants, cs.subgroup_id) -
+            (range->start * 32 + cs_prog_data->push.cross_thread.size);
+         *subgroup_id = t;
+
+         dst += cs_prog_data->push.per_thread.size;
       }
    }
 
index 0fc507c12a80a882564fcb1b8a4921cbde9e6a9d..250f75e99367eb48cc77c60b99efb9734fef756a 100644 (file)
@@ -560,7 +560,7 @@ anv_physical_device_init(struct anv_physical_device *device,
    device->compiler->constant_buffer_0_is_relative =
       device->info.gen < 8 || !device->has_context_isolation;
    device->compiler->supports_shader_constants = true;
-   device->compiler->compact_params = true;
+   device->compiler->compact_params = false;
 
    /* Broadwell PRM says:
     *
index 8977599607e52774c8f2916f6fbc3756d3611c4c..361023e1593fda2e793abd8d1340cd933441bc92 100644 (file)
@@ -31,8 +31,6 @@
 extern "C" {
 #endif
 
-void anv_nir_lower_push_constants(nir_shader *shader);
-
 bool anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask);
 
 bool anv_nir_lower_ycbcr_textures(nir_shader *shader,
@@ -59,12 +57,16 @@ void anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
                                    struct brw_stage_prog_data *prog_data,
                                    struct anv_pipeline_bind_map *map);
 
-void anv_compute_push_layout(const struct anv_physical_device *pdevice,
-                             struct brw_stage_prog_data *prog_data,
-                             struct anv_pipeline_bind_map *map);
+void anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
+                                 nir_shader *nir,
+                                 struct brw_stage_prog_data *prog_data,
+                                 struct anv_pipeline_bind_map *map,
+                                 void *mem_ctx);
+
+void anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
+                                  struct anv_pipeline_bind_map *map);
 
-bool anv_nir_add_base_work_group_id(nir_shader *shader,
-                                    struct brw_cs_prog_data *prog_data);
+bool anv_nir_add_base_work_group_id(nir_shader *shader);
 
 #ifdef __cplusplus
 }
index a7c290bb52fdafcb986b698e8bb57dbca0d714f3..38892b96ec5477dc6a77c128293667839fb8f72f 100644 (file)
 #include "compiler/brw_compiler.h"
 
 bool
-anv_nir_add_base_work_group_id(nir_shader *shader,
-                               struct brw_cs_prog_data *prog_data)
+anv_nir_add_base_work_group_id(nir_shader *shader)
 {
    assert(shader->info.stage == MESA_SHADER_COMPUTE);
 
    nir_builder b;
-   int base_id_offset = -1;
    bool progress = false;
    nir_foreach_function(function, shader) {
       if (!function->impl)
@@ -51,27 +49,14 @@ anv_nir_add_base_work_group_id(nir_shader *shader,
 
             b.cursor = nir_after_instr(&load_id->instr);
 
-            if (base_id_offset < 0) {
-               /* If we don't have a set of BASE_WORK_GROUP_ID params,
-                * add them.
-                */
-               assert(shader->num_uniforms == prog_data->base.nr_params * 4);
-               uint32_t *param =
-                  brw_stage_prog_data_add_params(&prog_data->base, 3);
-               param[0] = BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_X;
-               param[1] = BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Y;
-               param[2] = BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Z;
-
-               base_id_offset = shader->num_uniforms;
-               shader->num_uniforms += 12;
-            }
-
             nir_intrinsic_instr *load_base =
-               nir_intrinsic_instr_create(shader, nir_intrinsic_load_uniform);
+               nir_intrinsic_instr_create(shader, nir_intrinsic_load_push_constant);
             load_base->num_components = 3;
             load_base->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
             nir_ssa_dest_init(&load_base->instr, &load_base->dest, 3, 32, NULL);
-            nir_intrinsic_set_base(load_base, base_id_offset);
+            nir_intrinsic_set_base(load_base,
+                                   offsetof(struct anv_push_constants,
+                                            cs.base_work_group_id));
             nir_intrinsic_set_range(load_base, 3 * sizeof(uint32_t));
             nir_builder_instr_insert(&b, &load_base->instr);
 
index da64ea0bdf9b54362348b8f1e1109793fec8e1dc..6ebc02cfdd789686fa345b0d68b96a7e282168b5 100644 (file)
@@ -46,9 +46,8 @@ struct apply_pipeline_layout_state {
    /* Place to flag lowered instructions so we don't lower them twice */
    struct set *lowered_instrs;
 
-   int dynamic_offset_uniform_start;
-
    bool uses_constants;
+   bool has_dynamic_buffers;
    uint8_t constants_offset;
    struct {
       bool desc_buffer_used;
@@ -564,7 +563,7 @@ lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
       if (!state->add_bounds_checks)
          desc = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
 
-      if (state->dynamic_offset_uniform_start >= 0) {
+      if (state->has_dynamic_buffers) {
          /* This shader has dynamic offsets and we have no way of knowing
           * (save from the dynamic offset base index) if this buffer has a
           * dynamic offset.
@@ -598,8 +597,10 @@ lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
          }
 
          nir_intrinsic_instr *dyn_load =
-            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
-         nir_intrinsic_set_base(dyn_load, state->dynamic_offset_uniform_start);
+            nir_intrinsic_instr_create(b->shader,
+                                       nir_intrinsic_load_push_constant);
+         nir_intrinsic_set_base(dyn_load, offsetof(struct anv_push_constants,
+                                                   dynamic_offsets));
          nir_intrinsic_set_range(dyn_load, MAX_DYNAMIC_BUFFERS * 4);
          dyn_load->src[0] = nir_src_for_ssa(nir_imul_imm(b, dyn_offset_idx, 4));
          dyn_load->num_components = 1;
@@ -1119,7 +1120,6 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
       .add_bounds_checks = robust_buffer_access,
       .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access),
       .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
-      .dynamic_offset_uniform_start = -1,
    };
 
    for (unsigned s = 0; s < layout->num_sets; s++) {
@@ -1209,18 +1209,16 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
    qsort(infos, used_binding_count, sizeof(struct binding_info),
          compare_binding_infos);
 
-   bool have_dynamic_buffers = false;
-
    for (unsigned i = 0; i < used_binding_count; i++) {
       unsigned set = infos[i].set, b = infos[i].binding;
       struct anv_descriptor_set_binding_layout *binding =
             &layout->set[set].layout->binding[b];
 
-      if (binding->dynamic_offset_index >= 0)
-         have_dynamic_buffers = true;
-
       const uint32_t array_size = binding->array_size;
 
+      if (binding->dynamic_offset_index >= 0)
+         state.has_dynamic_buffers = true;
+
       if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
          if (map->surface_count + array_size > MAX_BINDING_TABLE_SIZE ||
              anv_descriptor_requires_bindless(pdevice, binding, false)) {
@@ -1290,16 +1288,6 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
       }
    }
 
-   if (have_dynamic_buffers) {
-      state.dynamic_offset_uniform_start = shader->num_uniforms;
-      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
-                                                       MAX_DYNAMIC_BUFFERS);
-      for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
-         param[i] = ANV_PARAM_DYN_OFFSET(i);
-      shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 4;
-      assert(shader->num_uniforms == prog_data->nr_params * 4);
-   }
-
    nir_foreach_variable(var, &shader->uniforms) {
       const struct glsl_type *glsl_type = glsl_without_array(var->type);
 
index 72bff5544bc3bc180e2d4206400d992b3b4965d7..e624a900d9c0e038cda6e4b354abf6a0146a5b0f 100644 (file)
 #include "compiler/brw_nir.h"
 
 void
-anv_compute_push_layout(const struct anv_physical_device *pdevice,
-                        struct brw_stage_prog_data *prog_data,
-                        struct anv_pipeline_bind_map *map)
+anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
+                            nir_shader *nir,
+                            struct brw_stage_prog_data *prog_data,
+                            struct anv_pipeline_bind_map *map,
+                            void *mem_ctx)
 {
+   memset(map->push_ranges, 0, sizeof(map->push_ranges));
+
+   unsigned push_start = UINT_MAX, push_end = 0;
+   nir_foreach_function(function, nir) {
+      if (!function->impl)
+         continue;
+
+      nir_foreach_block(block, function->impl) {
+         nir_foreach_instr(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_push_constant)
+               continue;
+
+            unsigned base = nir_intrinsic_base(intrin);
+            unsigned range = nir_intrinsic_range(intrin);
+            push_start = MIN2(push_start, base);
+            push_end = MAX2(push_end, base + range);
+         }
+      }
+   }
+
+   const bool has_push_intrinsic = push_start <= push_end;
+
+   if (nir->info.stage == MESA_SHADER_COMPUTE) {
+      /* For compute shaders, we always have to have the subgroup ID.  The
+       * back-end compiler will "helpfully" add it for us in the last push
+       * constant slot.  Yes, there is an off-by-one error here but that's
+       * because the back-end will add it so we want to claim the number of
+       * push constants one dword less than the full amount including
+       * gl_SubgroupId.
+       */
+      assert(push_end <= offsetof(struct anv_push_constants, cs.subgroup_id));
+      push_end = offsetof(struct anv_push_constants, cs.subgroup_id);
+   }
+
+   /* Align push_start down to a 32B boundary and make it no larger than
+    * push_end (no push constants is indicated by push_start = UINT_MAX).
+    */
+   push_start = MIN2(push_start, push_end);
+   push_start &= ~31u;
+
+   if (has_push_intrinsic) {
+      nir_foreach_function(function, nir) {
+         if (!function->impl)
+            continue;
+
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr(instr, block) {
+               if (instr->type != nir_instr_type_intrinsic)
+                  continue;
+
+               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+               if (intrin->intrinsic != nir_intrinsic_load_push_constant)
+                  continue;
+
+               intrin->intrinsic = nir_intrinsic_load_uniform;
+               nir_intrinsic_set_base(intrin,
+                                      nir_intrinsic_base(intrin) -
+                                      push_start);
+            }
+         }
+      }
+   }
+
+   /* For vec4 our push data size needs to be aligned to a vec4 and for
+    * scalar, it needs to be aligned to a DWORD.
+    */
+   const unsigned align =
+      pdevice->compiler->scalar_stage[nir->info.stage] ? 4 : 16;
+   nir->num_uniforms = ALIGN(push_end - push_start, align);
+   prog_data->nr_params = nir->num_uniforms / 4;
+   prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params);
+
    struct anv_push_range push_constant_range = {
       .set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
-      .length = DIV_ROUND_UP(prog_data->nr_params, 8),
+      .start = push_start / 32,
+      .length = DIV_ROUND_UP(push_end - push_start, 32),
    };
 
-   if (pdevice->info.gen >= 8 || pdevice->info.is_haswell) {
+   if ((pdevice->info.gen >= 8 || pdevice->info.is_haswell) &&
+       nir->info.stage != MESA_SHADER_COMPUTE) {
+      brw_nir_analyze_ubo_ranges(pdevice->compiler, nir, NULL,
+                                 prog_data->ubo_ranges);
+
+      /* We can push at most 64 registers worth of data.  The back-end
+       * compiler would do this fixup for us but we'd like to calculate
+       * the push constant layout ourselves.
+       */
+      unsigned total_push_regs = push_constant_range.length;
+      for (unsigned i = 0; i < 4; i++) {
+         if (total_push_regs + prog_data->ubo_ranges[i].length > 64)
+            prog_data->ubo_ranges[i].length = 64 - total_push_regs;
+         total_push_regs += prog_data->ubo_ranges[i].length;
+      }
+      assert(total_push_regs <= 64);
+
       /* The Skylake PRM contains the following restriction:
        *
        *    "The driver must ensure The following case does not occur
@@ -72,7 +167,31 @@ anv_compute_push_layout(const struct anv_physical_device *pdevice,
        * rule that would require us to iterate in the other direction
        * and possibly mess around with dynamic state base address.
        * Don't bother; just emit regular push constants at n = 0.
+       *
+       * In the compute case, we don't have multiple push ranges so it's
+       * better to just provide one in push_ranges[0].
        */
       map->push_ranges[0] = push_constant_range;
    }
 }
+
+void
+anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
+                             struct anv_pipeline_bind_map *map)
+{
+#ifndef NDEBUG
+   unsigned prog_data_push_size = DIV_ROUND_UP(prog_data->nr_params, 8);
+   for (unsigned i = 0; i < 4; i++)
+      prog_data_push_size += prog_data->ubo_ranges[i].length;
+
+   unsigned bind_map_push_size = 0;
+   for (unsigned i = 0; i < 4; i++)
+      bind_map_push_size += map->push_ranges[i].length;
+
+   /* We could go through everything again but it should be enough to assert
+    * that they push the same number of registers.  This should alert us if
+    * the back-end compiler decides to re-arrange stuff or shrink a range.
+    */
+   assert(prog_data_push_size == bind_map_push_size);
+#endif
+}
diff --git a/src/intel/vulkan/anv_nir_lower_push_constants.c b/src/intel/vulkan/anv_nir_lower_push_constants.c
deleted file mode 100644 (file)
index ad60d0c..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "anv_nir.h"
-
-void
-anv_nir_lower_push_constants(nir_shader *shader)
-{
-   nir_foreach_function(function, shader) {
-      if (!function->impl)
-         continue;
-
-      nir_foreach_block(block, function->impl) {
-         nir_foreach_instr(instr, block) {
-            if (instr->type != nir_instr_type_intrinsic)
-               continue;
-
-            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
-            /* TODO: Handle indirect push constants */
-            if (intrin->intrinsic != nir_intrinsic_load_push_constant)
-               continue;
-
-            /* We just turn them into uniform loads */
-            intrin->intrinsic = nir_intrinsic_load_uniform;
-         }
-      }
-   }
-}
index bcc62b77a3d5cbe8af5c645c4f7f3729437b1c4f..144fcf46df372adcdc554efa4ae2288b7c606faa 100644 (file)
@@ -672,37 +672,11 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
 
    NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
 
-   NIR_PASS_V(nir, anv_nir_lower_push_constants);
-
    if (nir->info.stage != MESA_SHADER_COMPUTE)
       NIR_PASS_V(nir, anv_nir_lower_multiview, pipeline->subpass->view_mask);
 
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 
-   if (nir->num_uniforms > 0) {
-      assert(prog_data->nr_params == 0);
-
-      /* If the shader uses any push constants at all, we'll just give
-       * them the maximum possible number
-       */
-      assert(nir->num_uniforms <= MAX_PUSH_CONSTANTS_SIZE);
-      nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE;
-      prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
-      prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params);
-
-      /* We now set the param values to be offsets into a
-       * anv_push_constant_data structure.  Since the compiler doesn't
-       * actually dereference any of the gl_constant_value pointers in the
-       * params array, it doesn't really matter what we put here.
-       */
-      struct anv_push_constants *null_data = NULL;
-      /* Fill out the push constants section of the param array */
-      for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) {
-         prog_data->param[i] = ANV_PARAM_PUSH(
-            (uintptr_t)&null_data->client_data[i * sizeof(float)]);
-      }
-   }
-
    if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
       pipeline->needs_data_cache = true;
 
@@ -732,10 +706,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
               nir_lower_non_uniform_texture_access |
               nir_lower_non_uniform_image_access);
 
-   if (nir->info.stage != MESA_SHADER_COMPUTE)
-      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
-
-   assert(nir->num_uniforms == prog_data->nr_params * 4);
+   anv_nir_compute_push_layout(pdevice, nir, prog_data,
+                               &stage->bind_map, mem_ctx);
 
    stage->nir = nir;
 }
@@ -1394,9 +1366,8 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
          goto fail;
       }
 
-      anv_compute_push_layout(&pipeline->device->instance->physicalDevice,
-                              &stages[s].prog_data.base,
-                              &stages[s].bind_map);
+      anv_nir_validate_push_layout(&stages[s].prog_data.base,
+                                   &stages[s].bind_map);
 
       struct anv_shader_bin *bin =
          anv_device_upload_kernel(pipeline->device, cache,
@@ -1559,10 +1530,9 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
-      anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);
+      NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);
 
-      NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id,
-                 &stage.prog_data.cs);
+      anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);
 
       NIR_PASS_V(stage.nir, nir_lower_vars_to_explicit_types,
                  nir_var_mem_shared, shared_type_info);
@@ -1578,6 +1548,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
       }
 
+      anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
+
       const unsigned code_size = stage.prog_data.base.program_size;
       bin = anv_device_upload_kernel(pipeline->device, cache,
                                      &stage.cache_key, sizeof(stage.cache_key),
index 771268db156ce6be0de7d4b2f1ca70406deb8d70..3ad4a5971e20c95fa6cc107f5fd0d069ed46bfa4 100644 (file)
@@ -2362,14 +2362,6 @@ struct anv_xfb_binding {
    VkDeviceSize                                 size;
 };
 
-#define ANV_PARAM_PUSH(offset)         ((1 << 16) | (uint32_t)(offset))
-#define ANV_PARAM_IS_PUSH(param)       ((uint32_t)(param) >> 16 == 1)
-#define ANV_PARAM_PUSH_OFFSET(param)   ((param) & 0xffff)
-
-#define ANV_PARAM_DYN_OFFSET(offset)      ((2 << 16) | (uint32_t)(offset))
-#define ANV_PARAM_IS_DYN_OFFSET(param)    ((uint32_t)(param) >> 16 == 2)
-#define ANV_PARAM_DYN_OFFSET_IDX(param)   ((param) & 0xffff)
-
 struct anv_push_constants {
    /** Push constant data provided by the client through vkPushConstants */
    uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
@@ -2390,6 +2382,9 @@ struct anv_push_constants {
        * uploading the push constants for compute shaders.
        */
       uint32_t subgroup_id;
+
+      /** Pad out to a multiple of 32 bytes */
+      uint32_t pad[4];
    } cs;
 };
 
index 31127aabf11ed2658e7b548ca86eba445365edf5..b8d2f1e9e8707915847d677dd94239d264724549 100644 (file)
@@ -116,7 +116,6 @@ libanv_files = files(
   'anv_nir_apply_pipeline_layout.c',
   'anv_nir_compute_push_layout.c',
   'anv_nir_lower_multiview.c',
-  'anv_nir_lower_push_constants.c',
   'anv_nir_lower_ycbcr_textures.c',
   'anv_pass.c',
   'anv_perf.c',