anv: simplify push constant emissions
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Tue, 4 Aug 2020 14:25:37 +0000 (17:25 +0300)
committer Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fri, 28 Aug 2020 06:58:46 +0000 (09:58 +0300)
Instead of allocating a push constant buffer per stage from the
dynamic state pool, we can use the same one for all stages.

We can do this because the push constant data is supposed to be
identical for all stages. Even though vkCmdPushConstants() allows
updating chunks of the push constant data differently per stage, this
valid usage rule guarantees that any chunk of push constant data used
by 2 different stages must be identical:

   "For each byte in the range specified by offset and size and for
    each push constant range that overlaps that byte, stageFlags must
    include all stages in that push constant range’s
    VkPushConstantRange::stageFlags"

v2: Fix dirtying of stages (Jason)

v3: Move push constant data into base pipeline state struct (Jason)

v4: Remove duplicated field (Jason)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6183>

src/intel/vulkan/anv_cmd_buffer.c
src/intel/vulkan/anv_descriptor_set.c
src/intel/vulkan/anv_private.h
src/intel/vulkan/genX_cmd_buffer.c

index 5d55e0c41776a3de750f7c2db66d0fbbd10d34c4..3b76d8f1b1d8661f2a51eb8289c9ee8366fbaac2 100644 (file)
@@ -848,28 +848,25 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
 
    if (dynamic_offsets) {
       if (set_layout->dynamic_offset_count > 0) {
+         struct anv_push_constants *push = &pipe_state->push_constants;
          uint32_t dynamic_offset_start =
             layout->set[set_index].dynamic_offset_start;
-
-         anv_foreach_stage(stage, stages) {
-            struct anv_push_constants *push =
-               &cmd_buffer->state.push_constants[stage];
-            uint32_t *push_offsets =
-               &push->dynamic_offsets[dynamic_offset_start];
-
-            /* Assert that everything is in range */
-            assert(set_layout->dynamic_offset_count <= *dynamic_offset_count);
-            assert(dynamic_offset_start + set_layout->dynamic_offset_count <=
-                   ARRAY_SIZE(push->dynamic_offsets));
-
-            unsigned mask = set_layout->stage_dynamic_offsets[stage];
-            STATIC_ASSERT(MAX_DYNAMIC_BUFFERS <= sizeof(mask) * 8);
-            while (mask) {
-               int i = u_bit_scan(&mask);
-               if (push_offsets[i] != (*dynamic_offsets)[i]) {
-                  push_offsets[i] = (*dynamic_offsets)[i];
-                  dirty_stages |= mesa_to_vk_shader_stage(stage);
-               }
+         uint32_t *push_offsets =
+            &push->dynamic_offsets[dynamic_offset_start];
+
+         /* Assert that everything is in range */
+         assert(set_layout->dynamic_offset_count <= *dynamic_offset_count);
+         assert(dynamic_offset_start + set_layout->dynamic_offset_count <=
+                ARRAY_SIZE(push->dynamic_offsets));
+
+         for (uint32_t i = 0; i < set_layout->dynamic_offset_count; i++) {
+            if (push_offsets[i] != (*dynamic_offsets)[i]) {
+               push_offsets[i] = (*dynamic_offsets)[i];
+               /* dynamic_offset_stages[] elements could contain blanket
+                * values like VK_SHADER_STAGE_ALL, so limit this to the
+                * binding point's bits.
+                */
+               dirty_stages |= set_layout->dynamic_offset_stages[i] & stages;
             }
          }
 
@@ -1028,11 +1025,10 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
 }
 
 struct anv_state
-anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
-                              gl_shader_stage stage)
+anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_push_constants *data =
-      &cmd_buffer->state.push_constants[stage];
+      &cmd_buffer->state.gfx.base.push_constants;
 
    struct anv_state state =
       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
@@ -1047,7 +1043,7 @@ struct anv_state
 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
 {
    struct anv_push_constants *data =
-      &cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE];
+      &cmd_buffer->state.compute.base.push_constants;
    struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline;
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
@@ -1102,9 +1098,17 @@ void anv_CmdPushConstants(
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   anv_foreach_stage(stage, stageFlags) {
-      memcpy(cmd_buffer->state.push_constants[stage].client_data + offset,
-             pValues, size);
+   if (stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
+      struct anv_cmd_pipeline_state *pipe_state =
+         &cmd_buffer->state.gfx.base;
+
+      memcpy(pipe_state->push_constants.client_data + offset, pValues, size);
+   }
+   if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
+      struct anv_cmd_pipeline_state *pipe_state =
+         &cmd_buffer->state.compute.base;
+
+      memcpy(pipe_state->push_constants.client_data + offset, pValues, size);
    }
 
    cmd_buffer->state.push_constants_dirty |= stageFlags;
index 961d5c2ad5200bee179f55e494ce8371ce45d97f..7ea6c0c452f98b847d01bcf2ef84478ed0de3b5a 100644 (file)
@@ -470,13 +470,7 @@ VkResult anv_CreateDescriptorSetLayout(
       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
          set_layout->binding[b].dynamic_offset_index = dynamic_offset_count;
-         anv_foreach_stage(s, binding->stageFlags) {
-            STATIC_ASSERT(MAX_DYNAMIC_BUFFERS <=
-                          sizeof(set_layout->stage_dynamic_offsets[s]) * 8);
-            set_layout->stage_dynamic_offsets[s] |=
-               BITFIELD_RANGE(set_layout->binding[b].dynamic_offset_index,
-                              binding->descriptorCount);
-         }
+         set_layout->dynamic_offset_stages[dynamic_offset_count] = binding->stageFlags;
          dynamic_offset_count += binding->descriptorCount;
          assert(dynamic_offset_count < MAX_DYNAMIC_BUFFERS);
          break;
index a600db566cb3192082408be2900bd48571206e22..e34bfb3d8ee5c82665056f9ec32835e87d29fcbd 100644 (file)
@@ -1974,8 +1974,10 @@ struct anv_descriptor_set_layout {
    /* Number of dynamic offsets used by this descriptor set */
    uint16_t dynamic_offset_count;
 
-   /* For each shader stage, which offsets apply to that stage */
-   uint16_t stage_dynamic_offsets[MESA_SHADER_STAGES];
+   /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
+    * this buffer
+    */
+   VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];
 
    /* Size of the descriptor buffer for this descriptor set */
    uint32_t descriptor_buffer_size;
@@ -2807,6 +2809,11 @@ struct anv_vb_cache_range {
 struct anv_cmd_pipeline_state {
    struct anv_descriptor_set *descriptors[MAX_SETS];
    struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
+
+   struct anv_push_constants push_constants;
+
+   /* Push constant state allocated when flushing push constants. */
+   struct anv_state          push_constants_state;
 };
 
 /** State tracking for graphics pipeline
@@ -2881,7 +2888,6 @@ struct anv_cmd_state {
    struct anv_vertex_binding                    vertex_bindings[MAX_VBS];
    bool                                         xfb_enabled;
    struct anv_xfb_binding                       xfb_bindings[MAX_XFB_BUFFERS];
-   struct anv_push_constants                    push_constants[MESA_SHADER_STAGES];
    struct anv_state                             binding_tables[MESA_SHADER_STAGES];
    struct anv_state                             samplers[MESA_SHADER_STAGES];
 
@@ -3056,8 +3062,7 @@ void anv_cmd_buffer_setup_attachments(struct anv_cmd_buffer *cmd_buffer,
 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
 
 struct anv_state
-anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
-                              gl_shader_stage stage);
+anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
 struct anv_state
 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
 
index c5faba1a2a86e6ffcc152394e7680fc79a7d762c..0b9569b54b560f1bf6e18bd2b13c84d60627acd9 100644 (file)
@@ -2469,6 +2469,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
     */
    const bool need_client_mem_relocs =
       !cmd_buffer->device->physical->use_softpin;
+   struct anv_push_constants *push = &pipe_state->push_constants;
 
    for (uint32_t s = 0; s < map->surface_count; s++) {
       struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s];
@@ -2653,9 +2654,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
          case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
             if (desc->buffer) {
                /* Compute the offset within the buffer */
-               struct anv_push_constants *push =
-                  &cmd_buffer->state.push_constants[shader->stage];
-
                uint32_t dynamic_offset =
                   push->dynamic_offsets[binding->dynamic_offset_index];
                uint64_t offset = desc->offset + dynamic_offset;
@@ -2880,7 +2878,7 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
                        gl_shader_stage stage,
                        const struct anv_push_range *range)
 {
-   const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
+   struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
    switch (range->set) {
    case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
       /* This is a descriptor set buffer so the set index is
@@ -2893,11 +2891,13 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
    }
 
    case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: {
-      struct anv_state state =
-         anv_cmd_buffer_push_constants(cmd_buffer, stage);
+      if (gfx_state->base.push_constants_state.alloc_size == 0) {
+         gfx_state->base.push_constants_state =
+            anv_cmd_buffer_gfx_push_constants(cmd_buffer);
+      }
       return (struct anv_address) {
          .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
-         .offset = state.offset,
+         .offset = gfx_state->base.push_constants_state.offset,
       };
    }
 
@@ -2914,8 +2914,8 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
       } else {
          assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
          if (desc->buffer) {
-            struct anv_push_constants *push =
-               &cmd_buffer->state.push_constants[stage];
+            const struct anv_push_constants *push =
+               &gfx_state->base.push_constants;
             uint32_t dynamic_offset =
                push->dynamic_offsets[range->dynamic_offset_index];
             return anv_address_add(desc->buffer->address,
@@ -2984,8 +2984,8 @@ get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer,
 
          assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
          /* Compute the offset within the buffer */
-         struct anv_push_constants *push =
-            &cmd_buffer->state.push_constants[stage];
+         const struct anv_push_constants *push =
+            &gfx_state->base.push_constants;
          uint32_t dynamic_offset =
             push->dynamic_offsets[range->dynamic_offset_index];
          uint64_t offset = desc->offset + dynamic_offset;
@@ -3162,13 +3162,18 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
                                 VkShaderStageFlags dirty_stages)
 {
    VkShaderStageFlags flushed = 0;
-   const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
+   struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
    const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline;
 
 #if GEN_GEN >= 12
    uint32_t nobuffer_stages = 0;
 #endif
 
+   /* Resets the push constant state so that we allocate a new one if
+    * needed.
+    */
+   gfx_state->base.push_constants_state = ANV_STATE_NULL;
+
    anv_foreach_stage(stage, dirty_stages) {
       unsigned buffer_count = 0;
       flushed |= mesa_to_vk_shader_stage(stage);
@@ -3178,8 +3183,7 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
       if (anv_pipeline_has_stage(pipeline, stage)) {
          const struct anv_pipeline_bind_map *bind_map =
             &pipeline->shaders[stage]->bind_map;
-         struct anv_push_constants *push =
-            &cmd_buffer->state.push_constants[stage];
+         struct anv_push_constants *push = &gfx_state->base.push_constants;
 
          if (cmd_buffer->device->robust_buffer_access) {
             push->push_reg_mask = 0;
@@ -4355,7 +4359,7 @@ anv_cmd_buffer_push_base_group_id(struct anv_cmd_buffer *cmd_buffer,
       return;
 
    struct anv_push_constants *push =
-      &cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE];
+      &cmd_buffer->state.compute.base.push_constants;
    if (push->cs.base_work_group_id[0] != baseGroupX ||
        push->cs.base_work_group_id[1] != baseGroupY ||
        push->cs.base_work_group_id[2] != baseGroupZ) {