X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2FgenX_cmd_buffer.c;h=efc05889f79a8d3ee4b0517924c5778e2f3a4ef7;hb=ffc84eac0d5a0c30e445fcdb2f0cfd0c5bf5321d;hp=22d4f79d28dc24ed422c3751648814efe909c19d;hpb=cd40110420b48b3005c9d1d4ea30e2cbcc9a3d40;p=mesa.git diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 22d4f79d28d..efc05889f79 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -802,6 +802,7 @@ static void anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, enum isl_format format, + struct isl_swizzle swizzle, VkImageAspectFlagBits aspect, uint32_t level, uint32_t array_layer, enum isl_aux_op resolve_op, @@ -826,14 +827,15 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_D) resolve_op = ISL_AUX_OP_FULL_RESOLVE; - anv_image_ccs_op(cmd_buffer, image, format, aspect, level, - array_layer, 1, resolve_op, NULL, true); + anv_image_ccs_op(cmd_buffer, image, format, swizzle, aspect, + level, array_layer, 1, resolve_op, NULL, true); } static void anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, enum isl_format format, + struct isl_swizzle swizzle, VkImageAspectFlagBits aspect, uint32_t array_layer, enum isl_aux_op resolve_op, @@ -847,7 +849,7 @@ anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer, aspect, 0, array_layer, resolve_op, fast_clear_supported); - anv_image_mcs_op(cmd_buffer, image, format, aspect, + anv_image_mcs_op(cmd_buffer, image, format, swizzle, aspect, array_layer, 1, resolve_op, NULL, true); #else unreachable("MCS resolves are unsupported on Ivybridge and Bay Trail"); @@ -1021,7 +1023,7 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer, * with not having this stall in some cases if we were really careful but * it's better to play it safe. Full stall the GPU. */ - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT; genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); struct gen_mi_builder b; @@ -1233,6 +1235,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, anv_image_ccs_op(cmd_buffer, image, image->planes[plane].surface.isl.format, + ISL_SWIZZLE_IDENTITY, aspect, level, base_layer, level_layer_count, ISL_AUX_OP_AMBIGUATE, NULL, false); @@ -1252,6 +1255,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, assert(base_level == 0 && level_count == 1); anv_image_mcs_op(cmd_buffer, image, image->planes[plane].surface.isl.format, + ISL_SWIZZLE_IDENTITY, aspect, base_layer, layer_count, ISL_AUX_OP_FAST_CLEAR, NULL, false); } @@ -1331,6 +1335,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, if (image->samples == 1) { anv_cmd_predicated_ccs_resolve(cmd_buffer, image, image->planes[plane].surface.isl.format, + ISL_SWIZZLE_IDENTITY, aspect, level, array_layer, resolve_op, final_fast_clear); } else { @@ -1344,6 +1349,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, anv_cmd_predicated_mcs_resolve(cmd_buffer, image, image->planes[plane].surface.isl.format, + ISL_SWIZZLE_IDENTITY, aspect, array_layer, resolve_op, final_fast_clear); } @@ -2036,7 +2042,7 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) * add extra flushes in the case it knows that the engine is already * IDLE." 
*/ - if (GEN_GEN == 12 && ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) + if (GEN_GEN == 12 && (bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)) bits |= ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT; /* If we're going to do an invalidate and we have a pending end-of-pipe @@ -2379,7 +2385,7 @@ static void cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer) { VkShaderStageFlags stages = - cmd_buffer->state.gfx.base.pipeline->active_stages; + cmd_buffer->state.gfx.pipeline->active_stages; /* In order to avoid thrash, we assume that vertex and fragment stages * always exist. In the rare case where one is missing *and* the other @@ -2467,44 +2473,16 @@ anv_descriptor_set_address(struct anv_cmd_buffer *cmd_buffer, } } -static struct anv_cmd_pipeline_state * -pipe_state_for_stage(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage) -{ - switch (stage) { - case MESA_SHADER_COMPUTE: - return &cmd_buffer->state.compute.base; - - case MESA_SHADER_VERTEX: - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - case MESA_SHADER_FRAGMENT: - return &cmd_buffer->state.gfx.base; - - default: - unreachable("invalid stage"); - } -} - static VkResult emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, + struct anv_cmd_pipeline_state *pipe_state, + struct anv_shader_bin *shader, struct anv_state *bt_state) { struct anv_subpass *subpass = cmd_buffer->state.subpass; uint32_t state_offset; - struct anv_cmd_pipeline_state *pipe_state = - pipe_state_for_stage(cmd_buffer, stage); - struct anv_pipeline *pipeline = pipe_state->pipeline; - - if (!anv_pipeline_has_stage(pipeline, stage)) { - *bt_state = (struct anv_state) { 0, }; - return VK_SUCCESS; - } - - struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map; + struct anv_pipeline_bind_map *map = &shader->bind_map; if (map->surface_count == 0) { *bt_state = (struct anv_state) { 0, }; return VK_SUCCESS; @@ -2536,7 +2514,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS: /* Color attachment binding */ - assert(stage == MESA_SHADER_FRAGMENT); + assert(shader->stage == MESA_SHADER_FRAGMENT); if (binding->index < subpass->color_count) { const unsigned att = subpass->color_attachments[binding->index].attachment; @@ -2564,11 +2542,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, anv_cmd_buffer_alloc_surface_state(cmd_buffer); struct anv_address constant_data = { - .bo = pipeline->device->dynamic_state_pool.block_pool.bo, - .offset = pipeline->shaders[stage]->constant_data.offset, + .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .offset = shader->constant_data.offset, }; - unsigned constant_data_size = - pipeline->shaders[stage]->constant_data_size; + unsigned constant_data_size = shader->constant_data_size; const enum isl_format format = anv_isl_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); @@ -2583,7 +2560,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS: { /* This is always the first binding for compute shaders */ - assert(stage == MESA_SHADER_COMPUTE && s == 0); + assert(shader->stage == MESA_SHADER_COMPUTE && s == 0); struct anv_state surface_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer); @@ -2639,7 +2616,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, break; } case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - assert(stage == MESA_SHADER_FRAGMENT); + assert(shader->stage == MESA_SHADER_FRAGMENT); if ((desc->image_view->aspect_mask & 
VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) == 0) { /* For depth and stencil input attachments, we treat it like any * old texture that a user may have bound. @@ -2691,7 +2668,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { /* Compute the offset within the buffer */ struct anv_push_constants *push = - &cmd_buffer->state.push_constants[stage]; + &cmd_buffer->state.push_constants[shader->stage]; uint32_t dynamic_offset = push->dynamic_offsets[binding->dynamic_offset_index]; @@ -2701,6 +2678,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, /* Clamp the range to the buffer size */ uint32_t range = MIN2(desc->range, desc->buffer->size - offset); + /* Align the range for consistency */ + if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) + range = align_u32(range, ANV_UBO_BOUNDS_CHECK_ALIGNMENT); + struct anv_address address = anv_address_add(desc->buffer->address, offset); @@ -2742,19 +2723,11 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer, static VkResult emit_samplers(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, + struct anv_cmd_pipeline_state *pipe_state, + struct anv_shader_bin *shader, struct anv_state *state) { - struct anv_cmd_pipeline_state *pipe_state = - pipe_state_for_stage(cmd_buffer, stage); - struct anv_pipeline *pipeline = pipe_state->pipeline; - - if (!anv_pipeline_has_stage(pipeline, stage)) { - *state = (struct anv_state) { 0, }; - return VK_SUCCESS; - } - - struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map; + struct anv_pipeline_bind_map *map = &shader->bind_map; if (map->sampler_count == 0) { *state = (struct anv_state) { 0, }; return VK_SUCCESS; @@ -2792,20 +2765,33 @@ emit_samplers(struct anv_cmd_buffer *cmd_buffer, static uint32_t flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer, - struct anv_pipeline *pipeline) + struct anv_cmd_pipeline_state *pipe_state, + struct anv_shader_bin **shaders, + uint32_t num_shaders) { - VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty & - pipeline->active_stages; + const VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty; + VkShaderStageFlags flushed = 0; VkResult result = VK_SUCCESS; - anv_foreach_stage(s, dirty) { - result = emit_samplers(cmd_buffer, s, &cmd_buffer->state.samplers[s]); + for (uint32_t i = 0; i < num_shaders; i++) { + if (!shaders[i]) + continue; + + gl_shader_stage stage = shaders[i]->stage; + VkShaderStageFlags vk_stage = mesa_to_vk_shader_stage(stage); + if ((vk_stage & dirty) == 0) + continue; + + result = emit_samplers(cmd_buffer, pipe_state, shaders[i], + &cmd_buffer->state.samplers[stage]); if (result != VK_SUCCESS) break; - result = emit_binding_table(cmd_buffer, s, - &cmd_buffer->state.binding_tables[s]); + result = emit_binding_table(cmd_buffer, pipe_state, shaders[i], + &cmd_buffer->state.binding_tables[stage]); if (result != VK_SUCCESS) break; + + flushed |= vk_stage; } if (result != VK_SUCCESS) { @@ -2821,25 +2807,34 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer, genX(cmd_buffer_emit_state_base_address)(cmd_buffer); /* Re-emit all active binding tables */ - dirty |= pipeline->active_stages; - anv_foreach_stage(s, dirty) { - result = emit_samplers(cmd_buffer, s, &cmd_buffer->state.samplers[s]); + flushed = 0; + + for (uint32_t i = 0; i < num_shaders; i++) { + if (!shaders[i]) + continue; + + gl_shader_stage stage = shaders[i]->stage; + + result = emit_samplers(cmd_buffer, pipe_state, shaders[i], + &cmd_buffer->state.samplers[stage]); if (result != 
VK_SUCCESS) { anv_batch_set_error(&cmd_buffer->batch, result); return 0; } - result = emit_binding_table(cmd_buffer, s, - &cmd_buffer->state.binding_tables[s]); + result = emit_binding_table(cmd_buffer, pipe_state, shaders[i], + &cmd_buffer->state.binding_tables[stage]); if (result != VK_SUCCESS) { anv_batch_set_error(&cmd_buffer->batch, result); return 0; } + + flushed |= mesa_to_vk_shader_stage(stage); } } - cmd_buffer->state.descriptors_dirty &= ~dirty; + cmd_buffer->state.descriptors_dirty &= ~flushed; - return dirty; + return flushed; } static void @@ -2886,7 +2881,6 @@ cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer, } } -#if GEN_GEN >= 8 || GEN_IS_HASWELL static struct anv_address get_push_range_address(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage, @@ -2902,7 +2896,6 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer, struct anv_descriptor_set *set = gfx_state->base.descriptors[range->index]; return anv_descriptor_set_address(cmd_buffer, set); - break; } case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: { @@ -2912,7 +2905,6 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer, .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = state.offset, }; - break; } default: { @@ -2936,14 +2928,73 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer, } } } -#endif + + +/** Returns the size in bytes of the bound buffer + * + * The range is relative to the start of the buffer, not the start of the + * range. The returned range may be smaller than + * + * (range->start + range->length) * 32; + */ +static uint32_t +get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage, + const struct anv_push_range *range) +{ + assert(stage != MESA_SHADER_COMPUTE); + const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; + switch (range->set) { + case ANV_DESCRIPTOR_SET_DESCRIPTORS: { + struct anv_descriptor_set *set = + gfx_state->base.descriptors[range->index]; + assert(range->start * 32 < set->desc_mem.alloc_size); + assert((range->start + range->length) * 32 <= set->desc_mem.alloc_size); + return set->desc_mem.alloc_size; + } + + case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: + return (range->start + range->length) * 32; + + default: { + assert(range->set < MAX_SETS); + struct anv_descriptor_set *set = + gfx_state->base.descriptors[range->set]; + const struct anv_descriptor *desc = + &set->descriptors[range->index]; + + if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { + return desc->buffer_view->range; + } else { + assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC); + /* Compute the offset within the buffer */ + struct anv_push_constants *push = + &cmd_buffer->state.push_constants[stage]; + uint32_t dynamic_offset = + push->dynamic_offsets[range->dynamic_offset_index]; + uint64_t offset = desc->offset + dynamic_offset; + /* Clamp to the buffer size */ + offset = MIN2(offset, desc->buffer->size); + /* Clamp the range to the buffer size */ + uint32_t bound_range = MIN2(desc->range, desc->buffer->size - offset); + + /* Align the range for consistency */ + bound_range = align_u32(bound_range, ANV_UBO_BOUNDS_CHECK_ALIGNMENT); + + return bound_range; + } + } + } +} static void cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer, - gl_shader_stage stage, unsigned buffer_count) + gl_shader_stage stage, + struct anv_address *buffers, + unsigned buffer_count) { const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; - const struct anv_pipeline *pipeline = 
gfx_state->base.pipeline; + const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline; static const uint32_t push_constant_opcodes[] = { [MESA_SHADER_VERTEX] = 21, @@ -2993,24 +3044,23 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer, */ assert((GEN_GEN >= 8 || GEN_IS_HASWELL) || i == 0); - const struct anv_address addr = - get_push_range_address(cmd_buffer, stage, range); c.ConstantBody.ReadLength[i + shift] = range->length; c.ConstantBody.Buffer[i + shift] = - anv_address_add(addr, range->start * 32); + anv_address_add(buffers[i], range->start * 32); } #else /* For Ivy Bridge, push constants are relative to dynamic state * base address and we only ever push actual push constants. */ if (bind_map->push_ranges[0].length > 0) { + assert(buffer_count == 1); assert(bind_map->push_ranges[0].set == ANV_DESCRIPTOR_SET_PUSH_CONSTANTS); - struct anv_state state = - anv_cmd_buffer_push_constants(cmd_buffer, stage); + assert(buffers[0].bo == + cmd_buffer->device->dynamic_state_pool.block_pool.bo); c.ConstantBody.ReadLength[0] = bind_map->push_ranges[0].length; c.ConstantBody.Buffer[0].bo = NULL; - c.ConstantBody.Buffer[0].offset = state.offset; + c.ConstantBody.Buffer[0].offset = buffers[0].offset; } assert(bind_map->push_ranges[1].length == 0); assert(bind_map->push_ranges[2].length == 0); @@ -3023,9 +3073,11 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer, #if GEN_GEN >= 12 static void cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer, - uint32_t shader_mask, uint32_t count) + uint32_t shader_mask, + struct anv_address *buffers, + uint32_t buffer_count) { - if (count == 0) { + if (buffer_count == 0) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) { c.ShaderUpdateEnable = shader_mask; c.MOCS = cmd_buffer->device->isl_dev.mocs.internal; @@ -3034,7 +3086,7 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer, } const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; - const struct anv_pipeline *pipeline = gfx_state->base.pipeline; + const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline; static const uint32_t push_constant_opcodes[] = { [MESA_SHADER_VERTEX] = 21, @@ -3053,24 +3105,22 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer, &pipeline->shaders[stage]->bind_map; uint32_t *dw; - const uint32_t buffers = (1 << count) - 1; - const uint32_t num_dwords = 2 + 2 * count; + const uint32_t buffer_mask = (1 << buffer_count) - 1; + const uint32_t num_dwords = 2 + 2 * buffer_count; dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords, GENX(3DSTATE_CONSTANT_ALL), .ShaderUpdateEnable = shader_mask, - .PointerBufferMask = buffers, + .PointerBufferMask = buffer_mask, .MOCS = cmd_buffer->device->isl_dev.mocs.internal); - for (int i = 0; i < count; i++) { + for (int i = 0; i < buffer_count; i++) { const struct anv_push_range *range = &bind_map->push_ranges[i]; - const struct anv_address addr = - get_push_range_address(cmd_buffer, stage, range); - GENX(3DSTATE_CONSTANT_ALL_DATA_pack)( &cmd_buffer->batch, dw + 2 + i * 2, &(struct GENX(3DSTATE_CONSTANT_ALL_DATA)) { - .PointerToConstantBuffer = anv_address_add(addr, range->start * 32), + .PointerToConstantBuffer = + anv_address_add(buffers[i], range->start * 32), .ConstantBufferReadLength = range->length, }); } @@ -3083,7 +3133,7 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, { VkShaderStageFlags flushed = 0; const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx; - const 
struct anv_pipeline *pipeline = gfx_state->base.pipeline; + const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline; #if GEN_GEN >= 12 uint32_t nobuffer_stages = 0; @@ -3092,20 +3142,64 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, anv_foreach_stage(stage, dirty_stages) { unsigned buffer_count = 0; flushed |= mesa_to_vk_shader_stage(stage); - uint32_t max_push_range = 0; + UNUSED uint32_t max_push_range = 0; + struct anv_address buffers[4] = {}; if (anv_pipeline_has_stage(pipeline, stage)) { const struct anv_pipeline_bind_map *bind_map = &pipeline->shaders[stage]->bind_map; + struct anv_push_constants *push = + &cmd_buffer->state.push_constants[stage]; + + if (cmd_buffer->device->robust_buffer_access) { + push->push_reg_mask = 0; + /* Start of the current range in the shader, relative to the start + * of push constants in the shader. + */ + unsigned range_start_reg = 0; + for (unsigned i = 0; i < 4; i++) { + const struct anv_push_range *range = &bind_map->push_ranges[i]; + if (range->length == 0) + continue; + + unsigned bound_size = + get_push_range_bound_size(cmd_buffer, stage, range); + if (bound_size >= range->start * 32) { + unsigned bound_regs = + MIN2(DIV_ROUND_UP(bound_size, 32) - range->start, + range->length); + assert(range_start_reg + bound_regs <= 64); + push->push_reg_mask |= BITFIELD64_RANGE(range_start_reg, + bound_regs); + } + + cmd_buffer->state.push_constants_dirty |= + mesa_to_vk_shader_stage(stage); + range_start_reg += range->length; + } + } + + /* We have to gather buffer addresses as a second step because the + * loop above puts data into the push constant area and the call to + * get_push_range_address is what locks our push constants and copies + * them into the actual GPU buffer. If we did the two loops at the + * same time, we'd risk only having some of the sizes in the push + * constant buffer when we did the copy. 
+ */ for (unsigned i = 0; i < 4; i++) { const struct anv_push_range *range = &bind_map->push_ranges[i]; - if (range->length > 0) { - buffer_count++; - if (GEN_GEN >= 12 && range->length > max_push_range) - max_push_range = range->length; - } + if (range->length == 0) + break; + + buffers[i] = get_push_range_address(cmd_buffer, stage, range); + max_push_range = MAX2(max_push_range, range->length); + buffer_count++; } + + /* We have at most 4 buffers but they should be tightly packed */ + for (unsigned i = buffer_count; i < 4; i++) + assert(bind_map->push_ranges[i].length == 0); } #if GEN_GEN >= 12 @@ -3123,17 +3217,17 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, */ if (max_push_range < 32) { cmd_buffer_emit_push_constant_all(cmd_buffer, 1 << stage, - buffer_count); + buffers, buffer_count); continue; } #endif - cmd_buffer_emit_push_constant(cmd_buffer, stage, buffer_count); + cmd_buffer_emit_push_constant(cmd_buffer, stage, buffers, buffer_count); } #if GEN_GEN >= 12 if (nobuffer_stages) - cmd_buffer_emit_push_constant_all(cmd_buffer, nobuffer_stages, 0); + cmd_buffer_emit_push_constant_all(cmd_buffer, nobuffer_stages, NULL, 0); #endif cmd_buffer->state.push_constants_dirty &= ~flushed; @@ -3142,21 +3236,21 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) { - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; uint32_t *p; - uint32_t vb_emit = cmd_buffer->state.gfx.vb_dirty & pipeline->vb_used; - if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) - vb_emit |= pipeline->vb_used; - assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->l3_config); + genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config); genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1); genX(flush_pipeline_select_3d)(cmd_buffer); + uint32_t vb_emit = cmd_buffer->state.gfx.vb_dirty & pipeline->vb_used; + if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) + vb_emit |= pipeline->vb_used; + if (vb_emit) { const uint32_t num_buffers = __builtin_popcount(vb_emit); const uint32_t num_dwords = 1 + num_buffers * 4; @@ -3235,7 +3329,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) #endif if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) { - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->base.batch); /* If the pipeline changed, we may need to re-allocate push constant * space in the URB. @@ -3277,8 +3371,12 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect. 
*/ uint32_t dirty = 0; - if (cmd_buffer->state.descriptors_dirty) - dirty = flush_descriptor_sets(cmd_buffer, pipeline); + if (cmd_buffer->state.descriptors_dirty) { + dirty = flush_descriptor_sets(cmd_buffer, + &cmd_buffer->state.gfx.base, + pipeline->shaders, + ARRAY_SIZE(pipeline->shaders)); + } if (dirty || cmd_buffer->state.push_constants_dirty) { /* Because we're pushing UBOs, we have to push whenever either @@ -3385,7 +3483,7 @@ static void update_dirty_vbs_for_gen8_vb_flush(struct anv_cmd_buffer *cmd_buffer, uint32_t access_type) { - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); uint64_t vb_used = pipeline->vb_used; @@ -3408,7 +3506,7 @@ void genX(CmdDraw)( uint32_t firstInstance) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -3433,7 +3531,8 @@ void genX(CmdDraw)( /* Our implementation of VK_KHR_multiview uses instancing to draw the * different views. We need to multiply instanceCount by the view count. */ - instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); + if (!pipeline->use_primitive_replication) + instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; @@ -3458,7 +3557,7 @@ void genX(CmdDrawIndexed)( uint32_t firstInstance) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -3483,7 +3582,8 @@ void genX(CmdDrawIndexed)( /* Our implementation of VK_KHR_multiview uses instancing to draw the * different views. We need to multiply instanceCount by the view count. */ - instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); + if (!pipeline->use_primitive_replication) + instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) { prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; @@ -3519,7 +3619,7 @@ void genX(CmdDrawIndirectByteCountEXT)( #if GEN_IS_HASWELL || GEN_GEN >= 8 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, counter_buffer, counterBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); /* firstVertex is always zero for this draw function */ @@ -3544,7 +3644,8 @@ void genX(CmdDrawIndirectByteCountEXT)( /* Our implementation of VK_KHR_multiview uses instancing to draw the * different views. We need to multiply instanceCount by the view count. 
*/ - instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); + if (!pipeline->use_primitive_replication) + instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass); struct gen_mi_builder b; gen_mi_builder_init(&b, &cmd_buffer->batch); @@ -3621,7 +3722,7 @@ void genX(CmdDrawIndirect)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -3670,7 +3771,7 @@ void genX(CmdDrawIndexedIndirect)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -3814,7 +3915,7 @@ void genX(CmdDrawIndirectCount)( ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer); struct anv_cmd_state *cmd_state = &cmd_buffer->state; - struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_state->gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -3880,7 +3981,7 @@ void genX(CmdDrawIndexedIndirectCount)( ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer); struct anv_cmd_state *cmd_state = &cmd_buffer->state; - struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline; + struct anv_graphics_pipeline *pipeline = cmd_state->gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) @@ -4038,11 +4139,11 @@ void genX(CmdEndTransformFeedbackEXT)( void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) { - struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline; + struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline; - assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + assert(pipeline->cs); - genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->l3_config); + genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config); genX(flush_pipeline_select_gpgpu)(cmd_buffer); @@ -4058,7 +4159,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->base.batch); /* The workgroup size of the pipeline affects our push constant layout * so flag push constants as dirty if we change the pipeline. 
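
The genX(CmdDraw*) hunks above all apply the same new multiview rule; a minimal sketch of that rule (the helper name and standalone form are mine, not from the diff):

#include <stdint.h>
#include <stdbool.h>

/* When the pipeline was compiled with primitive replication, the shader
 * already broadcasts each primitive to every view, so the draw must not
 * also multiply the instance count; otherwise multiview still falls back
 * to instancing.
 */
static uint32_t
effective_instance_count(uint32_t instance_count,
                         bool use_primitive_replication,
                         uint32_t view_count)
{
   return use_primitive_replication ? instance_count
                                    : instance_count * view_count;
}
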
@@ -4068,7 +4169,9 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || cmd_buffer->state.compute.pipeline_dirty) { - flush_descriptor_sets(cmd_buffer, pipeline); + flush_descriptor_sets(cmd_buffer, + &cmd_buffer->state.compute.base, + &pipeline->cs, 1); uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)]; struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { @@ -4172,7 +4275,7 @@ void genX(CmdDispatchBase)( uint32_t groupCountZ) { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline; + struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline; const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline); anv_cmd_buffer_push_base_group_id(cmd_buffer, baseGroupX, @@ -4207,7 +4310,7 @@ void genX(CmdDispatchBase)( ggw.SIMDSize = prog_data->simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; - ggw.ThreadWidthCounterMaximum = prog_data->threads - 1; + ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1; ggw.ThreadGroupIDXDimension = groupCountX; ggw.ThreadGroupIDYDimension = groupCountY; ggw.ThreadGroupIDZDimension = groupCountZ; @@ -4229,7 +4332,7 @@ void genX(CmdDispatchIndirect)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline; + struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline; const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline); struct anv_address addr = anv_address_add(buffer->address, offset); struct anv_batch *batch = &cmd_buffer->batch; @@ -4323,7 +4426,7 @@ void genX(CmdDispatchIndirect)( ggw.SIMDSize = prog_data->simd_size / 16; ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0; - ggw.ThreadWidthCounterMaximum = prog_data->threads - 1; + ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1; ggw.RightExecutionMask = pipeline->cs_right_mask; ggw.BottomExecutionMask = 0xffffffff; } @@ -4959,6 +5062,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, if (iview->image->samples == 1) { anv_image_ccs_op(cmd_buffer, image, iview->planes[0].isl.format, + iview->planes[0].isl.swizzle, VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1, ISL_AUX_OP_FAST_CLEAR, &clear_color, @@ -4966,6 +5070,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, } else { anv_image_mcs_op(cmd_buffer, image, iview->planes[0].isl.format, + iview->planes[0].isl.swizzle, VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, ISL_AUX_OP_FAST_CLEAR, &clear_color, @@ -5487,6 +5592,7 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) if (image->samples == 1) { anv_cmd_predicated_ccs_resolve(cmd_buffer, image, iview->planes[0].isl.format, + iview->planes[0].isl.swizzle, VK_IMAGE_ASPECT_COLOR_BIT, iview->planes[0].isl.base_level, array_layer, @@ -5495,6 +5601,7 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) } else { anv_cmd_predicated_mcs_resolve(cmd_buffer, image, iview->planes[0].isl.format, + iview->planes[0].isl.swizzle, VK_IMAGE_ASPECT_COLOR_BIT, base_layer, ISL_AUX_OP_PARTIAL_RESOLVE,
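
Most of the remaining hunks are mechanical fallout of splitting the old unified anv_pipeline into anv_graphics_pipeline and anv_compute_pipeline; a sketch of the call-site pattern they convert to (field names are taken from the hunks, the surrounding declarations are illustrative only):

/* Graphics and compute pipelines now hang off their own command-buffer
 * state, and fields shared by both kinds (batch, l3_config, ...) are
 * reached through an embedded base.
 */
struct anv_graphics_pipeline *gfx =
   cmd_buffer->state.gfx.pipeline;        /* was state.gfx.base.pipeline */
struct anv_compute_pipeline *cs =
   cmd_buffer->state.compute.pipeline;    /* was state.compute.base.pipeline */

genX(cmd_buffer_config_l3)(cmd_buffer, gfx->base.l3_config);
anv_batch_emit_batch(&cmd_buffer->batch, &cs->base.batch);
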
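
The robust_buffer_access path added in cmd_buffer_flush_push_constants computes push->push_reg_mask from get_push_range_bound_size(); a worked example of that arithmetic, with numbers chosen purely for illustration:

/* Example: a push range with range->start = 2 and range->length = 8
 * (both in 32-byte registers), backed by a buffer that is bound for
 * 100 bytes:
 *
 *   bound_size = 100
 *   DIV_ROUND_UP(bound_size, 32)           = 4 registers of valid data
 *   bound_regs = MIN2(4 - range->start, range->length)
 *              = MIN2(4 - 2, 8)            = 2
 *
 * so only the first 2 of the 8 registers in this range are set in
 * push_reg_mask via BITFIELD64_RANGE(range_start_reg, bound_regs), and
 * reads from the remaining 6 can be treated as out of bounds.
 */
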