anv: Move vb_emit setup closer to where it's used in flush_state

[mesa.git] / src / intel / vulkan / genX_cmd_buffer.c
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c

index 22d4f79d28dc24ed422c3751648814efe909c19d..efc05889f79a8d3ee4b0517924c5778e2f3a4ef7 100644 (file)
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -802,6 +802,7 @@ static void
  anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
                                 const struct anv_image *image,
                                 enum isl_format format,
+                               struct isl_swizzle swizzle,
                                 VkImageAspectFlagBits aspect,
                                 uint32_t level, uint32_t array_layer,
                                 enum isl_aux_op resolve_op,
@@ -826,14 +827,15 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer,
         image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_D)
        resolve_op = ISL_AUX_OP_FULL_RESOLVE;
  
-   anv_image_ccs_op(cmd_buffer, image, format, aspect, level,
-                    array_layer, 1, resolve_op, NULL, true);
+   anv_image_ccs_op(cmd_buffer, image, format, swizzle, aspect,
+                    level, array_layer, 1, resolve_op, NULL, true);
  }
  
  static void
  anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer,
                                 const struct anv_image *image,
                                 enum isl_format format,
+                               struct isl_swizzle swizzle,
                                 VkImageAspectFlagBits aspect,
                                 uint32_t array_layer,
                                 enum isl_aux_op resolve_op,
@@ -847,7 +849,7 @@ anv_cmd_predicated_mcs_resolve(struct anv_cmd_buffer *cmd_buffer,
                                       aspect, 0, array_layer,
                                       resolve_op, fast_clear_supported);
  
-   anv_image_mcs_op(cmd_buffer, image, format, aspect,
+   anv_image_mcs_op(cmd_buffer, image, format, swizzle, aspect,
                      array_layer, 1, resolve_op, NULL, true);
  #else
     unreachable("MCS resolves are unsupported on Ivybridge and Bay Trail");
@@ -1021,7 +1023,7 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
      * with not having this stall in some cases if we were really careful but
      * it's better to play it safe.  Full stall the GPU.
      */
-   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT;
     genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
  
     struct gen_mi_builder b;
@@ -1233,6 +1235,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
  
              anv_image_ccs_op(cmd_buffer, image,
                               image->planes[plane].surface.isl.format,
+                             ISL_SWIZZLE_IDENTITY,
                               aspect, level, base_layer, level_layer_count,
                               ISL_AUX_OP_AMBIGUATE, NULL, false);
  
@@ -1252,6 +1255,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
           assert(base_level == 0 && level_count == 1);
           anv_image_mcs_op(cmd_buffer, image,
                            image->planes[plane].surface.isl.format,
+                          ISL_SWIZZLE_IDENTITY,
                            aspect, base_layer, layer_count,
                            ISL_AUX_OP_FAST_CLEAR, NULL, false);
        }
@@ -1331,6 +1335,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
           if (image->samples == 1) {
              anv_cmd_predicated_ccs_resolve(cmd_buffer, image,
                                             image->planes[plane].surface.isl.format,
+                                           ISL_SWIZZLE_IDENTITY,
                                             aspect, level, array_layer, resolve_op,
                                             final_fast_clear);
           } else {
@@ -1344,6 +1349,7 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
  
              anv_cmd_predicated_mcs_resolve(cmd_buffer, image,
                                             image->planes[plane].surface.isl.format,
+                                           ISL_SWIZZLE_IDENTITY,
                                             aspect, array_layer, resolve_op,
                                             final_fast_clear);
           }
@@ -2036,7 +2042,7 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
      *    add extra flushes in the case it knows that the engine is already
      *    IDLE."
      */
-   if (GEN_GEN == 12 && ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
+   if (GEN_GEN == 12 && (bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT))
        bits |= ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT;
  
     /* If we're going to do an invalidate and we have a pending end-of-pipe
@@ -2379,7 +2385,7 @@ static void
  cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer)
  {
     VkShaderStageFlags stages =
-      cmd_buffer->state.gfx.base.pipeline->active_stages;
+      cmd_buffer->state.gfx.pipeline->active_stages;
  
     /* In order to avoid thrash, we assume that vertex and fragment stages
      * always exist.  In the rare case where one is missing *and* the other
@@ -2467,44 +2473,16 @@ anv_descriptor_set_address(struct anv_cmd_buffer *cmd_buffer,
     }
  }
  
-static struct anv_cmd_pipeline_state *
-pipe_state_for_stage(struct anv_cmd_buffer *cmd_buffer,
-                     gl_shader_stage stage)
-{
-   switch (stage) {
-   case MESA_SHADER_COMPUTE:
-      return &cmd_buffer->state.compute.base;
-
-   case MESA_SHADER_VERTEX:
-   case MESA_SHADER_TESS_CTRL:
-   case MESA_SHADER_TESS_EVAL:
-   case MESA_SHADER_GEOMETRY:
-   case MESA_SHADER_FRAGMENT:
-      return &cmd_buffer->state.gfx.base;
-
-   default:
-      unreachable("invalid stage");
-   }
-}
-
  static VkResult
  emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
-                   gl_shader_stage stage,
+                   struct anv_cmd_pipeline_state *pipe_state,
+                   struct anv_shader_bin *shader,
                     struct anv_state *bt_state)
  {
     struct anv_subpass *subpass = cmd_buffer->state.subpass;
     uint32_t state_offset;
  
-   struct anv_cmd_pipeline_state *pipe_state =
-      pipe_state_for_stage(cmd_buffer, stage);
-   struct anv_pipeline *pipeline = pipe_state->pipeline;
-
-   if (!anv_pipeline_has_stage(pipeline, stage)) {
-      *bt_state = (struct anv_state) { 0, };
-      return VK_SUCCESS;
-   }
-
-   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
+   struct anv_pipeline_bind_map *map = &shader->bind_map;
     if (map->surface_count == 0) {
        *bt_state = (struct anv_state) { 0, };
        return VK_SUCCESS;
@@ -2536,7 +2514,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
  
        case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
           /* Color attachment binding */
-         assert(stage == MESA_SHADER_FRAGMENT);
+         assert(shader->stage == MESA_SHADER_FRAGMENT);
           if (binding->index < subpass->color_count) {
              const unsigned att =
                 subpass->color_attachments[binding->index].attachment;
@@ -2564,11 +2542,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
              anv_cmd_buffer_alloc_surface_state(cmd_buffer);
  
           struct anv_address constant_data = {
-            .bo = pipeline->device->dynamic_state_pool.block_pool.bo,
-            .offset = pipeline->shaders[stage]->constant_data.offset,
+            .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
+            .offset = shader->constant_data.offset,
           };
-         unsigned constant_data_size =
-            pipeline->shaders[stage]->constant_data_size;
+         unsigned constant_data_size = shader->constant_data_size;
  
           const enum isl_format format =
              anv_isl_format_for_descriptor_type(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
@@ -2583,7 +2560,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
  
        case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS: {
           /* This is always the first binding for compute shaders */
-         assert(stage == MESA_SHADER_COMPUTE && s == 0);
+         assert(shader->stage == MESA_SHADER_COMPUTE && s == 0);
  
           struct anv_state surface_state =
              anv_cmd_buffer_alloc_surface_state(cmd_buffer);
@@ -2639,7 +2616,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
              break;
           }
           case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-            assert(stage == MESA_SHADER_FRAGMENT);
+            assert(shader->stage == MESA_SHADER_FRAGMENT);
              if ((desc->image_view->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) == 0) {
                 /* For depth and stencil input attachments, we treat it like any
                  * old texture that a user may have bound.
@@ -2691,7 +2668,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
           case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
              /* Compute the offset within the buffer */
              struct anv_push_constants *push =
-               &cmd_buffer->state.push_constants[stage];
+               &cmd_buffer->state.push_constants[shader->stage];
  
              uint32_t dynamic_offset =
                 push->dynamic_offsets[binding->dynamic_offset_index];
@@ -2701,6 +2678,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
              /* Clamp the range to the buffer size */
              uint32_t range = MIN2(desc->range, desc->buffer->size - offset);
  
+            /* Align the range the same way get_push_range_bound_size() does */
+            if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
+               range = align_u32(range, ANV_UBO_BOUNDS_CHECK_ALIGNMENT);
+
              struct anv_address address =
                 anv_address_add(desc->buffer->address, offset);
  
@@ -2742,19 +2723,11 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
  
  static VkResult
  emit_samplers(struct anv_cmd_buffer *cmd_buffer,
-              gl_shader_stage stage,
+              struct anv_cmd_pipeline_state *pipe_state,
+              struct anv_shader_bin *shader,
                struct anv_state *state)
  {
-   struct anv_cmd_pipeline_state *pipe_state =
-      pipe_state_for_stage(cmd_buffer, stage);
-   struct anv_pipeline *pipeline = pipe_state->pipeline;
-
-   if (!anv_pipeline_has_stage(pipeline, stage)) {
-      *state = (struct anv_state) { 0, };
-      return VK_SUCCESS;
-   }
-
-   struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
+   struct anv_pipeline_bind_map *map = &shader->bind_map;
     if (map->sampler_count == 0) {
        *state = (struct anv_state) { 0, };
        return VK_SUCCESS;
@@ -2792,20 +2765,33 @@ emit_samplers(struct anv_cmd_buffer *cmd_buffer,
  
  static uint32_t
  flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
-                      struct anv_pipeline *pipeline)
+                      struct anv_cmd_pipeline_state *pipe_state,
+                      struct anv_shader_bin **shaders,
+                      uint32_t num_shaders)
  {
-   VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty &
-                              pipeline->active_stages;
+   const VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty;
+   VkShaderStageFlags flushed = 0;
  
     VkResult result = VK_SUCCESS;
-   anv_foreach_stage(s, dirty) {
-      result = emit_samplers(cmd_buffer, s, &cmd_buffer->state.samplers[s]);
+   for (uint32_t i = 0; i < num_shaders; i++) {
+      if (!shaders[i])
+         continue;
+
+      gl_shader_stage stage = shaders[i]->stage;
+      VkShaderStageFlags vk_stage = mesa_to_vk_shader_stage(stage);
+      if ((vk_stage & dirty) == 0)
+         continue;
+
+      result = emit_samplers(cmd_buffer, pipe_state, shaders[i],
+                             &cmd_buffer->state.samplers[stage]);
        if (result != VK_SUCCESS)
           break;
-      result = emit_binding_table(cmd_buffer, s,
-                                  &cmd_buffer->state.binding_tables[s]);
+      result = emit_binding_table(cmd_buffer, pipe_state, shaders[i],
+                                  &cmd_buffer->state.binding_tables[stage]);
        if (result != VK_SUCCESS)
           break;
+
+      flushed |= vk_stage;
     }
  
     if (result != VK_SUCCESS) {
@@ -2821,25 +2807,34 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
        genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
  
        /* Re-emit all active binding tables */
-      dirty |= pipeline->active_stages;
-      anv_foreach_stage(s, dirty) {
-         result = emit_samplers(cmd_buffer, s, &cmd_buffer->state.samplers[s]);
+      flushed = 0;
+
+      for (uint32_t i = 0; i < num_shaders; i++) {
+         if (!shaders[i])
+            continue;
+
+         gl_shader_stage stage = shaders[i]->stage;
+
+         result = emit_samplers(cmd_buffer, pipe_state, shaders[i],
+                                &cmd_buffer->state.samplers[stage]);
           if (result != VK_SUCCESS) {
              anv_batch_set_error(&cmd_buffer->batch, result);
              return 0;
           }
-         result = emit_binding_table(cmd_buffer, s,
-                                     &cmd_buffer->state.binding_tables[s]);
+         result = emit_binding_table(cmd_buffer, pipe_state, shaders[i],
+                                     &cmd_buffer->state.binding_tables[stage]);
           if (result != VK_SUCCESS) {
              anv_batch_set_error(&cmd_buffer->batch, result);
              return 0;
           }
+
+         flushed |= mesa_to_vk_shader_stage(stage);
        }
     }
  
-   cmd_buffer->state.descriptors_dirty &= ~dirty;
+   cmd_buffer->state.descriptors_dirty &= ~flushed;
  
-   return dirty;
+   return flushed;
  }
  
  static void
@@ -2886,7 +2881,6 @@ cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer,
     }
  }
  
-#if GEN_GEN >= 8 || GEN_IS_HASWELL
  static struct anv_address
  get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
                         gl_shader_stage stage,
@@ -2902,7 +2896,6 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
        struct anv_descriptor_set *set =
           gfx_state->base.descriptors[range->index];
        return anv_descriptor_set_address(cmd_buffer, set);
-      break;
     }
  
     case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: {
@@ -2912,7 +2905,6 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
           .bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
           .offset = state.offset,
        };
-      break;
     }
  
     default: {
@@ -2936,14 +2928,73 @@ get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
     }
     }
  }
-#endif
+
+
+/** Returns the size in bytes of the buffer bound for the given push range
+ *
+ * The size is measured from the start of the bound buffer, not from the
+ * start of the push range.  The returned size may be smaller than
+ *
+ *    (range->start + range->length) * 32;
+ */
+static uint32_t
+get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer,
+                          gl_shader_stage stage,
+                          const struct anv_push_range *range)
+{
+   assert(stage != MESA_SHADER_COMPUTE);
+   const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
+   switch (range->set) {
+   case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
+      struct anv_descriptor_set *set =
+         gfx_state->base.descriptors[range->index];
+      assert(range->start * 32 < set->desc_mem.alloc_size);
+      assert((range->start + range->length) * 32 <= set->desc_mem.alloc_size);
+      return set->desc_mem.alloc_size;
+   }
+
+   case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
+      return (range->start + range->length) * 32;
+
+   default: {
+      assert(range->set < MAX_SETS);
+      struct anv_descriptor_set *set =
+         gfx_state->base.descriptors[range->set];
+      const struct anv_descriptor *desc =
+         &set->descriptors[range->index];
+
+      if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+         return desc->buffer_view->range;
+      } else {
+         assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
+         /* Add this stage's dynamic offset to the descriptor's offset */
+         struct anv_push_constants *push =
+            &cmd_buffer->state.push_constants[stage];
+         uint32_t dynamic_offset =
+            push->dynamic_offsets[range->dynamic_offset_index];
+         uint64_t offset = desc->offset + dynamic_offset;
+         /* Clamp the offset to the buffer size */
+         offset = MIN2(offset, desc->buffer->size);
+         /* Clamp the range to what remains of the buffer past the offset */
+         uint32_t bound_range = MIN2(desc->range, desc->buffer->size - offset);
+
+         /* Align the range the same way emit_binding_table() does */
+         bound_range = align_u32(bound_range, ANV_UBO_BOUNDS_CHECK_ALIGNMENT);
+
+         return bound_range;
+      }
+   }
+   }
+}
  
  static void
  cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
-                              gl_shader_stage stage, unsigned buffer_count)
+                              gl_shader_stage stage,
+                              struct anv_address *buffers,
+                              unsigned buffer_count)
  {
     const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
-   const struct anv_pipeline *pipeline = gfx_state->base.pipeline;
+   const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline;
  
     static const uint32_t push_constant_opcodes[] = {
        [MESA_SHADER_VERTEX]                      = 21,
@@ -2993,24 +3044,23 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
               */
              assert((GEN_GEN >= 8 || GEN_IS_HASWELL) || i == 0);
  
-            const struct anv_address addr =
-               get_push_range_address(cmd_buffer, stage, range);
              c.ConstantBody.ReadLength[i + shift] = range->length;
              c.ConstantBody.Buffer[i + shift] =
-               anv_address_add(addr, range->start * 32);
+               anv_address_add(buffers[i], range->start * 32);
           }
  #else
           /* For Ivy Bridge, push constants are relative to dynamic state
            * base address and we only ever push actual push constants.
            */
           if (bind_map->push_ranges[0].length > 0) {
+            assert(buffer_count == 1);
              assert(bind_map->push_ranges[0].set ==
                     ANV_DESCRIPTOR_SET_PUSH_CONSTANTS);
-            struct anv_state state =
-               anv_cmd_buffer_push_constants(cmd_buffer, stage);
+            assert(buffers[0].bo ==
+                   cmd_buffer->device->dynamic_state_pool.block_pool.bo);
              c.ConstantBody.ReadLength[0] = bind_map->push_ranges[0].length;
              c.ConstantBody.Buffer[0].bo = NULL;
-            c.ConstantBody.Buffer[0].offset = state.offset;
+            c.ConstantBody.Buffer[0].offset = buffers[0].offset;
           }
           assert(bind_map->push_ranges[1].length == 0);
           assert(bind_map->push_ranges[2].length == 0);
@@ -3023,9 +3073,11 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
  #if GEN_GEN >= 12
  static void
  cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
-                                  uint32_t shader_mask, uint32_t count)
+                                  uint32_t shader_mask,
+                                  struct anv_address *buffers,
+                                  uint32_t buffer_count)
  {
-   if (count == 0) {
+   if (buffer_count == 0) {
        anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) {
           c.ShaderUpdateEnable = shader_mask;
           c.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
@@ -3034,7 +3086,7 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
     }
  
     const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
-   const struct anv_pipeline *pipeline = gfx_state->base.pipeline;
+   const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline;
  
     static const uint32_t push_constant_opcodes[] = {
        [MESA_SHADER_VERTEX]                      = 21,
@@ -3053,24 +3105,22 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
        &pipeline->shaders[stage]->bind_map;
  
     uint32_t *dw;
-   const uint32_t buffers = (1 << count) - 1;
-   const uint32_t num_dwords = 2 + 2 * count;
+   const uint32_t buffer_mask = (1 << buffer_count) - 1;
+   const uint32_t num_dwords = 2 + 2 * buffer_count;
  
     dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                          GENX(3DSTATE_CONSTANT_ALL),
                          .ShaderUpdateEnable = shader_mask,
-                        .PointerBufferMask = buffers,
+                        .PointerBufferMask = buffer_mask,
                          .MOCS = cmd_buffer->device->isl_dev.mocs.internal);
  
-   for (int i = 0; i < count; i++) {
+   for (int i = 0; i < buffer_count; i++) {
        const struct anv_push_range *range = &bind_map->push_ranges[i];
-      const struct anv_address addr =
-         get_push_range_address(cmd_buffer, stage, range);
-
        GENX(3DSTATE_CONSTANT_ALL_DATA_pack)(
           &cmd_buffer->batch, dw + 2 + i * 2,
           &(struct GENX(3DSTATE_CONSTANT_ALL_DATA)) {
-            .PointerToConstantBuffer = anv_address_add(addr, range->start * 32),
+            .PointerToConstantBuffer =
+               anv_address_add(buffers[i], range->start * 32),
              .ConstantBufferReadLength = range->length,
           });
     }
@@ -3083,7 +3133,7 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
  {
     VkShaderStageFlags flushed = 0;
     const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
-   const struct anv_pipeline *pipeline = gfx_state->base.pipeline;
+   const struct anv_graphics_pipeline *pipeline = gfx_state->pipeline;
  
  #if GEN_GEN >= 12
     uint32_t nobuffer_stages = 0;
@@ -3092,20 +3142,64 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
     anv_foreach_stage(stage, dirty_stages) {
        unsigned buffer_count = 0;
        flushed |= mesa_to_vk_shader_stage(stage);
-      uint32_t max_push_range = 0;
+      UNUSED uint32_t max_push_range = 0;
  
+      struct anv_address buffers[4] = {};
        if (anv_pipeline_has_stage(pipeline, stage)) {
           const struct anv_pipeline_bind_map *bind_map =
              &pipeline->shaders[stage]->bind_map;
+         struct anv_push_constants *push =
+            &cmd_buffer->state.push_constants[stage];
+
+         if (cmd_buffer->device->robust_buffer_access) {
+            push->push_reg_mask = 0;
+            /* Start of the current range in the shader, relative to the start
+             * of push constants in the shader.
+             */
+            unsigned range_start_reg = 0;
+            for (unsigned i = 0; i < 4; i++) {
+               const struct anv_push_range *range = &bind_map->push_ranges[i];
+               if (range->length == 0)
+                  continue;
+
+               unsigned bound_size =
+                  get_push_range_bound_size(cmd_buffer, stage, range);
+               if (bound_size >= range->start * 32) {
+                  unsigned bound_regs =
+                     MIN2(DIV_ROUND_UP(bound_size, 32) - range->start,
+                          range->length);
+                  assert(range_start_reg + bound_regs <= 64);
+                  push->push_reg_mask |= BITFIELD64_RANGE(range_start_reg,
+                                                          bound_regs);
+               }
+
+               cmd_buffer->state.push_constants_dirty |=
+                  mesa_to_vk_shader_stage(stage);
  
+               range_start_reg += range->length;
+            }
+         }
+
+         /* We have to gather buffer addresses as a second step because the
+          * loop above puts data into the push constant area and the call to
+          * get_push_range_address is what locks our push constants and copies
+          * them into the actual GPU buffer.  If we did the two loops at the
+          * same time, we'd risk only having some of the sizes in the push
+          * constant buffer when we did the copy.
+          */
           for (unsigned i = 0; i < 4; i++) {
              const struct anv_push_range *range = &bind_map->push_ranges[i];
-            if (range->length > 0) {
-               buffer_count++;
-               if (GEN_GEN >= 12 && range->length > max_push_range)
-                  max_push_range = range->length;
-            }
+            if (range->length == 0)
+               break;
+
+            buffers[i] = get_push_range_address(cmd_buffer, stage, range);
+            max_push_range = MAX2(max_push_range, range->length);
+            buffer_count++;
           }
+
+         /* We have at most 4 buffers but they should be tightly packed */
+         for (unsigned i = buffer_count; i < 4; i++)
+            assert(bind_map->push_ranges[i].length == 0);
        }
  
  #if GEN_GEN >= 12
@@ -3123,17 +3217,17 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
         */
        if (max_push_range < 32) {
           cmd_buffer_emit_push_constant_all(cmd_buffer, 1 << stage,
-                                           buffer_count);
+                                           buffers, buffer_count);
           continue;
        }
  #endif
  
-      cmd_buffer_emit_push_constant(cmd_buffer, stage, buffer_count);
+      cmd_buffer_emit_push_constant(cmd_buffer, stage, buffers, buffer_count);
     }
  
  #if GEN_GEN >= 12
     if (nobuffer_stages)
-      cmd_buffer_emit_push_constant_all(cmd_buffer, nobuffer_stages, 0);
+      cmd_buffer_emit_push_constant_all(cmd_buffer, nobuffer_stages, NULL, 0);
  #endif
  
     cmd_buffer->state.push_constants_dirty &= ~flushed;
@@ -3142,21 +3236,21 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
  void
  genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
  {
-   struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
+   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
     uint32_t *p;
  
-   uint32_t vb_emit = cmd_buffer->state.gfx.vb_dirty & pipeline->vb_used;
-   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE)
-      vb_emit |= pipeline->vb_used;
-
     assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
  
-   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->l3_config);
+   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config);
  
     genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1);
  
     genX(flush_pipeline_select_3d)(cmd_buffer);
  
+   uint32_t vb_emit = cmd_buffer->state.gfx.vb_dirty & pipeline->vb_used;
+   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE)
+      vb_emit |= pipeline->vb_used;
+
     if (vb_emit) {
        const uint32_t num_buffers = __builtin_popcount(vb_emit);
        const uint32_t num_dwords = 1 + num_buffers * 4;
@@ -3235,7 +3329,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
  #endif
  
     if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) {
-      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->base.batch);
  
        /* If the pipeline changed, we may need to re-allocate push constant
         * space in the URB.
@@ -3277,8 +3371,12 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
      * 3DSTATE_BINDING_TABLE_POINTER_* for the push constants to take effect.
      */
     uint32_t dirty = 0;
-   if (cmd_buffer->state.descriptors_dirty)
-      dirty = flush_descriptor_sets(cmd_buffer, pipeline);
+   if (cmd_buffer->state.descriptors_dirty) {
+      dirty = flush_descriptor_sets(cmd_buffer,
+                                    &cmd_buffer->state.gfx.base,
+                                    pipeline->shaders,
+                                    ARRAY_SIZE(pipeline->shaders));
+   }
  
     if (dirty || cmd_buffer->state.push_constants_dirty) {
        /* Because we're pushing UBOs, we have to push whenever either
@@ -3385,7 +3483,7 @@ static void
  update_dirty_vbs_for_gen8_vb_flush(struct anv_cmd_buffer *cmd_buffer,
                                     uint32_t access_type)
  {
-   struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
+   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
     const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
  
     uint64_t vb_used = pipeline->vb_used;
@@ -3408,7 +3506,7 @@ void genX(CmdDraw)(
      uint32_t                                    firstInstance)
  {
     ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
+   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
     const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
  
     if (anv_batch_has_error(&cmd_buffer->batch))
@@ -3433,7 +3531,8 @@ void genX(CmdDraw)(
     /* Our implementation of VK_KHR_multiview uses instancing to draw the
      * different views.  We need to multiply instanceCount by the view count.
      */
-   instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
+   if (!pipeline->use_primitive_replication)
+      instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
  
     anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
        prim.PredicateEnable          = cmd_buffer->state.conditional_render_enabled;
@@ -3458,7 +3557,7 @@ void genX(CmdDrawIndexed)(
      uint32_t                                    firstInstance)
  {
     ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
+   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
     const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
  
     if (anv_batch_has_error(&cmd_buffer->batch))
@@ -3483,7 +3582,8 @@ void genX(CmdDrawIndexed)(
     /* Our implementation of VK_KHR_multiview uses instancing to draw the
      * different views.  We need to multiply instanceCount by the view count.
      */
-   instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
+   if (!pipeline->use_primitive_replication)
+      instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
  
     anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
        prim.PredicateEnable          = cmd_buffer->state.conditional_render_enabled;
@@ -3519,7 +3619,7 @@ void genX(CmdDrawIndirectByteCountEXT)(
  #if GEN_IS_HASWELL || GEN_GEN >= 8
     ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
     ANV_FROM_HANDLE(anv_buffer, counter_buffer, counterBuffer);
-   struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
+   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
     const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
  
     /* firstVertex is always zero for this draw function */
@@ -3544,7 +3644,8 @@ void genX(CmdDrawIndirectByteCountEXT)(
     /* Our implementation of VK_KHR_multiview uses instancing to draw the
      * different views.  We need to multiply instanceCount by the view count.
      */
-   instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
+   if (!pipeline->use_primitive_replication)
+      instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
  
     struct gen_mi_builder b;
     gen_mi_builder_init(&b, &cmd_buffer->batch);
@@ -3621,7 +3722,7 @@ void genX(CmdDrawIndirect)(
  {
     ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
     ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
-   struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
+   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
     const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
  
     if (anv_batch_has_error(&cmd_buffer->batch))
@@ -3670,7 +3771,7 @@ void genX(CmdDrawIndexedIndirect)(
  {
     ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
     ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
-   struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
+   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
     const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
  
     if (anv_batch_has_error(&cmd_buffer->batch))
@@ -3814,7 +3915,7 @@ void genX(CmdDrawIndirectCount)(
     ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
     ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
     struct anv_cmd_state *cmd_state = &cmd_buffer->state;
-   struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline;
+   struct anv_graphics_pipeline *pipeline = cmd_state->gfx.pipeline;
     const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
  
     if (anv_batch_has_error(&cmd_buffer->batch))
@@ -3880,7 +3981,7 @@ void genX(CmdDrawIndexedIndirectCount)(
     ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
     ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
     struct anv_cmd_state *cmd_state = &cmd_buffer->state;
-   struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline;
+   struct anv_graphics_pipeline *pipeline = cmd_state->gfx.pipeline;
     const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
  
     if (anv_batch_has_error(&cmd_buffer->batch))
@@ -4038,11 +4139,11 @@ void genX(CmdEndTransformFeedbackEXT)(
  void
  genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
  {
-   struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
+   struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline;
  
-   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
+   assert(pipeline->cs);
  
-   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->l3_config);
+   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->base.l3_config);
  
     genX(flush_pipeline_select_gpgpu)(cmd_buffer);
  
@@ -4058,7 +4159,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
        cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
        genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
  
-      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
+      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->base.batch);
  
        /* The workgroup size of the pipeline affects our push constant layout
         * so flag push constants as dirty if we change the pipeline.
@@ -4068,7 +4169,9 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
  
     if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
         cmd_buffer->state.compute.pipeline_dirty) {
-      flush_descriptor_sets(cmd_buffer, pipeline);
+      flush_descriptor_sets(cmd_buffer,
+                            &cmd_buffer->state.compute.base,
+                            &pipeline->cs, 1);
  
        uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
        struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
@@ -4172,7 +4275,7 @@ void genX(CmdDispatchBase)(
      uint32_t                                    groupCountZ)
  {
     ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-   struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
+   struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline;
     const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline);
  
     anv_cmd_buffer_push_base_group_id(cmd_buffer, baseGroupX,
@@ -4207,7 +4310,7 @@ void genX(CmdDispatchBase)(
        ggw.SIMDSize                     = prog_data->simd_size / 16;
        ggw.ThreadDepthCounterMaximum    = 0;
        ggw.ThreadHeightCounterMaximum   = 0;
-      ggw.ThreadWidthCounterMaximum    = prog_data->threads - 1;
+      ggw.ThreadWidthCounterMaximum    = anv_cs_threads(pipeline) - 1;
        ggw.ThreadGroupIDXDimension      = groupCountX;
        ggw.ThreadGroupIDYDimension      = groupCountY;
        ggw.ThreadGroupIDZDimension      = groupCountZ;
@@ -4229,7 +4332,7 @@ void genX(CmdDispatchIndirect)(
  {
     ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
     ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
-   struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
+   struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline;
     const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline);
     struct anv_address addr = anv_address_add(buffer->address, offset);
     struct anv_batch *batch = &cmd_buffer->batch;
@@ -4323,7 +4426,7 @@ void genX(CmdDispatchIndirect)(
        ggw.SIMDSize                     = prog_data->simd_size / 16;
        ggw.ThreadDepthCounterMaximum    = 0;
        ggw.ThreadHeightCounterMaximum   = 0;
-      ggw.ThreadWidthCounterMaximum    = prog_data->threads - 1;
+      ggw.ThreadWidthCounterMaximum    = anv_cs_threads(pipeline) - 1;
        ggw.RightExecutionMask           = pipeline->cs_right_mask;
        ggw.BottomExecutionMask          = 0xffffffff;
     }
@@ -4959,6 +5062,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
              if (iview->image->samples == 1) {
                 anv_image_ccs_op(cmd_buffer, image,
                                  iview->planes[0].isl.format,
+                                iview->planes[0].isl.swizzle,
                                  VK_IMAGE_ASPECT_COLOR_BIT,
                                  0, 0, 1, ISL_AUX_OP_FAST_CLEAR,
                                  &clear_color,
@@ -4966,6 +5070,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
              } else {
                 anv_image_mcs_op(cmd_buffer, image,
                                  iview->planes[0].isl.format,
+                                iview->planes[0].isl.swizzle,
                                  VK_IMAGE_ASPECT_COLOR_BIT,
                                  0, 1, ISL_AUX_OP_FAST_CLEAR,
                                  &clear_color,
@@ -5487,6 +5592,7 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
                 if (image->samples == 1) {
                    anv_cmd_predicated_ccs_resolve(cmd_buffer, image,
                                                   iview->planes[0].isl.format,
+                                                 iview->planes[0].isl.swizzle,
                                                   VK_IMAGE_ASPECT_COLOR_BIT,
                                                   iview->planes[0].isl.base_level,
                                                   array_layer,
@@ -5495,6 +5601,7 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer)
                 } else {
                    anv_cmd_predicated_mcs_resolve(cmd_buffer, image,
                                                   iview->planes[0].isl.format,
+                                                 iview->planes[0].isl.swizzle,
                                                   VK_IMAGE_ASPECT_COLOR_BIT,
                                                   base_layer,
                                                   ISL_AUX_OP_PARTIAL_RESOLVE,