anv: Add anv_cmd_buffer_cs_push_constants
[mesa.git] / src / vulkan / anv_cmd_buffer.c
index a986df2860cd1ba7a0a8a32d6cce66e9686841fe..d34d53dcbb35dd0e386599649e8def9c2370b5c5 100644 (file)
 
 /** \file anv_cmd_buffer.c
  *
- * This file contains functions related to anv_cmd_buffer as a data
- * structure.  This involves everything required to create and destroy
- * the actual batch buffers as well as link them together and handle
- * relocations and surface state.  It specifically does *not* contain any
- * handling of actual vkCmd calls beyond vkCmdExecuteCommands.
+ * This file contains all of the stuff for emitting commands into a command
+ * buffer.  This includes implementations of most of the vkCmd*
+ * entrypoints.  This file is concerned entirely with state emission and
+ * not with the command buffer data structure itself.  As far as this file
+ * is concerned, most of anv_cmd_buffer is magic.
  */
 
-/*-----------------------------------------------------------------------*
- * Functions related to anv_reloc_list
- *-----------------------------------------------------------------------*/
+/* TODO: These are taken from GLES.  We should check the Vulkan spec */
+const struct anv_dynamic_state default_dynamic_state = {
+   .viewport = {
+      .count = 0,
+   },
+   .scissor = {
+      .count = 0,
+   },
+   .line_width = 1.0f,
+   .depth_bias = {
+      .bias = 0.0f,
+      .clamp = 0.0f,
+      .slope = 0.0f,
+   },
+   .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
+   .depth_bounds = {
+      .min = 0.0f,
+      .max = 1.0f,
+   },
+   .stencil_compare_mask = {
+      .front = ~0u,
+      .back = ~0u,
+   },
+   .stencil_write_mask = {
+      .front = ~0u,
+      .back = ~0u,
+   },
+   .stencil_reference = {
+      .front = 0u,
+      .back = 0u,
+   },
+};
 
-VkResult
-anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device)
+void
+anv_dynamic_state_copy(struct anv_dynamic_state *dest,
+                       const struct anv_dynamic_state *src,
+                       uint32_t copy_mask)
 {
-   list->num_relocs = 0;
-   list->array_length = 256;
-   list->relocs =
-      anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8,
-                       VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+   if (copy_mask & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
+      dest->viewport.count = src->viewport.count;
+      typed_memcpy(dest->viewport.viewports, src->viewport.viewports,
+                   src->viewport.count);
+   }
 
-   if (list->relocs == NULL)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   if (copy_mask & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
+      dest->scissor.count = src->scissor.count;
+      typed_memcpy(dest->scissor.scissors, src->scissor.scissors,
+                   src->scissor.count);
+   }
 
-   list->reloc_bos =
-      anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8,
-                       VK_SYSTEM_ALLOC_TYPE_INTERNAL);
+   if (copy_mask & (1 << VK_DYNAMIC_STATE_LINE_WIDTH))
+      dest->line_width = src->line_width;
 
-   if (list->relocs == NULL) {
-      anv_device_free(device, list->relocs);
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-   }
+   if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))
+      dest->depth_bias = src->depth_bias;
 
-   return VK_SUCCESS;
+   if (copy_mask & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS))
+      typed_memcpy(dest->blend_constants, src->blend_constants, 4);
+
+   if (copy_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS))
+      dest->depth_bounds = src->depth_bounds;
+
+   if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))
+      dest->stencil_compare_mask = src->stencil_compare_mask;
+
+   if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))
+      dest->stencil_write_mask = src->stencil_write_mask;
+
+   if (copy_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))
+      dest->stencil_reference = src->stencil_reference;
 }
 
-void
-anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device)
+static void
+anv_cmd_state_init(struct anv_cmd_state *state)
 {
-   anv_device_free(device, list->relocs);
-   anv_device_free(device, list->reloc_bos);
+   memset(&state->descriptors, 0, sizeof(state->descriptors));
+   memset(&state->push_constants, 0, sizeof(state->push_constants));
+
+   state->dirty = ~0;
+   state->vb_dirty = 0;
+   state->descriptors_dirty = 0;
+   state->push_constants_dirty = 0;
+   state->pipeline = NULL;
+   state->restart_index = UINT32_MAX;
+   state->dynamic = default_dynamic_state;
+
+   state->gen7.index_buffer = NULL;
 }
 
 static VkResult
-anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device,
-                    size_t num_additional_relocs)
+anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer,
+                                          gl_shader_stage stage, uint32_t size)
 {
-   if (list->num_relocs + num_additional_relocs <= list->array_length)
-      return VK_SUCCESS;
+   struct anv_push_constants **ptr = &cmd_buffer->state.push_constants[stage];
 
-   size_t new_length = list->array_length * 2;
-   while (new_length < list->num_relocs + num_additional_relocs)
-      new_length *= 2;
+   if (*ptr == NULL) {
+      *ptr = anv_alloc(&cmd_buffer->pool->alloc, size, 8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (*ptr == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   } else if ((*ptr)->size < size) {
+      *ptr = anv_realloc(&cmd_buffer->pool->alloc, *ptr, size, 8,
+                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (*ptr == NULL)
+         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+   (*ptr)->size = size;
 
-   struct drm_i915_gem_relocation_entry *new_relocs =
-      anv_device_alloc(device, new_length * sizeof(*list->relocs), 8,
-                       VK_SYSTEM_ALLOC_TYPE_INTERNAL);
-   if (new_relocs == NULL)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   return VK_SUCCESS;
+}
+
+#define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \
+   anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \
+      (offsetof(struct anv_push_constants, field) + \
+       sizeof(cmd_buffer->state.push_constants[0]->field)))
+
+static VkResult anv_create_cmd_buffer(
+    struct anv_device *                         device,
+    struct anv_cmd_pool *                       pool,
+    VkCommandBufferLevel                        level,
+    VkCommandBuffer*                            pCommandBuffer)
+{
+   struct anv_cmd_buffer *cmd_buffer;
+   VkResult result;
 
-   struct anv_bo **new_reloc_bos =
-      anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8,
-                       VK_SYSTEM_ALLOC_TYPE_INTERNAL);
-   if (new_relocs == NULL) {
-      anv_device_free(device, new_relocs);
+   cmd_buffer = anv_alloc(&pool->alloc, sizeof(*cmd_buffer), 8,
+                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (cmd_buffer == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-   }
 
-   memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
-   memcpy(new_reloc_bos, list->reloc_bos,
-          list->num_relocs * sizeof(*list->reloc_bos));
+   cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
+   cmd_buffer->device = device;
+   cmd_buffer->pool = pool;
 
-   anv_device_free(device, list->relocs);
-   anv_device_free(device, list->reloc_bos);
+   result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   anv_state_stream_init(&cmd_buffer->surface_state_stream,
+                         &device->surface_state_block_pool);
+   anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
+                         &device->dynamic_state_block_pool);
 
-   list->relocs = new_relocs;
-   list->reloc_bos = new_reloc_bos;
+   cmd_buffer->level = level;
+   cmd_buffer->usage_flags = 0;
+
+   anv_cmd_state_init(&cmd_buffer->state);
+
+   if (pool) {
+      list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
+   } else {
+      /* Init the pool_link so we can safefly call list_del when we destroy
+       * the command buffer
+       */
+      list_inithead(&cmd_buffer->pool_link);
+   }
+
+   *pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
 
    return VK_SUCCESS;
+
+ fail:
+   anv_free(&cmd_buffer->pool->alloc, cmd_buffer);
+
+   return result;
 }
 
-uint64_t
-anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device,
-                   uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
+VkResult anv_AllocateCommandBuffers(
+    VkDevice                                    _device,
+    const VkCommandBufferAllocateInfo*          pAllocateInfo,
+    VkCommandBuffer*                            pCommandBuffers)
 {
-   struct drm_i915_gem_relocation_entry *entry;
-   int index;
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_cmd_pool, pool, pAllocateInfo->commandPool);
+
+   VkResult result = VK_SUCCESS;
+   uint32_t i;
 
-   anv_reloc_list_grow(list, device, 1);
-   /* TODO: Handle failure */
+   for (i = 0; i < pAllocateInfo->bufferCount; i++) {
+      result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level,
+                                     &pCommandBuffers[i]);
+      if (result != VK_SUCCESS)
+         break;
+   }
 
-   /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
-   index = list->num_relocs++;
-   list->reloc_bos[index] = target_bo;
-   entry = &list->relocs[index];
-   entry->target_handle = target_bo->gem_handle;
-   entry->delta = delta;
-   entry->offset = offset;
-   entry->presumed_offset = target_bo->offset;
-   entry->read_domains = 0;
-   entry->write_domain = 0;
+   if (result != VK_SUCCESS)
+      anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool,
+                             i, pCommandBuffers);
 
-   return target_bo->offset + delta;
+   return result;
 }
 
 static void
-anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device,
-                      struct anv_reloc_list *other, uint32_t offset)
+anv_cmd_buffer_destroy(struct anv_cmd_buffer *cmd_buffer)
 {
-   anv_reloc_list_grow(list, device, other->num_relocs);
-   /* TODO: Handle failure */
+   list_del(&cmd_buffer->pool_link);
 
-   memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
-          other->num_relocs * sizeof(other->relocs[0]));
-   memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
-          other->num_relocs * sizeof(other->reloc_bos[0]));
+   anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
 
-   for (uint32_t i = 0; i < other->num_relocs; i++)
-      list->relocs[i + list->num_relocs].offset += offset;
+   anv_state_stream_finish(&cmd_buffer->surface_state_stream);
+   anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
 
-   list->num_relocs += other->num_relocs;
+   anv_free(&cmd_buffer->pool->alloc, cmd_buffer);
 }
 
-/*-----------------------------------------------------------------------*
- * Functions related to anv_batch
- *-----------------------------------------------------------------------*/
+void anv_FreeCommandBuffers(
+    VkDevice                                    device,
+    VkCommandPool                               commandPool,
+    uint32_t                                    commandBufferCount,
+    const VkCommandBuffer*                      pCommandBuffers)
+{
+   for (uint32_t i = 0; i < commandBufferCount; i++) {
+      ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
+
+      anv_cmd_buffer_destroy(cmd_buffer);
+   }
+}
 
-void *
-anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
+VkResult anv_ResetCommandBuffer(
+    VkCommandBuffer                             commandBuffer,
+    VkCommandBufferResetFlags                   flags)
 {
-   if (batch->next + num_dwords * 4 > batch->end)
-      batch->extend_cb(batch, batch->user_data);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   void *p = batch->next;
+   anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
 
-   batch->next += num_dwords * 4;
-   assert(batch->next <= batch->end);
+   anv_cmd_state_init(&cmd_buffer->state);
 
-   return p;
+   return VK_SUCCESS;
 }
 
-uint64_t
-anv_batch_emit_reloc(struct anv_batch *batch,
-                     void *location, struct anv_bo *bo, uint32_t delta)
+void
+anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
 {
-   return anv_reloc_list_add(&batch->relocs, batch->device,
-                             location - batch->start, bo, delta);
+   switch (cmd_buffer->device->info.gen) {
+   case 7:
+      if (cmd_buffer->device->info.is_haswell)
+         return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
+      else
+         return gen7_cmd_buffer_emit_state_base_address(cmd_buffer);
+   case 8:
+      return gen8_cmd_buffer_emit_state_base_address(cmd_buffer);
+   case 9:
+      return gen9_cmd_buffer_emit_state_base_address(cmd_buffer);
+   default:
+      unreachable("unsupported gen\n");
+   }
 }
 
-void
-anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
+VkResult anv_BeginCommandBuffer(
+    VkCommandBuffer                             commandBuffer,
+    const VkCommandBufferBeginInfo*             pBeginInfo)
 {
-   uint32_t size, offset;
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   size = other->next - other->start;
-   assert(size % 4 == 0);
+   anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
 
-   if (batch->next + size > batch->end)
-      batch->extend_cb(batch, batch->user_data);
+   cmd_buffer->usage_flags = pBeginInfo->flags;
 
-   assert(batch->next + size <= batch->end);
+   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
+      cmd_buffer->state.framebuffer =
+         anv_framebuffer_from_handle(pBeginInfo->framebuffer);
+      cmd_buffer->state.pass =
+         anv_render_pass_from_handle(pBeginInfo->renderPass);
 
-   VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size));
-   memcpy(batch->next, other->start, size);
+      struct anv_subpass *subpass =
+         &cmd_buffer->state.pass->subpasses[pBeginInfo->subpass];
 
-   offset = batch->next - batch->start;
-   anv_reloc_list_append(&batch->relocs, batch->device,
-                         &other->relocs, offset);
+      anv_cmd_buffer_begin_subpass(cmd_buffer, subpass);
+   }
 
-   batch->next += size;
-}
+   anv_cmd_buffer_emit_state_base_address(cmd_buffer);
+   cmd_buffer->state.current_pipeline = UINT32_MAX;
 
-/*-----------------------------------------------------------------------*
- * Functions related to anv_batch_bo
- *-----------------------------------------------------------------------*/
+   return VK_SUCCESS;
+}
 
-static VkResult
-anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out)
+VkResult anv_EndCommandBuffer(
+    VkCommandBuffer                             commandBuffer)
 {
-   VkResult result;
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct anv_device *device = cmd_buffer->device;
 
-   struct anv_batch_bo *bbo =
-      anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
-   if (bbo == NULL)
-      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   anv_cmd_buffer_end_batch_buffer(cmd_buffer);
 
-   bbo->num_relocs = 0;
+   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+      /* The algorithm used to compute the validate list is not threadsafe as
+       * it uses the bo->index field.  We have to lock the device around it.
+       * Fortunately, the chances for contention here are probably very low.
+       */
+      pthread_mutex_lock(&device->mutex);
+      anv_cmd_buffer_prepare_execbuf(cmd_buffer);
+      pthread_mutex_unlock(&device->mutex);
+   }
+
+   return VK_SUCCESS;
+}
 
-   result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo);
-   if (result != VK_SUCCESS) {
-      anv_device_free(device, bbo);
-      return result;
+void anv_CmdBindPipeline(
+    VkCommandBuffer                             commandBuffer,
+    VkPipelineBindPoint                         pipelineBindPoint,
+    VkPipeline                                  _pipeline)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
+
+   switch (pipelineBindPoint) {
+   case VK_PIPELINE_BIND_POINT_COMPUTE:
+      cmd_buffer->state.compute_pipeline = pipeline;
+      cmd_buffer->state.compute_dirty |= ANV_CMD_DIRTY_PIPELINE;
+      cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
+      break;
+
+   case VK_PIPELINE_BIND_POINT_GRAPHICS:
+      cmd_buffer->state.pipeline = pipeline;
+      cmd_buffer->state.vb_dirty |= pipeline->vb_used;
+      cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE;
+      cmd_buffer->state.push_constants_dirty |= pipeline->active_stages;
+
+      /* Apply the dynamic state from the pipeline */
+      cmd_buffer->state.dirty |= pipeline->dynamic_state_mask;
+      anv_dynamic_state_copy(&cmd_buffer->state.dynamic,
+                             &pipeline->dynamic_state,
+                             pipeline->dynamic_state_mask);
+      break;
+
+   default:
+      assert(!"invalid bind point");
+      break;
    }
+}
+
+void anv_CmdSetViewport(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    viewportCount,
+    const VkViewport*                           pViewports)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   *bbo_out = bbo;
+   cmd_buffer->state.dynamic.viewport.count = viewportCount;
+   memcpy(cmd_buffer->state.dynamic.viewport.viewports,
+          pViewports, viewportCount * sizeof(*pViewports));
 
-   return VK_SUCCESS;
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
 }
 
-static void
-anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
-                   size_t batch_padding)
+void anv_CmdSetScissor(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    scissorCount,
+    const VkRect2D*                             pScissors)
 {
-   batch->next = batch->start = bbo->bo.map;
-   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
-   bbo->first_reloc = batch->relocs.num_relocs;
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer->state.dynamic.scissor.count = scissorCount;
+   memcpy(cmd_buffer->state.dynamic.scissor.scissors,
+          pScissors, scissorCount * sizeof(*pScissors));
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
 }
 
-static void
-anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
+void anv_CmdSetLineWidth(
+    VkCommandBuffer                             commandBuffer,
+    float                                       lineWidth)
 {
-   /* Round batch up to an even number of dwords. */
-   if ((batch->next - batch->start) & 4)
-      anv_batch_emit(batch, GEN8_MI_NOOP);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   assert(batch->start == bbo->bo.map);
-   bbo->length = batch->next - batch->start;
-   VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
-   bbo->num_relocs = batch->relocs.num_relocs - bbo->first_reloc;
+   cmd_buffer->state.dynamic.line_width = lineWidth;
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
 }
 
-static void
-anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device)
+void anv_CmdSetDepthBias(
+    VkCommandBuffer                             commandBuffer,
+    float                                       depthBiasConstantFactor,
+    float                                       depthBiasClamp,
+    float                                       depthBiasSlopeFactor)
 {
-   anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo);
-   anv_device_free(device, bbo);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer->state.dynamic.depth_bias.bias = depthBiasConstantFactor;
+   cmd_buffer->state.dynamic.depth_bias.clamp = depthBiasClamp;
+   cmd_buffer->state.dynamic.depth_bias.slope = depthBiasSlopeFactor;
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
 }
 
-/*-----------------------------------------------------------------------*
- * Functions related to anv_batch_bo
- *-----------------------------------------------------------------------*/
+void anv_CmdSetBlendConstants(
+    VkCommandBuffer                             commandBuffer,
+    const float                                 blendConstants[4])
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-static inline struct anv_batch_bo *
-anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer)
+   memcpy(cmd_buffer->state.dynamic.blend_constants,
+          blendConstants, sizeof(float) * 4);
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
+}
+
+void anv_CmdSetDepthBounds(
+    VkCommandBuffer                             commandBuffer,
+    float                                       minDepthBounds,
+    float                                       maxDepthBounds)
 {
-   return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer->state.dynamic.depth_bounds.min = minDepthBounds;
+   cmd_buffer->state.dynamic.depth_bounds.max = maxDepthBounds;
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
 }
 
-static inline struct anv_batch_bo *
-anv_cmd_buffer_current_surface_bbo(struct anv_cmd_buffer *cmd_buffer)
+void anv_CmdSetStencilCompareMask(
+    VkCommandBuffer                             commandBuffer,
+    VkStencilFaceFlags                          faceMask,
+    uint32_t                                    compareMask)
 {
-   return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->surface_bos.prev, link);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask;
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask;
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
 }
 
-struct anv_bo *
-anv_cmd_buffer_current_surface_bo(struct anv_cmd_buffer *cmd_buffer)
+void anv_CmdSetStencilWriteMask(
+    VkCommandBuffer                             commandBuffer,
+    VkStencilFaceFlags                          faceMask,
+    uint32_t                                    writeMask)
 {
-   return &anv_cmd_buffer_current_surface_bbo(cmd_buffer)->bo;
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask;
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask;
+
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
 }
 
-static VkResult
-anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
+void anv_CmdSetStencilReference(
+    VkCommandBuffer                             commandBuffer,
+    VkStencilFaceFlags                          faceMask,
+    uint32_t                                    reference)
 {
-   struct anv_cmd_buffer *cmd_buffer = _data;
-   struct anv_batch_bo *new_bbo, *old_bbo =
-      anv_cmd_buffer_current_batch_bo(cmd_buffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
-   if (result != VK_SUCCESS)
-      return result;
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      cmd_buffer->state.dynamic.stencil_reference.front = reference;
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      cmd_buffer->state.dynamic.stencil_reference.back = reference;
 
-   /* We set the end of the batch a little short so we would be sure we
-    * have room for the chaining command.  Since we're about to emit the
-    * chaining command, let's set it back where it should go.
-    */
-   batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
-   assert(batch->end == old_bbo->bo.map + old_bbo->bo.size);
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
+}
 
-   anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START,
-      GEN8_MI_BATCH_BUFFER_START_header,
-      ._2ndLevelBatchBuffer = _1stlevelbatch,
-      .AddressSpaceIndicator = ASI_PPGTT,
-      .BatchBufferStartAddress = { &new_bbo->bo, 0 },
-   );
+void anv_CmdBindDescriptorSets(
+    VkCommandBuffer                             commandBuffer,
+    VkPipelineBindPoint                         pipelineBindPoint,
+    VkPipelineLayout                            _layout,
+    uint32_t                                    firstSet,
+    uint32_t                                    descriptorSetCount,
+    const VkDescriptorSet*                      pDescriptorSets,
+    uint32_t                                    dynamicOffsetCount,
+    const uint32_t*                             pDynamicOffsets)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
+   struct anv_descriptor_set_layout *set_layout;
 
-   anv_batch_bo_finish(old_bbo, batch);
+   assert(firstSet + descriptorSetCount < MAX_SETS);
 
-   list_addtail(&new_bbo->link, &cmd_buffer->batch_bos);
+   uint32_t dynamic_slot = 0;
+   for (uint32_t i = 0; i < descriptorSetCount; i++) {
+      ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
+      set_layout = layout->set[firstSet + i].layout;
 
-   anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
+      if (cmd_buffer->state.descriptors[firstSet + i] != set) {
+         cmd_buffer->state.descriptors[firstSet + i] = set;
+         cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;
+      }
 
-   return VK_SUCCESS;
+      if (set_layout->dynamic_offset_count > 0) {
+         anv_foreach_stage(s, set_layout->shader_stages) {
+            anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, s, dynamic);
+
+            struct anv_push_constants *push =
+               cmd_buffer->state.push_constants[s];
+
+            unsigned d = layout->set[firstSet + i].dynamic_offset_start;
+            const uint32_t *offsets = pDynamicOffsets + dynamic_slot;
+            struct anv_descriptor *desc = set->descriptors;
+
+            for (unsigned b = 0; b < set_layout->binding_count; b++) {
+               if (set_layout->binding[b].dynamic_offset_index < 0)
+                  continue;
+
+               unsigned array_size = set_layout->binding[b].array_size;
+               for (unsigned j = 0; j < array_size; j++) {
+                  push->dynamic[d].offset = *(offsets++);
+                  push->dynamic[d].range = (desc++)->range;
+                  d++;
+               }
+            }
+         }
+         cmd_buffer->state.push_constants_dirty |= set_layout->shader_stages;
+      }
+   }
 }
 
-struct anv_state
-anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer,
-                                   uint32_t size, uint32_t alignment)
+void anv_CmdBindVertexBuffers(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    startBinding,
+    uint32_t                                    bindingCount,
+    const VkBuffer*                             pBuffers,
+    const VkDeviceSize*                         pOffsets)
 {
-   struct anv_bo *surface_bo =
-      anv_cmd_buffer_current_surface_bo(cmd_buffer);
-   struct anv_state state;
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
 
-   state.offset = align_u32(cmd_buffer->surface_next, alignment);
-   if (state.offset + size > surface_bo->size)
-      return (struct anv_state) { 0 };
+   /* We have to defer setting up vertex buffer since we need the buffer
+    * stride from the pipeline. */
 
-   state.map = surface_bo->map + state.offset;
-   state.alloc_size = size;
-   cmd_buffer->surface_next = state.offset + size;
+   assert(startBinding + bindingCount < MAX_VBS);
+   for (uint32_t i = 0; i < bindingCount; i++) {
+      vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]);
+      vb[startBinding + i].offset = pOffsets[i];
+      cmd_buffer->state.vb_dirty |= 1 << (startBinding + i);
+   }
+}
 
-   assert(state.offset + size <= surface_bo->size);
+static void
+add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer,
+                        struct anv_state state, struct anv_bo *bo, uint32_t offset)
+{
+   /* The address goes in SURFACE_STATE dword 1 for gens < 8 and dwords 8 and
+    * 9 for gen8+.  We only write the first dword for gen8+ here and rely on
+    * the initial state to set the high bits to 0. */
 
-   return state;
+   const uint32_t dword = cmd_buffer->device->info.gen < 8 ? 1 : 8;
+
+   anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
+                      state.offset + dword * 4, bo, offset);
 }
 
-struct anv_state
-anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
-                                   uint32_t size, uint32_t alignment)
+static void
+fill_descriptor_buffer_surface_state(struct anv_device *device, void *state,
+                                     gl_shader_stage stage,
+                                     VkDescriptorType type,
+                                     uint32_t offset, uint32_t range)
 {
-   return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
-                                 size, alignment);
+   VkFormat format;
+   uint32_t stride;
+
+   switch (type) {
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      if (device->instance->physicalDevice.compiler->scalar_stage[stage]) {
+         stride = 4;
+      } else {
+         stride = 16;
+      }
+      format = VK_FORMAT_R32G32B32A32_SFLOAT;
+      break;
+
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+      stride = 1;
+      format = VK_FORMAT_UNDEFINED;
+      break;
+
+   default:
+      unreachable("Invalid descriptor type");
+   }
+
+   anv_fill_buffer_surface_state(device, state,
+                                 anv_format_for_vk_format(format),
+                                 offset, range, stride);
 }
 
 VkResult
-anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer)
+anv_cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
+                                  gl_shader_stage stage,
+                                  struct anv_state *bt_state)
 {
-   struct anv_batch_bo *new_bbo, *old_bbo =
-      anv_cmd_buffer_current_surface_bbo(cmd_buffer);
+   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
+   struct anv_subpass *subpass = cmd_buffer->state.subpass;
+   struct anv_pipeline_layout *layout;
+   uint32_t color_count, bias, state_offset;
+
+   if (stage == MESA_SHADER_COMPUTE)
+      layout = cmd_buffer->state.compute_pipeline->layout;
+   else
+      layout = cmd_buffer->state.pipeline->layout;
+
+   if (stage == MESA_SHADER_FRAGMENT) {
+      bias = MAX_RTS;
+      color_count = subpass->color_count;
+   } else {
+      bias = 0;
+      color_count = 0;
+   }
 
-   /* Finish off the old buffer */
-   old_bbo->num_relocs =
-      cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc;
-   old_bbo->length = cmd_buffer->surface_next;
+   /* This is a little awkward: layout can be NULL but we still have to
+    * allocate and set a binding table for the PS stage for render
+    * targets. */
+   uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;
 
-   VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
-   if (result != VK_SUCCESS)
-      return result;
+   if (color_count + surface_count == 0)
+      return VK_SUCCESS;
+
+   *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer,
+                                                  bias + surface_count,
+                                                  &state_offset);
+   uint32_t *bt_map = bt_state->map;
+
+   if (bt_state->map == NULL)
+      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+
+   for (uint32_t a = 0; a < color_count; a++) {
+      const struct anv_image_view *iview =
+         fb->attachments[subpass->color_attachments[a]];
+
+      bt_map[a] = iview->color_rt_surface_state.offset + state_offset;
+      add_surface_state_reloc(cmd_buffer, iview->color_rt_surface_state,
+                              iview->bo, iview->offset);
+   }
+
+   if (layout == NULL)
+      goto out;
+
+   if (layout->stage[stage].image_count > 0) {
+      VkResult result =
+         anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images);
+      if (result != VK_SUCCESS)
+         return result;
+
+      cmd_buffer->state.push_constants_dirty |= 1 << stage;
+   }
+
+   uint32_t image = 0;
+   for (uint32_t s = 0; s < layout->stage[stage].surface_count; s++) {
+      struct anv_pipeline_binding *binding =
+         &layout->stage[stage].surface_to_descriptor[s];
+      struct anv_descriptor_set *set =
+         cmd_buffer->state.descriptors[binding->set];
+      struct anv_descriptor *desc = &set->descriptors[binding->offset];
+
+      struct anv_state surface_state;
+      struct anv_bo *bo;
+      uint32_t bo_offset;
+
+      switch (desc->type) {
+      case VK_DESCRIPTOR_TYPE_SAMPLER:
+         /* Nothing for us to do here */
+         continue;
+
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+         bo = desc->buffer->bo;
+         bo_offset = desc->buffer->offset + desc->offset;
+
+         surface_state =
+            anv_cmd_buffer_alloc_surface_state(cmd_buffer);
+
+         fill_descriptor_buffer_surface_state(cmd_buffer->device,
+                                              surface_state.map,
+                                              stage, desc->type,
+                                              bo_offset, desc->range);
+
+         if (!cmd_buffer->device->info.has_llc)
+            anv_state_clflush(surface_state);
+
+         break;
+      }
+
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+         surface_state = desc->image_view->nonrt_surface_state;
+         bo = desc->image_view->bo;
+         bo_offset = desc->image_view->offset;
+         break;
+
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
+         surface_state = desc->image_view->storage_surface_state;
+         bo = desc->image_view->bo;
+         bo_offset = desc->image_view->offset;
+
+         struct brw_image_param *image_param =
+            &cmd_buffer->state.push_constants[stage]->images[image++];
+
+         anv_image_view_fill_image_param(cmd_buffer->device, desc->image_view,
+                                         image_param);
+         image_param->surface_idx = bias + s;
+         break;
+      }
+
+      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+         assert(!"Unsupported descriptor type");
+         break;
+
+      default:
+         assert(!"Invalid descriptor type");
+         continue;
+      }
 
-   new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs;
-   cmd_buffer->surface_next = 1;
+      bt_map[bias + s] = surface_state.offset + state_offset;
+      add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
+   }
+   assert(image == layout->stage[stage].image_count);
 
-   list_addtail(&new_bbo->link, &cmd_buffer->surface_bos);
+ out:
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(*bt_state);
 
    return VK_SUCCESS;
 }
 
 VkResult
-anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
+anv_cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
+                             gl_shader_stage stage, struct anv_state *state)
 {
-   struct anv_batch_bo *batch_bo, *surface_bbo;
-   struct anv_device *device = cmd_buffer->device;
-   VkResult result;
+   struct anv_pipeline_layout *layout;
+   uint32_t sampler_count;
 
-   list_inithead(&cmd_buffer->batch_bos);
-   list_inithead(&cmd_buffer->surface_bos);
+   if (stage == MESA_SHADER_COMPUTE)
+      layout = cmd_buffer->state.compute_pipeline->layout;
+   else
+      layout = cmd_buffer->state.pipeline->layout;
 
-   result = anv_batch_bo_create(device, &batch_bo);
-   if (result != VK_SUCCESS)
-      return result;
-
-   list_addtail(&batch_bo->link, &cmd_buffer->batch_bos);
+   sampler_count = layout ? layout->stage[stage].sampler_count : 0;
+   if (sampler_count == 0)
+      return VK_SUCCESS;
 
-   result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device);
-   if (result != VK_SUCCESS)
-      goto fail_batch_bo;
+   uint32_t size = sampler_count * 16;
+   *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32);
 
-   cmd_buffer->batch.device = device;
-   cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
-   cmd_buffer->batch.user_data = cmd_buffer;
+   if (state->map == NULL)
+      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
 
-   anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
-                      GEN8_MI_BATCH_BUFFER_START_length * 4);
+   for (uint32_t s = 0; s < layout->stage[stage].sampler_count; s++) {
+      struct anv_pipeline_binding *binding =
+         &layout->stage[stage].sampler_to_descriptor[s];
+      struct anv_descriptor_set *set =
+         cmd_buffer->state.descriptors[binding->set];
+      struct anv_descriptor *desc = &set->descriptors[binding->offset];
 
-   result = anv_batch_bo_create(device, &surface_bbo);
-   if (result != VK_SUCCESS)
-      goto fail_batch_relocs;
+      if (desc->type != VK_DESCRIPTOR_TYPE_SAMPLER &&
+          desc->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+         continue;
 
-   surface_bbo->first_reloc = 0;
-   list_addtail(&surface_bbo->link, &cmd_buffer->surface_bos);
+      struct anv_sampler *sampler = desc->sampler;
 
-   result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device);
-   if (result != VK_SUCCESS)
-      goto fail_ss_batch_bo;
+      /* This can happen if we have an unfilled slot since TYPE_SAMPLER
+       * happens to be zero.
+       */
+      if (sampler == NULL)
+         continue;
 
-   /* Start surface_next at 1 so surface offset 0 is invalid. */
-   cmd_buffer->surface_next = 1;
+      memcpy(state->map + (s * 16),
+             sampler->state, sizeof(sampler->state));
+   }
 
-   cmd_buffer->execbuf2.objects = NULL;
-   cmd_buffer->execbuf2.bos = NULL;
-   cmd_buffer->execbuf2.array_length = 0;
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(*state);
 
    return VK_SUCCESS;
+}
 
- fail_ss_batch_bo:
-   anv_batch_bo_destroy(surface_bbo, device);
- fail_batch_relocs:
-   anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
- fail_batch_bo:
-   anv_batch_bo_destroy(batch_bo, device);
+struct anv_state
+anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
+                            const void *data, uint32_t size, uint32_t alignment)
+{
+   struct anv_state state;
 
-   return result;
+   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
+   memcpy(state.map, data, size);
+
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(state);
+
+   VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));
+
+   return state;
 }
 
-void
-anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
+struct anv_state
+anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
+                             uint32_t *a, uint32_t *b,
+                             uint32_t dwords, uint32_t alignment)
 {
-   struct anv_device *device = cmd_buffer->device;
+   struct anv_state state;
+   uint32_t *p;
 
-   /* Destroy all of the batch buffers */
-   list_for_each_entry_safe(struct anv_batch_bo, bbo,
-                            &cmd_buffer->batch_bos, link) {
-      anv_batch_bo_destroy(bbo, device);
-   }
-   anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
+   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
+                                              dwords * 4, alignment);
+   p = state.map;
+   for (uint32_t i = 0; i < dwords; i++)
+      p[i] = a[i] | b[i];
 
-   /* Destroy all of the surface state buffers */
-   list_for_each_entry_safe(struct anv_batch_bo, bbo,
-                            &cmd_buffer->surface_bos, link) {
-      anv_batch_bo_destroy(bbo, device);
-   }
-   anv_reloc_list_finish(&cmd_buffer->surface_relocs, device);
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(state);
+
+   VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
 
-   anv_device_free(device, cmd_buffer->execbuf2.objects);
-   anv_device_free(device, cmd_buffer->execbuf2.bos);
+   return state;
 }
 
 void
-anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
+anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
+                             struct anv_subpass *subpass)
 {
-   struct anv_device *device = cmd_buffer->device;
-
-   /* Delete all but the first batch bo */
-   assert(!list_empty(&cmd_buffer->batch_bos));
-   while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) {
-      struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
-      list_del(&bbo->link);
-      anv_batch_bo_destroy(bbo, device);
-   }
-   assert(!list_empty(&cmd_buffer->batch_bos));
-
-   cmd_buffer->batch.relocs.num_relocs = 0;
-   anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer),
-                      &cmd_buffer->batch,
-                      GEN8_MI_BATCH_BUFFER_START_length * 4);
-
-   /* Delete all but the first batch bo */
-   assert(!list_empty(&cmd_buffer->batch_bos));
-   while (cmd_buffer->surface_bos.next != cmd_buffer->surface_bos.prev) {
-      struct anv_batch_bo *bbo = anv_cmd_buffer_current_surface_bbo(cmd_buffer);
-      list_del(&bbo->link);
-      anv_batch_bo_destroy(bbo, device);
+   switch (cmd_buffer->device->info.gen) {
+   case 7:
+      gen7_cmd_buffer_begin_subpass(cmd_buffer, subpass);
+      break;
+   case 8:
+      gen8_cmd_buffer_begin_subpass(cmd_buffer, subpass);
+      break;
+   case 9:
+      gen9_cmd_buffer_begin_subpass(cmd_buffer, subpass);
+      break;
+   default:
+      unreachable("unsupported gen\n");
    }
-   assert(!list_empty(&cmd_buffer->batch_bos));
+}
 
-   cmd_buffer->surface_next = 1;
-   cmd_buffer->surface_relocs.num_relocs = 0;
+void anv_CmdSetEvent(
+    VkCommandBuffer                             commandBuffer,
+    VkEvent                                     event,
+    VkPipelineStageFlags                        stageMask)
+{
+   stub();
 }
 
-static VkResult
-anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
-                      struct anv_bo *bo,
-                      struct drm_i915_gem_relocation_entry *relocs,
-                      size_t num_relocs)
+void anv_CmdResetEvent(
+    VkCommandBuffer                             commandBuffer,
+    VkEvent                                     event,
+    VkPipelineStageFlags                        stageMask)
 {
-   struct drm_i915_gem_exec_object2 *obj;
+   stub();
+}
 
-   if (bo->index < cmd_buffer->execbuf2.bo_count &&
-       cmd_buffer->execbuf2.bos[bo->index] == bo)
-      return VK_SUCCESS;
+void anv_CmdWaitEvents(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    eventCount,
+    const VkEvent*                              pEvents,
+    VkPipelineStageFlags                        srcStageMask,
+    VkPipelineStageFlags                        destStageMask,
+    uint32_t                                    memBarrierCount,
+    const void* const*                          ppMemBarriers)
+{
+   stub();
+}
 
-   if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) {
-      uint32_t new_len = cmd_buffer->execbuf2.objects ?
-                         cmd_buffer->execbuf2.array_length * 2 : 64;
+struct anv_state
+anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
+                              gl_shader_stage stage)
+{
+   struct anv_push_constants *data =
+      cmd_buffer->state.push_constants[stage];
+   struct brw_stage_prog_data *prog_data =
+      cmd_buffer->state.pipeline->prog_data[stage];
+
+   /* If we don't actually have any push constants, bail. */
+   if (data == NULL || prog_data->nr_params == 0)
+      return (struct anv_state) { .offset = 0 };
+
+   struct anv_state state =
+      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
+                                         prog_data->nr_params * sizeof(float),
+                                         32 /* bottom 5 bits MBZ */);
+
+   /* Walk through the param array and fill the buffer with data */
+   uint32_t *u32_map = state.map;
+   for (unsigned i = 0; i < prog_data->nr_params; i++) {
+      uint32_t offset = (uintptr_t)prog_data->param[i];
+      u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
+   }
 
-      struct drm_i915_gem_exec_object2 *new_objects =
-         anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects),
-                          8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
-      if (new_objects == NULL)
-         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(state);
 
-      struct anv_bo **new_bos =
-         anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos),
-                          8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
-      if (new_objects == NULL) {
-         anv_device_free(cmd_buffer->device, new_objects);
-         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-      }
+   return state;
+}
 
-      if (cmd_buffer->execbuf2.objects) {
-         memcpy(new_objects, cmd_buffer->execbuf2.objects,
-                cmd_buffer->execbuf2.bo_count * sizeof(*new_objects));
-         memcpy(new_bos, cmd_buffer->execbuf2.bos,
-                cmd_buffer->execbuf2.bo_count * sizeof(*new_bos));
-      }
+struct anv_state
+anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
+{
+   struct anv_push_constants *data =
+      cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE];
+   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+   const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
+   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
+
+   const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
+   const unsigned push_constant_data_size =
+      (local_id_dwords + prog_data->nr_params) * sizeof(gl_constant_value);
+   const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
+   const unsigned param_aligned_count =
+      reg_aligned_constant_size / sizeof(uint32_t);
+
+   /* If we don't actually have any push constants, bail. */
+   if (reg_aligned_constant_size == 0)
+      return (struct anv_state) { .offset = 0 };
+
+   const unsigned threads = pipeline->cs_thread_width_max;
+   const unsigned total_push_constants_size =
+      reg_aligned_constant_size * threads;
+   struct anv_state state =
+      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
+                                         total_push_constants_size,
+                                         32 /* bottom 5 bits MBZ */);
+
+   /* Walk through the param array and fill the buffer with data */
+   uint32_t *u32_map = state.map;
+
+   brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads,
+                                reg_aligned_constant_size);
+
+   /* Setup uniform data for the first thread */
+   for (unsigned i = 0; i < prog_data->nr_params; i++) {
+      uint32_t offset = (uintptr_t)prog_data->param[i];
+      u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset);
+   }
 
-      cmd_buffer->execbuf2.objects = new_objects;
-      cmd_buffer->execbuf2.bos = new_bos;
-      cmd_buffer->execbuf2.array_length = new_len;
+   /* Copy uniform data from the first thread to every other thread */
+   const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t);
+   for (unsigned t = 1; t < threads; t++) {
+      memcpy(&u32_map[t * param_aligned_count + local_id_dwords],
+             &u32_map[local_id_dwords],
+             uniform_data_size);
    }
 
-   assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length);
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(state);
+
+   return state;
+}
 
-   bo->index = cmd_buffer->execbuf2.bo_count++;
-   obj = &cmd_buffer->execbuf2.objects[bo->index];
-   cmd_buffer->execbuf2.bos[bo->index] = bo;
+void anv_CmdPushConstants(
+    VkCommandBuffer                             commandBuffer,
+    VkPipelineLayout                            layout,
+    VkShaderStageFlags                          stageFlags,
+    uint32_t                                    offset,
+    uint32_t                                    size,
+    const void*                                 pValues)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   obj->handle = bo->gem_handle;
-   obj->relocation_count = 0;
-   obj->relocs_ptr = 0;
-   obj->alignment = 0;
-   obj->offset = bo->offset;
-   obj->flags = 0;
-   obj->rsvd1 = 0;
-   obj->rsvd2 = 0;
+   anv_foreach_stage(stage, stageFlags) {
+      anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, client_data);
 
-   if (relocs) {
-      obj->relocation_count = num_relocs;
-      obj->relocs_ptr = (uintptr_t) relocs;
+      memcpy(cmd_buffer->state.push_constants[stage]->client_data + offset,
+             pValues, size);
    }
 
-   return VK_SUCCESS;
+   cmd_buffer->state.push_constants_dirty |= stageFlags;
 }
 
-static void
-anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer,
-                                struct anv_reloc_list *list)
+void anv_CmdExecuteCommands(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    commandBuffersCount,
+    const VkCommandBuffer*                      pCmdBuffers)
 {
-   for (size_t i = 0; i < list->num_relocs; i++)
-      anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0);
-}
+   ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer);
 
-static void
-anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
-                              struct anv_reloc_list *list)
-{
-   struct anv_bo *bo;
+   assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+
+   anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]);
 
-   /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in
-    * struct drm_i915_gem_exec_object2 against the bos current offset and if
-    * all bos haven't moved it will skip relocation processing alltogether.
-    * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming
-    * value of offset so we can set it either way.  For that to work we need
-    * to make sure all relocs use the same presumed offset.
-    */
+   for (uint32_t i = 0; i < commandBuffersCount; i++) {
+      ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
 
-   for (size_t i = 0; i < list->num_relocs; i++) {
-      bo = list->reloc_bos[i];
-      if (bo->offset != list->relocs[i].presumed_offset)
-         cmd_buffer->execbuf2.need_reloc = true;
+      assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
 
-      list->relocs[i].target_handle = bo->index;
+      anv_cmd_buffer_add_secondary(primary, secondary);
    }
 }
 
-void
-anv_cmd_buffer_emit_batch_buffer_end(struct anv_cmd_buffer *cmd_buffer)
+VkResult anv_CreateCommandPool(
+    VkDevice                                    _device,
+    const VkCommandPoolCreateInfo*              pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkCommandPool*                              pCmdPool)
 {
-   struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
-   struct anv_batch_bo *surface_bbo =
-      anv_cmd_buffer_current_surface_bbo(cmd_buffer);
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_cmd_pool *pool;
+
+   pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
+                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (pool == NULL)
+      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   if (pAllocator)
+      pool->alloc = *pAllocator;
+   else
+      pool->alloc = device->alloc;
 
-   anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END);
+   list_inithead(&pool->cmd_buffers);
 
-   anv_batch_bo_finish(batch_bo, &cmd_buffer->batch);
+   *pCmdPool = anv_cmd_pool_to_handle(pool);
 
-   surface_bbo->num_relocs =
-      cmd_buffer->surface_relocs.num_relocs - surface_bbo->first_reloc;
-   surface_bbo->length = cmd_buffer->surface_next;
+   return VK_SUCCESS;
 }
 
-void
-anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
+void anv_DestroyCommandPool(
+    VkDevice                                    _device,
+    VkCommandPool                               commandPool,
+    const VkAllocationCallbacks*                pAllocator)
 {
-   struct anv_batch *batch = &cmd_buffer->batch;
+   ANV_FROM_HANDLE(anv_device, device, _device);
+   ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool);
 
-   cmd_buffer->execbuf2.bo_count = 0;
-   cmd_buffer->execbuf2.need_reloc = false;
+   anv_ResetCommandPool(_device, commandPool, 0);
+
+   anv_free2(&device->alloc, pAllocator, pool);
+}
 
-   /* Add surface state bos first so we can add them with their relocs. */
-   list_for_each_entry(struct anv_batch_bo, bbo,
-                       &cmd_buffer->surface_bos, link) {
-      anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo,
-                            &cmd_buffer->surface_relocs.relocs[bbo->first_reloc],
-                            bbo->num_relocs);
+VkResult anv_ResetCommandPool(
+    VkDevice                                    device,
+    VkCommandPool                               commandPool,
+    VkCommandPoolResetFlags                     flags)
+{
+   ANV_FROM_HANDLE(anv_cmd_pool, pool, commandPool);
+
+   list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer,
+                            &pool->cmd_buffers, pool_link) {
+      anv_cmd_buffer_destroy(cmd_buffer);
    }
 
-   /* Add all of the BOs referenced by surface state */
-   anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs);
+   return VK_SUCCESS;
+}
+
+/**
+ * Return NULL if the current subpass has no depthstencil attachment.
+ */
+const struct anv_image_view *
+anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer)
+{
+   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
+   const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
 
-   struct anv_batch_bo *first_batch_bo =
-      LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.next, link);
+   if (subpass->depth_stencil_attachment == VK_ATTACHMENT_UNUSED)
+      return NULL;
 
-   /* Add all but the first batch BO */
-   list_for_each_entry(struct anv_batch_bo, bbo, &cmd_buffer->batch_bos, link) {
-      if (bbo == first_batch_bo)
-         continue;
+   const struct anv_image_view *iview =
+      fb->attachments[subpass->depth_stencil_attachment];
 
-      anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo,
-                            &batch->relocs.relocs[bbo->first_reloc],
-                            bbo->num_relocs);
-   }
+   assert(anv_format_is_depth_or_stencil(iview->format));
 
-   /* Add everything referenced by the batches */
-   anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs);
-
-   /* Add the first batch bo last */
-   anv_cmd_buffer_add_bo(cmd_buffer, &first_batch_bo->bo,
-                         &batch->relocs.relocs[first_batch_bo->first_reloc],
-                         first_batch_bo->num_relocs);
-   assert(first_batch_bo->bo.index == cmd_buffer->execbuf2.bo_count - 1);
-
-   anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
-   anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs);
-
-   cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) {
-      .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects,
-      .buffer_count = cmd_buffer->execbuf2.bo_count,
-      .batch_start_offset = 0,
-      .batch_len = batch->next - batch->start,
-      .cliprects_ptr = 0,
-      .num_cliprects = 0,
-      .DR1 = 0,
-      .DR4 = 0,
-      .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
-      .rsvd1 = cmd_buffer->device->context_id,
-      .rsvd2 = 0,
-   };
-
-   if (!cmd_buffer->execbuf2.need_reloc)
-      cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC;
+   return iview;
 }