anv/pipeline: Handle output lowering in anv_pipeline instead of spirv_to_nir
[mesa.git] / src / vulkan / gen7_cmd_buffer.c
index d1a583d0bdf1564ef7126ba01036216c56a29bfe..e69bf47782e05d4369938e7bcb5cb076e5aab480 100644 (file)
 #include "gen7_pack.h"
 #include "gen75_pack.h"
 
-static void
+static uint32_t
 cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
 {
    static const uint32_t push_constant_opcodes[] = {
-      [VK_SHADER_STAGE_VERTEX]                  = 21,
-      [VK_SHADER_STAGE_TESS_CONTROL]            = 25, /* HS */
-      [VK_SHADER_STAGE_TESS_EVALUATION]         = 26, /* DS */
-      [VK_SHADER_STAGE_GEOMETRY]                = 22,
-      [VK_SHADER_STAGE_FRAGMENT]                = 23,
-      [VK_SHADER_STAGE_COMPUTE]                 = 0,
+      [MESA_SHADER_VERTEX]                      = 21,
+      [MESA_SHADER_TESS_CTRL]                   = 25, /* HS */
+      [MESA_SHADER_TESS_EVAL]                   = 26, /* DS */
+      [MESA_SHADER_GEOMETRY]                    = 22,
+      [MESA_SHADER_FRAGMENT]                    = 23,
+      [MESA_SHADER_COMPUTE]                     = 0,
    };
 
-   VkShaderStage stage;
    VkShaderStageFlags flushed = 0;
 
-   for_each_bit(stage, cmd_buffer->state.push_constants_dirty) {
+   anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) {
       struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);
 
       if (state.offset == 0)
@@ -60,151 +59,67 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
                         .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
                      });
 
-      flushed |= 1 << stage;
+      flushed |= mesa_to_vk_shader_stage(stage);
    }
 
    cmd_buffer->state.push_constants_dirty &= ~flushed;
-}
 
-GENX_FUNC(GEN7, GEN7) void
-genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
-{
-   struct anv_device *device = cmd_buffer->device;
-   struct anv_bo *scratch_bo = NULL;
-
-   cmd_buffer->state.scratch_size =
-      anv_block_pool_size(&device->scratch_block_pool);
-   if (cmd_buffer->state.scratch_size > 0)
-      scratch_bo = &device->scratch_block_pool.bo;
-
-   anv_batch_emit(&cmd_buffer->batch, GEN7_STATE_BASE_ADDRESS,
-      .GeneralStateBaseAddress                  = { scratch_bo, 0 },
-      .GeneralStateMemoryObjectControlState     = GEN7_MOCS,
-      .GeneralStateBaseAddressModifyEnable      = true,
-      .GeneralStateAccessUpperBound             = { scratch_bo, scratch_bo->size },
-      .GeneralStateAccessUpperBoundModifyEnable = true,
-
-      .SurfaceStateBaseAddress                  = anv_cmd_buffer_surface_base_address(cmd_buffer),
-      .SurfaceStateMemoryObjectControlState     = GEN7_MOCS,
-      .SurfaceStateBaseAddressModifyEnable      = true,
-
-      .DynamicStateBaseAddress                  = { &device->dynamic_state_block_pool.bo, 0 },
-      .DynamicStateMemoryObjectControlState     = GEN7_MOCS,
-      .DynamicStateBaseAddressModifyEnable      = true,
-
-      .IndirectObjectBaseAddress                = { NULL, 0 },
-      .IndirectObjectMemoryObjectControlState   = GEN7_MOCS,
-      .IndirectObjectBaseAddressModifyEnable    = true,
-
-      .IndirectObjectAccessUpperBound           = { NULL, 0xffffffff },
-      .IndirectObjectAccessUpperBoundModifyEnable = true,
-
-      .InstructionBaseAddress                   = { &device->instruction_block_pool.bo, 0 },
-      .InstructionMemoryObjectControlState      = GEN7_MOCS,
-      .InstructionBaseAddressModifyEnable       = true,
-      .InstructionAccessUpperBound              =  { &device->instruction_block_pool.bo,
-                                                     device->instruction_block_pool.bo.size },
-      .InstructionAccessUpperBoundModifyEnable  = true);
-
-   /* After re-setting the surface state base address, we have to do some
-    * cache flusing so that the sampler engine will pick up the new
-    * SURFACE_STATE objects and binding tables. From the Broadwell PRM,
-    * Shared Function > 3D Sampler > State > State Caching (page 96):
-    *
-    *    Coherency with system memory in the state cache, like the texture
-    *    cache is handled partially by software. It is expected that the
-    *    command stream or shader will issue Cache Flush operation or
-    *    Cache_Flush sampler message to ensure that the L1 cache remains
-    *    coherent with system memory.
-    *
-    *    [...]
-    *
-    *    Whenever the value of the Dynamic_State_Base_Addr,
-    *    Surface_State_Base_Addr are altered, the L1 state cache must be
-    *    invalidated to ensure the new surface or sampler state is fetched
-    *    from system memory.
-    *
-    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
-    * which, according the PIPE_CONTROL instruction documentation in the
-    * Broadwell PRM:
-    *
-    *    Setting this bit is independent of any other bit in this packet.
-    *    This bit controls the invalidation of the L1 and L2 state caches
-    *    at the top of the pipe i.e. at the parsing time.
-    *
-    * Unfortunately, experimentation seems to indicate that state cache
-    * invalidation through a PIPE_CONTROL does nothing whatsoever in
-    * regards to surface state and binding tables.  In stead, it seems that
-    * invalidating the texture cache is what is actually needed.
-    *
-    * XXX:  As far as we have been able to determine through
-    * experimentation, shows that flush the texture cache appears to be
-    * sufficient.  The theory here is that all of the sampling/rendering
-    * units cache the binding table in the texture cache.  However, we have
-    * yet to be able to actually confirm this.
-    */
-   anv_batch_emit(&cmd_buffer->batch, GEN7_PIPE_CONTROL,
-                  .TextureCacheInvalidationEnable = true);
+   return flushed;
 }
 
-static VkResult
-flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, VkShaderStage stage)
+GENX_FUNC(GEN7, GEN7) void
+genX(cmd_buffer_emit_descriptor_pointers)(struct anv_cmd_buffer *cmd_buffer,
+                                          uint32_t stages)
 {
-   struct anv_state surfaces = { 0, }, samplers = { 0, };
-   VkResult result;
-
-   result = anv_cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
-   if (result != VK_SUCCESS)
-      return result;
-   result = anv_cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
-   if (result != VK_SUCCESS)
-      return result;
-
    static const uint32_t sampler_state_opcodes[] = {
-      [VK_SHADER_STAGE_VERTEX]                  = 43,
-      [VK_SHADER_STAGE_TESS_CONTROL]            = 44, /* HS */
-      [VK_SHADER_STAGE_TESS_EVALUATION]         = 45, /* DS */
-      [VK_SHADER_STAGE_GEOMETRY]                = 46,
-      [VK_SHADER_STAGE_FRAGMENT]                = 47,
-      [VK_SHADER_STAGE_COMPUTE]                 = 0,
+      [MESA_SHADER_VERTEX]                      = 43,
+      [MESA_SHADER_TESS_CTRL]                   = 44, /* HS */
+      [MESA_SHADER_TESS_EVAL]                   = 45, /* DS */
+      [MESA_SHADER_GEOMETRY]                    = 46,
+      [MESA_SHADER_FRAGMENT]                    = 47,
+      [MESA_SHADER_COMPUTE]                     = 0,
    };
 
    static const uint32_t binding_table_opcodes[] = {
-      [VK_SHADER_STAGE_VERTEX]                  = 38,
-      [VK_SHADER_STAGE_TESS_CONTROL]            = 39,
-      [VK_SHADER_STAGE_TESS_EVALUATION]         = 40,
-      [VK_SHADER_STAGE_GEOMETRY]                = 41,
-      [VK_SHADER_STAGE_FRAGMENT]                = 42,
-      [VK_SHADER_STAGE_COMPUTE]                 = 0,
+      [MESA_SHADER_VERTEX]                      = 38,
+      [MESA_SHADER_TESS_CTRL]                   = 39,
+      [MESA_SHADER_TESS_EVAL]                   = 40,
+      [MESA_SHADER_GEOMETRY]                    = 41,
+      [MESA_SHADER_FRAGMENT]                    = 42,
+      [MESA_SHADER_COMPUTE]                     = 0,
    };
 
-   if (samplers.alloc_size > 0) {
-      anv_batch_emit(&cmd_buffer->batch,
-                     GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS,
-                     ._3DCommandSubOpcode  = sampler_state_opcodes[stage],
-                     .PointertoVSSamplerState = samplers.offset);
-   }
+   anv_foreach_stage(s, stages) {
+      if (cmd_buffer->state.samplers[s].alloc_size > 0) {
+         anv_batch_emit(&cmd_buffer->batch,
+                        GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS,
+                        ._3DCommandSubOpcode  = sampler_state_opcodes[s],
+                        .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset);
+      }
 
-   if (surfaces.alloc_size > 0) {
+      /* Always emit binding table pointers if we're asked to, since on SKL
+       * this is what flushes push constants. */
       anv_batch_emit(&cmd_buffer->batch,
                      GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS,
-                     ._3DCommandSubOpcode  = binding_table_opcodes[stage],
-                     .PointertoVSBindingTable = surfaces.offset);
+                     ._3DCommandSubOpcode  = binding_table_opcodes[s],
+                     .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset);
    }
-
-   return VK_SUCCESS;
 }
 
-GENX_FUNC(GEN7, GEN7) void
+GENX_FUNC(GEN7, GEN7) uint32_t
 genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer)
 {
-   VkShaderStage s;
    VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty &
                               cmd_buffer->state.pipeline->active_stages;
 
    VkResult result = VK_SUCCESS;
-   for_each_bit(s, dirty) {
-      result = flush_descriptor_set(cmd_buffer, s);
+   anv_foreach_stage(s, dirty) {
+      result = anv_cmd_buffer_emit_samplers(cmd_buffer, s,
+                                            &cmd_buffer->state.samplers[s]);
+      if (result != VK_SUCCESS)
+         break;
+      result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s,
+                                                 &cmd_buffer->state.binding_tables[s]);
       if (result != VK_SUCCESS)
          break;
    }
@@ -221,15 +136,22 @@ genX(cmd_buffer_flush_descriptor_sets)(struct anv_cmd_buffer *cmd_buffer)
       anv_cmd_buffer_emit_state_base_address(cmd_buffer);
 
       /* Re-emit all active binding tables */
-      for_each_bit(s, cmd_buffer->state.pipeline->active_stages) {
-         result = flush_descriptor_set(cmd_buffer, s);
-
-         /* It had better succeed this time */
-         assert(result == VK_SUCCESS);
+      dirty |= cmd_buffer->state.pipeline->active_stages;
+      anv_foreach_stage(s, dirty) {
+         result = anv_cmd_buffer_emit_samplers(cmd_buffer, s,
+                                               &cmd_buffer->state.samplers[s]);
+         if (result != VK_SUCCESS)
+            return result;
+         result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s,
+                                                    &cmd_buffer->state.binding_tables[s]);
+         if (result != VK_SUCCESS)
+            return result;
       }
    }
 
-   cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages;
+   cmd_buffer->state.descriptors_dirty &= ~dirty;
+
+   return dirty;
 }
 
 static inline int64_t
@@ -284,6 +206,9 @@ emit_scissor_state(struct anv_cmd_buffer *cmd_buffer,
 
    anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS,
                   .ScissorRectPointer = scissor_state.offset);
+
+   if (!cmd_buffer->device->info.has_llc)
+      anv_state_clflush(scissor_state);
 }
 
 GENX_FUNC(GEN7, GEN7) void
@@ -316,12 +241,12 @@ static const uint32_t restart_index_for_type[] = {
 };
 
 void genX(CmdBindIndexBuffer)(
-    VkCmdBuffer                                 cmdBuffer,
+    VkCommandBuffer                             commandBuffer,
     VkBuffer                                    _buffer,
     VkDeviceSize                                offset,
     VkIndexType                                 indexType)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
 
    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
@@ -341,27 +266,27 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
    VkResult result;
 
    result = anv_cmd_buffer_emit_samplers(cmd_buffer,
-                                         VK_SHADER_STAGE_COMPUTE, &samplers);
+                                         MESA_SHADER_COMPUTE, &samplers);
    if (result != VK_SUCCESS)
       return result;
    result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
-                                               VK_SHADER_STAGE_COMPUTE, &surfaces);
+                                              MESA_SHADER_COMPUTE, &surfaces);
    if (result != VK_SUCCESS)
       return result;
 
-   struct GEN7_INTERFACE_DESCRIPTOR_DATA desc = {
-      .KernelStartPointer = pipeline->cs_simd,
-      .BindingTablePointer = surfaces.offset,
-      .SamplerStatePointer = samplers.offset,
-      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
-   };
+   const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
 
-   uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
    struct anv_state state =
-      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
-
-   GEN7_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
-
+      anv_state_pool_emit(&device->dynamic_state_pool,
+                          GEN7_INTERFACE_DESCRIPTOR_DATA, 64,
+                          .KernelStartPointer = pipeline->cs_simd,
+                          .BindingTablePointer = surfaces.offset,
+                          .SamplerStatePointer = samplers.offset,
+                          .BarrierEnable = cs_prog_data->uses_barrier,
+                          .NumberofThreadsinGPGPUThreadGroup =
+                             pipeline->cs_thread_width_max);
+
+   const uint32_t size = GEN7_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
    anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
                   .InterfaceDescriptorTotalLength = size,
                   .InterfaceDescriptorDataStartAddress = state.offset);
@@ -391,7 +316,7 @@ cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
       /* FIXME: figure out descriptors for gen7 */
       result = flush_compute_descriptor_set(cmd_buffer);
       assert(result == VK_SUCCESS);
-      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE;
+      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
    }
 
    cmd_buffer->state.compute_dirty = 0;
@@ -469,8 +394,11 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
                      .Address = { &cmd_buffer->device->workaround_bo, 0 });
    }
 
-   if (cmd_buffer->state.descriptors_dirty)
-      gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer);
+   uint32_t dirty = 0;
+   if (cmd_buffer->state.descriptors_dirty) {
+      dirty = gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer);
+      gen7_cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);
+   }
 
    if (cmd_buffer->state.push_constants_dirty)
       cmd_buffer_flush_push_constants(cmd_buffer);
@@ -490,7 +418,7 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
                                   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) {
 
       bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f ||
-         cmd_buffer->state.dynamic.depth_bias.slope_scaled != 0.0f;
+         cmd_buffer->state.dynamic.depth_bias.slope != 0.0f;
 
       uint32_t sf_dw[GEN7_3DSTATE_SF_length];
       struct GEN7_3DSTATE_SF sf = {
@@ -500,7 +428,7 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
          .GlobalDepthOffsetEnableWireframe = enable_bias,
          .GlobalDepthOffsetEnablePoint = enable_bias,
          .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
-         .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope_scaled,
+         .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
          .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
       };
       GEN7_3DSTATE_SF_pack(NULL, sf_dw, &sf);
@@ -512,7 +440,8 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
       struct anv_state cc_state =
          anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
-                                            GEN7_COLOR_CALC_STATE_length, 64);
+                                            GEN7_COLOR_CALC_STATE_length * 4,
+                                            64);
       struct GEN7_COLOR_CALC_STATE cc = {
          .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
          .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
@@ -524,6 +453,8 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
             cmd_buffer->state.dynamic.stencil_reference.back,
       };
       GEN7_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
+      if (!cmd_buffer->device->info.has_llc)
+         anv_state_clflush(cc_state);
 
       anv_batch_emit(&cmd_buffer->batch,
                      GEN7_3DSTATE_CC_STATE_POINTERS,
@@ -531,14 +462,16 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
    }
 
    if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
+                                  ANV_CMD_DIRTY_RENDER_TARGETS |
                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
       uint32_t depth_stencil_dw[GEN7_DEPTH_STENCIL_STATE_length];
 
+      const struct anv_image_view *iview =
+         anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
+
       struct GEN7_DEPTH_STENCIL_STATE depth_stencil = {
-         /* Is this what we need to do? */
-         .StencilBufferWriteEnable =
-            cmd_buffer->state.dynamic.stencil_write_mask.front != 0,
+         .StencilBufferWriteEnable = iview && (iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT),
 
          .StencilTestMask =
             cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff,
@@ -587,13 +520,13 @@ cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
 }
 
 void genX(CmdDraw)(
-    VkCmdBuffer                                 cmdBuffer,
+    VkCommandBuffer                             commandBuffer,
     uint32_t                                    vertexCount,
     uint32_t                                    instanceCount,
     uint32_t                                    firstVertex,
     uint32_t                                    firstInstance)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
 
    cmd_buffer_flush_state(cmd_buffer);
@@ -609,14 +542,14 @@ void genX(CmdDraw)(
 }
 
 void genX(CmdDrawIndexed)(
-    VkCmdBuffer                                 cmdBuffer,
+    VkCommandBuffer                             commandBuffer,
     uint32_t                                    indexCount,
     uint32_t                                    instanceCount,
     uint32_t                                    firstIndex,
     int32_t                                     vertexOffset,
     uint32_t                                    firstInstance)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
 
    cmd_buffer_flush_state(cmd_buffer);
@@ -657,13 +590,13 @@ gen7_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
 #define GEN7_3DPRIM_BASE_VERTEX         0x2440
 
 void genX(CmdDrawIndirect)(
-    VkCmdBuffer                                 cmdBuffer,
+    VkCommandBuffer                             commandBuffer,
     VkBuffer                                    _buffer,
     VkDeviceSize                                offset,
-    uint32_t                                    count,
+    uint32_t                                    drawCount,
     uint32_t                                    stride)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
    struct anv_bo *bo = buffer->bo;
@@ -684,13 +617,13 @@ void genX(CmdDrawIndirect)(
 }
 
 void genX(CmdDrawIndexedIndirect)(
-    VkCmdBuffer                                 cmdBuffer,
+    VkCommandBuffer                             commandBuffer,
     VkBuffer                                    _buffer,
     VkDeviceSize                                offset,
-    uint32_t                                    count,
+    uint32_t                                    drawCount,
     uint32_t                                    stride)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
    struct anv_bo *bo = buffer->bo;
@@ -711,12 +644,12 @@ void genX(CmdDrawIndexedIndirect)(
 }
 
 void genX(CmdDispatch)(
-    VkCmdBuffer                                 cmdBuffer,
+    VkCommandBuffer                             commandBuffer,
     uint32_t                                    x,
     uint32_t                                    y,
     uint32_t                                    z)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
    struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
 
@@ -726,7 +659,7 @@ void genX(CmdDispatch)(
                   .SIMDSize = prog_data->simd_size / 16,
                   .ThreadDepthCounterMaximum = 0,
                   .ThreadHeightCounterMaximum = 0,
-                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
+                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1,
                   .ThreadGroupIDXDimension = x,
                   .ThreadGroupIDYDimension = y,
                   .ThreadGroupIDZDimension = z,
@@ -741,11 +674,11 @@ void genX(CmdDispatch)(
 #define GPGPU_DISPATCHDIMZ 0x2508
 
 void genX(CmdDispatchIndirect)(
-    VkCmdBuffer                                 cmdBuffer,
+    VkCommandBuffer                             commandBuffer,
     VkBuffer                                    _buffer,
     VkDeviceSize                                offset)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
    struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
@@ -763,24 +696,13 @@ void genX(CmdDispatchIndirect)(
                   .SIMDSize = prog_data->simd_size / 16,
                   .ThreadDepthCounterMaximum = 0,
                   .ThreadHeightCounterMaximum = 0,
-                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
+                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1,
                   .RightExecutionMask = pipeline->cs_right_mask,
                   .BottomExecutionMask = 0xffffffff);
 
    anv_batch_emit(&cmd_buffer->batch, GEN7_MEDIA_STATE_FLUSH);
 }
 
-void genX(CmdPipelineBarrier)(
-    VkCmdBuffer                                 cmdBuffer,
-    VkPipelineStageFlags                        srcStageMask,
-    VkPipelineStageFlags                        destStageMask,
-    VkBool32                                    byRegion,
-    uint32_t                                    memBarrierCount,
-    const void* const*                          ppMemBarriers)
-{
-   stub();
-}
-
 static void
 cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
 {
@@ -788,18 +710,23 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
    const struct anv_image_view *iview =
       anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
    const struct anv_image *image = iview ? iview->image : NULL;
-   const bool has_depth = iview && iview->format->depth_format;
-   const bool has_stencil = iview && iview->format->has_stencil;
+
+   /* XXX: isl needs to grow depth format support */
+   const struct anv_format *anv_format =
+      iview ? anv_format_for_vk_format(iview->vk_format) : NULL;
+
+   const bool has_depth = iview && anv_format->depth_format;
+   const bool has_stencil = iview && anv_format->has_stencil;
 
    /* Emit 3DSTATE_DEPTH_BUFFER */
    if (has_depth) {
-      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER,
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
          .SurfaceType = SURFTYPE_2D,
-         .DepthWriteEnable = iview->format->depth_format,
+         .DepthWriteEnable = true,
          .StencilWriteEnable = has_stencil,
          .HierarchicalDepthBufferEnable = false,
-         .SurfaceFormat = iview->format->depth_format,
-         .SurfacePitch = image->depth_surface.stride - 1,
+         .SurfaceFormat = anv_format->depth_format,
+         .SurfacePitch = image->depth_surface.isl.row_pitch - 1,
          .SurfaceBaseAddress = {
             .bo = image->bo,
             .offset = image->depth_surface.offset,
@@ -809,7 +736,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
          .LOD = 0,
          .Depth = 1 - 1,
          .MinimumArrayElement = 0,
-         .DepthBufferObjectControlState = GEN7_MOCS,
+         .DepthBufferObjectControlState = GENX(MOCS),
          .RenderTargetViewExtent = 1 - 1);
    } else {
       /* Even when no depth buffer is present, the hardware requires that
@@ -829,7 +756,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
        * actual framebuffer's width and height, even when neither depth buffer
        * nor stencil buffer is present.
        */
-      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_DEPTH_BUFFER,
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
          .SurfaceType = SURFTYPE_2D,
          .SurfaceFormat = D16_UNORM,
          .Width = fb->width - 1,
@@ -839,15 +766,18 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
 
    /* Emit 3DSTATE_STENCIL_BUFFER */
    if (has_stencil) {
-      anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_STENCIL_BUFFER,
-         .StencilBufferObjectControlState = GEN7_MOCS,
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER),
+#     if (ANV_IS_HASWELL)
+         .StencilBufferEnable = true,
+#     endif
+         .StencilBufferObjectControlState = GENX(MOCS),
 
          /* Stencil buffers have strange pitch. The PRM says:
           *
           *    The pitch must be set to 2x the value computed based on width,
           *    as the stencil buffer is stored with two rows interleaved.
           */
-         .SurfacePitch = 2 * image->stencil_surface.stride - 1,
+         .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1,
 
          .SurfaceBaseAddress = {
             .bo = image->bo,
@@ -870,6 +800,7 @@ genX(cmd_buffer_begin_subpass)(struct anv_cmd_buffer *cmd_buffer,
 {
    cmd_buffer->state.subpass = subpass;
    cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
+   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
 
    cmd_buffer_emit_depth_stencil(cmd_buffer);
 }
@@ -901,11 +832,11 @@ begin_render_pass(struct anv_cmd_buffer *cmd_buffer,
 }
 
 void genX(CmdBeginRenderPass)(
-    VkCmdBuffer                                 cmdBuffer,
+    VkCommandBuffer                             commandBuffer,
     const VkRenderPassBeginInfo*                pRenderPassBegin,
-    VkRenderPassContents                        contents)
+    VkSubpassContents                           contents)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
 
    begin_render_pass(cmd_buffer, pRenderPassBegin);
@@ -914,20 +845,20 @@ void genX(CmdBeginRenderPass)(
 }
 
 void genX(CmdNextSubpass)(
-    VkCmdBuffer                                 cmdBuffer,
-    VkRenderPassContents                        contents)
+    VkCommandBuffer                             commandBuffer,
+    VkSubpassContents                           contents)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
+   assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
 
    gen7_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
 }
 
 void genX(CmdEndRenderPass)(
-    VkCmdBuffer                                 cmdBuffer)
+    VkCommandBuffer                             commandBuffer)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
    /* Emit a flushing pipe control at the end of a pass.  This is kind of a
     * hack but it ensures that render targets always actually get written.
@@ -943,3 +874,31 @@ void genX(CmdEndRenderPass)(
                   .TextureCacheInvalidationEnable = true,
                   .CommandStreamerStallEnable = true);
 }
+
+void genX(CmdSetEvent)(
+    VkCommandBuffer                             commandBuffer,
+    VkEvent                                     event,
+    VkPipelineStageFlags                        stageMask)
+{
+   stub();
+}
+
+void genX(CmdResetEvent)(
+    VkCommandBuffer                             commandBuffer,
+    VkEvent                                     event,
+    VkPipelineStageFlags                        stageMask)
+{
+   stub();
+}
+
+void genX(CmdWaitEvents)(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    eventCount,
+    const VkEvent*                              pEvents,
+    VkPipelineStageFlags                        srcStageMask,
+    VkPipelineStageFlags                        destStageMask,
+    uint32_t                                    memBarrierCount,
+    const void* const*                          ppMemBarriers)
+{
+   stub();
+}