winsys/amdgpu: avoid ioctl call when fence_wait is called without timeout

[mesa.git] / src / intel / vulkan / genX_cmd_buffer.c
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c

index c3d2043dcdfc20c91444ba912e8f82520a147f7d..ee47c2926e0579c00346a16d7eecc6b1b84c4081 100644 (file)
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -49,46 +49,50 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
      * this, we get GPU hangs when using multi-level command buffers which
      * clear depth, reset state base address, and then go render stuff.
      */
-   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                  .RenderTargetCacheFlushEnable = true);
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+      pc.RenderTargetCacheFlushEnable = true;
+   }
  #endif
  
-   anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS),
-      .GeneralStateBaseAddress = { scratch_bo, 0 },
-      .GeneralStateMemoryObjectControlState = GENX(MOCS),
-      .GeneralStateBaseAddressModifyEnable = true,
+   anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) {
+      sba.GeneralStateBaseAddress = (struct anv_address) { scratch_bo, 0 };
+      sba.GeneralStateMemoryObjectControlState = GENX(MOCS);
+      sba.GeneralStateBaseAddressModifyEnable = true;
  
-      .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer),
-      .SurfaceStateMemoryObjectControlState = GENX(MOCS),
-      .SurfaceStateBaseAddressModifyEnable = true,
+      sba.SurfaceStateBaseAddress =
+         anv_cmd_buffer_surface_base_address(cmd_buffer);
+      sba.SurfaceStateMemoryObjectControlState = GENX(MOCS);
+      sba.SurfaceStateBaseAddressModifyEnable = true;
  
-      .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
-      .DynamicStateMemoryObjectControlState = GENX(MOCS),
-      .DynamicStateBaseAddressModifyEnable = true,
+      sba.DynamicStateBaseAddress =
+         (struct anv_address) { &device->dynamic_state_block_pool.bo, 0 };
+      sba.DynamicStateMemoryObjectControlState = GENX(MOCS),
+      sba.DynamicStateBaseAddressModifyEnable = true,
  
-      .IndirectObjectBaseAddress = { NULL, 0 },
-      .IndirectObjectMemoryObjectControlState = GENX(MOCS),
-      .IndirectObjectBaseAddressModifyEnable = true,
+      sba.IndirectObjectBaseAddress = (struct anv_address) { NULL, 0 };
+      sba.IndirectObjectMemoryObjectControlState = GENX(MOCS);
+      sba.IndirectObjectBaseAddressModifyEnable = true;
  
-      .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
-      .InstructionMemoryObjectControlState = GENX(MOCS),
-      .InstructionBaseAddressModifyEnable = true,
+      sba.InstructionBaseAddress =
+         (struct anv_address) { &device->instruction_block_pool.bo, 0 };
+      sba.InstructionMemoryObjectControlState = GENX(MOCS);
+      sba.InstructionBaseAddressModifyEnable = true;
  
  #  if (GEN_GEN >= 8)
        /* Broadwell requires that we specify a buffer size for a bunch of
         * these fields.  However, since we will be growing the BO's live, we
         * just set them all to the maximum.
         */
-      .GeneralStateBufferSize = 0xfffff,
-      .GeneralStateBufferSizeModifyEnable = true,
-      .DynamicStateBufferSize = 0xfffff,
-      .DynamicStateBufferSizeModifyEnable = true,
-      .IndirectObjectBufferSize = 0xfffff,
-      .IndirectObjectBufferSizeModifyEnable = true,
-      .InstructionBufferSize = 0xfffff,
-      .InstructionBuffersizeModifyEnable = true,
+      sba.GeneralStateBufferSize                = 0xfffff;
+      sba.GeneralStateBufferSizeModifyEnable    = true;
+      sba.DynamicStateBufferSize                = 0xfffff;
+      sba.DynamicStateBufferSizeModifyEnable    = true;
+      sba.IndirectObjectBufferSize              = 0xfffff;
+      sba.IndirectObjectBufferSizeModifyEnable  = true;
+      sba.InstructionBufferSize                 = 0xfffff;
+      sba.InstructionBuffersizeModifyEnable     = true;
  #  endif
-   );
+   }
  
     /* After re-setting the surface state base address, we have to do some
      * cache flusing so that the sampler engine will pick up the new
@@ -127,8 +131,9 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
      * units cache the binding table in the texture cache.  However, we have
      * yet to be able to actually confirm this.
      */
-   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                  .TextureCacheInvalidationEnable = true);
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+      pc.TextureCacheInvalidationEnable = true;
+   }
  }
  
  void genX(CmdPipelineBarrier)(
@@ -290,20 +295,21 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
        struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);
  
        if (state.offset == 0) {
-         anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS),
-                        ._3DCommandSubOpcode = push_constant_opcodes[stage]);
+         anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), c)
+            c._3DCommandSubOpcode = push_constant_opcodes[stage];
        } else {
-         anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS),
-                        ._3DCommandSubOpcode = push_constant_opcodes[stage],
-                        .ConstantBody = {
+         anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), c) {
+            c._3DCommandSubOpcode = push_constant_opcodes[stage],
+            c.ConstantBody = (struct GENX(3DSTATE_CONSTANT_BODY)) {
  #if GEN_GEN >= 9
-                           .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset },
-                           .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
+               .PointerToConstantBuffer2 = { &cmd_buffer->device->dynamic_state_block_pool.bo, state.offset },
+               .ConstantBuffer2ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
  #else
-                           .PointerToConstantBuffer0 = { .offset = state.offset },
-                           .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
+               .PointerToConstantBuffer0 = { .offset = state.offset },
+               .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
  #endif
-                        });
+            };
+         }
        }
  
        flushed |= mesa_to_vk_shader_stage(stage);
@@ -324,7 +330,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
  
     assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
  
-   genX(cmd_buffer_config_l3)(cmd_buffer, false);
+   genX(cmd_buffer_config_l3)(cmd_buffer, pipeline);
  
     genX(flush_pipeline_select_3d)(cmd_buffer);
  
@@ -403,10 +409,12 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
         *    PIPE_CONTROL needs to be sent before any combination of VS
         *    associated 3DSTATE."
         */
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .DepthStallEnable = true,
-                     .PostSyncOperation = WriteImmediateData,
-                     .Address = { &cmd_buffer->device->workaround_bo, 0 });
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.DepthStallEnable  = true;
+         pc.PostSyncOperation = WriteImmediateData;
+         pc.Address           =
+            (struct anv_address) { &cmd_buffer->device->workaround_bo, 0 };
+      }
     }
  #endif
  
@@ -502,14 +510,15 @@ void genX(CmdDraw)(
     if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
        emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance);
  
-   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
-      .VertexAccessType                         = SEQUENTIAL,
-      .PrimitiveTopologyType                    = pipeline->topology,
-      .VertexCountPerInstance                   = vertexCount,
-      .StartVertexLocation                      = firstVertex,
-      .InstanceCount                            = instanceCount,
-      .StartInstanceLocation                    = firstInstance,
-      .BaseVertexLocation                       = 0);
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
+      prim.VertexAccessType         = SEQUENTIAL;
+      prim.PrimitiveTopologyType    = pipeline->topology;
+      prim.VertexCountPerInstance   = vertexCount;
+      prim.StartVertexLocation      = firstVertex;
+      prim.InstanceCount            = instanceCount;
+      prim.StartInstanceLocation    = firstInstance;
+      prim.BaseVertexLocation       = 0;
+   }
  }
  
  void genX(CmdDrawIndexed)(
@@ -529,14 +538,15 @@ void genX(CmdDrawIndexed)(
     if (vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
        emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance);
  
-   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
-      .VertexAccessType                         = RANDOM,
-      .PrimitiveTopologyType                    = pipeline->topology,
-      .VertexCountPerInstance                   = indexCount,
-      .StartVertexLocation                      = firstIndex,
-      .InstanceCount                            = instanceCount,
-      .StartInstanceLocation                    = firstInstance,
-      .BaseVertexLocation                       = vertexOffset);
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
+      prim.VertexAccessType         = RANDOM;
+      prim.PrimitiveTopologyType    = pipeline->topology;
+      prim.VertexCountPerInstance   = indexCount;
+      prim.StartVertexLocation      = firstIndex;
+      prim.InstanceCount            = instanceCount;
+      prim.StartInstanceLocation    = firstInstance;
+      prim.BaseVertexLocation       = vertexOffset;
+   }
  }
  
  /* Auto-Draw / Indirect Registers */
@@ -551,17 +561,19 @@ static void
  emit_lrm(struct anv_batch *batch,
           uint32_t reg, struct anv_bo *bo, uint32_t offset)
  {
-   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
-                  .RegisterAddress = reg,
-                  .MemoryAddress = { bo, offset });
+   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
+      lrm.RegisterAddress  = reg;
+      lrm.MemoryAddress    = (struct anv_address) { bo, offset };
+   }
  }
  
  static void
  emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
  {
-   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM),
-                  .RegisterOffset = reg,
-                  .DataDWord = imm);
+   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
+      lri.RegisterOffset   = reg;
+      lri.DataDWord        = imm;
+   }
  }
  
  void genX(CmdDrawIndirect)(
@@ -589,10 +601,11 @@ void genX(CmdDrawIndirect)(
     emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
     emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
  
-   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
-      .IndirectParameterEnable                  = true,
-      .VertexAccessType                         = SEQUENTIAL,
-      .PrimitiveTopologyType                    = pipeline->topology);
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
+      prim.IndirectParameterEnable  = true;
+      prim.VertexAccessType         = SEQUENTIAL;
+      prim.PrimitiveTopologyType    = pipeline->topology;
+   }
  }
  
  void genX(CmdDrawIndexedIndirect)(
@@ -621,12 +634,31 @@ void genX(CmdDrawIndexedIndirect)(
     emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
     emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
  
-   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
-      .IndirectParameterEnable                  = true,
-      .VertexAccessType                         = RANDOM,
-      .PrimitiveTopologyType                    = pipeline->topology);
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
+      prim.IndirectParameterEnable  = true;
+      prim.VertexAccessType         = RANDOM;
+      prim.PrimitiveTopologyType    = pipeline->topology;
+   }
+}
+
+#if GEN_GEN == 7
+
+static bool
+verify_cmd_parser(const struct anv_device *device,
+                  int required_version,
+                  const char *function)
+{
+   if (device->instance->physicalDevice.cmd_parser_version < required_version) {
+      vk_errorf(VK_ERROR_FEATURE_NOT_PRESENT,
+                "cmd parser version %d is required for %s",
+                required_version, function);
+      return false;
+   } else {
+      return true;
+   }
  }
  
+#endif
  
  void genX(CmdDispatch)(
      VkCommandBuffer                             commandBuffer,
@@ -654,18 +686,19 @@ void genX(CmdDispatch)(
  
     genX(cmd_buffer_flush_compute_state)(cmd_buffer);
  
-   anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER),
-                  .SIMDSize = prog_data->simd_size / 16,
-                  .ThreadDepthCounterMaximum = 0,
-                  .ThreadHeightCounterMaximum = 0,
-                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1,
-                  .ThreadGroupIDXDimension = x,
-                  .ThreadGroupIDYDimension = y,
-                  .ThreadGroupIDZDimension = z,
-                  .RightExecutionMask = pipeline->cs_right_mask,
-                  .BottomExecutionMask = 0xffffffff);
-
-   anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH));
+   anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
+      ggw.SIMDSize                     = prog_data->simd_size / 16;
+      ggw.ThreadDepthCounterMaximum    = 0;
+      ggw.ThreadHeightCounterMaximum   = 0;
+      ggw.ThreadWidthCounterMaximum    = pipeline->cs_thread_width_max - 1;
+      ggw.ThreadGroupIDXDimension      = x;
+      ggw.ThreadGroupIDYDimension      = y;
+      ggw.ThreadGroupIDZDimension      = z;
+      ggw.RightExecutionMask           = pipeline->cs_right_mask;
+      ggw.BottomExecutionMask          = 0xffffffff;
+   }
+
+   anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH), msf);
  }
  
  #define GPGPU_DISPATCHDIMX 0x2500
@@ -688,6 +721,14 @@ void genX(CmdDispatchIndirect)(
     uint32_t bo_offset = buffer->offset + offset;
     struct anv_batch *batch = &cmd_buffer->batch;
  
+#if GEN_GEN == 7
+   /* Linux 4.4 added command parser version 5 which allows the GPGPU
+    * indirect dispatch registers to be written.
+    */
+   if (!verify_cmd_parser(cmd_buffer->device, 5, "vkCmdDispatchIndirect"))
+      return;
+#endif
+
     if (prog_data->uses_num_work_groups) {
        cmd_buffer->state.num_workgroups_offset = bo_offset;
        cmd_buffer->state.num_workgroups_bo = bo;
@@ -709,63 +750,134 @@ void genX(CmdDispatchIndirect)(
     emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 0);
  
     /* predicate = (compute_dispatch_indirect_x_size == 0); */
-   anv_batch_emit(batch, GENX(MI_PREDICATE),
-                  .LoadOperation = LOAD_LOAD,
-                  .CombineOperation = COMBINE_SET,
-                  .CompareOperation = COMPARE_SRCS_EQUAL);
+   anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
+      mip.LoadOperation    = LOAD_LOAD;
+      mip.CombineOperation = COMBINE_SET;
+      mip.CompareOperation = COMPARE_SRCS_EQUAL;
+   }
  
     /* Load compute_dispatch_indirect_y_size into SRC0 */
     emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 4);
  
     /* predicate |= (compute_dispatch_indirect_y_size == 0); */
-   anv_batch_emit(batch, GENX(MI_PREDICATE),
-                  .LoadOperation = LOAD_LOAD,
-                  .CombineOperation = COMBINE_OR,
-                  .CompareOperation = COMPARE_SRCS_EQUAL);
+   anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
+      mip.LoadOperation    = LOAD_LOAD;
+      mip.CombineOperation = COMBINE_OR;
+      mip.CompareOperation = COMPARE_SRCS_EQUAL;
+   }
  
     /* Load compute_dispatch_indirect_z_size into SRC0 */
     emit_lrm(batch, MI_PREDICATE_SRC0, bo, bo_offset + 8);
  
     /* predicate |= (compute_dispatch_indirect_z_size == 0); */
-   anv_batch_emit(batch, GENX(MI_PREDICATE),
-                  .LoadOperation = LOAD_LOAD,
-                  .CombineOperation = COMBINE_OR,
-                  .CompareOperation = COMPARE_SRCS_EQUAL);
+   anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
+      mip.LoadOperation    = LOAD_LOAD;
+      mip.CombineOperation = COMBINE_OR;
+      mip.CompareOperation = COMPARE_SRCS_EQUAL;
+   }
  
     /* predicate = !predicate; */
  #define COMPARE_FALSE                           1
-   anv_batch_emit(batch, GENX(MI_PREDICATE),
-                  .LoadOperation = LOAD_LOADINV,
-                  .CombineOperation = COMBINE_OR,
-                  .CompareOperation = COMPARE_FALSE);
+   anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
+      mip.LoadOperation    = LOAD_LOADINV;
+      mip.CombineOperation = COMBINE_OR;
+      mip.CompareOperation = COMPARE_FALSE;
+   }
  #endif
  
-   anv_batch_emit(batch, GENX(GPGPU_WALKER),
-                  .IndirectParameterEnable = true,
-                  .PredicateEnable = GEN_GEN <= 7,
-                  .SIMDSize = prog_data->simd_size / 16,
-                  .ThreadDepthCounterMaximum = 0,
-                  .ThreadHeightCounterMaximum = 0,
-                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1,
-                  .RightExecutionMask = pipeline->cs_right_mask,
-                  .BottomExecutionMask = 0xffffffff);
-
-   anv_batch_emit(batch, GENX(MEDIA_STATE_FLUSH));
+   anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {
+      ggw.IndirectParameterEnable      = true;
+      ggw.PredicateEnable              = GEN_GEN <= 7;
+      ggw.SIMDSize                     = prog_data->simd_size / 16;
+      ggw.ThreadDepthCounterMaximum    = 0;
+      ggw.ThreadHeightCounterMaximum   = 0;
+      ggw.ThreadWidthCounterMaximum    = pipeline->cs_thread_width_max - 1;
+      ggw.RightExecutionMask           = pipeline->cs_right_mask;
+      ggw.BottomExecutionMask          = 0xffffffff;
+   }
+
+   anv_batch_emit(batch, GENX(MEDIA_STATE_FLUSH), msf);
+}
+
+static void
+flush_pipeline_before_pipeline_select(struct anv_cmd_buffer *cmd_buffer,
+                                      uint32_t pipeline)
+{
+#if GEN_GEN >= 8 && GEN_GEN < 10
+   /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
+    *
+    *   Software must clear the COLOR_CALC_STATE Valid field in
+    *   3DSTATE_CC_STATE_POINTERS command prior to send a PIPELINE_SELECT
+    *   with Pipeline Select set to GPGPU.
+    *
+    * The internal hardware docs recommend the same workaround for Gen9
+    * hardware too.
+    */
+   if (pipeline == GPGPU)
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), t);
+#elif GEN_GEN <= 7
+      /* From "BXML » GT » MI » vol1a GPU Overview » [Instruction]
+       * PIPELINE_SELECT [DevBWR+]":
+       *
+       *   Project: DEVSNB+
+       *
+       *   Software must ensure all the write caches are flushed through a
+       *   stalling PIPE_CONTROL command followed by another PIPE_CONTROL
+       *   command to invalidate read only caches prior to programming
+       *   MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
+       */
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.RenderTargetCacheFlushEnable  = true;
+         pc.DepthCacheFlushEnable         = true;
+         pc.DCFlushEnable                 = true;
+         pc.PostSyncOperation             = NoWrite;
+         pc.CommandStreamerStallEnable    = true;
+      }
+
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.TextureCacheInvalidationEnable   = true;
+         pc.ConstantCacheInvalidationEnable  = true;
+         pc.StateCacheInvalidationEnable     = true;
+         pc.InstructionCacheInvalidateEnable = true;
+         pc.PostSyncOperation                = NoWrite;
+      }
+#endif
  }
  
  void
  genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer)
  {
     if (cmd_buffer->state.current_pipeline != _3D) {
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT),
+      flush_pipeline_before_pipeline_select(cmd_buffer, _3D);
+
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), ps) {
  #if GEN_GEN >= 9
-                     .MaskBits = 3,
+         ps.MaskBits = 3;
  #endif
-                     .PipelineSelection = _3D);
+         ps.PipelineSelection = _3D;
+      }
+
        cmd_buffer->state.current_pipeline = _3D;
     }
  }
  
+void
+genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer)
+{
+   if (cmd_buffer->state.current_pipeline != GPGPU) {
+      flush_pipeline_before_pipeline_select(cmd_buffer, GPGPU);
+
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT), ps) {
+#if GEN_GEN >= 9
+         ps.MaskBits = 3;
+#endif
+         ps.PipelineSelection = GPGPU;
+      }
+
+      cmd_buffer->state.current_pipeline = GPGPU;
+   }
+}
+
  struct anv_state
  genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer,
                                            struct anv_framebuffer *fb)
@@ -804,38 +916,43 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
     const struct anv_image_view *iview =
        anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
     const struct anv_image *image = iview ? iview->image : NULL;
-   const struct anv_format *anv_format =
-      iview ? anv_format_for_vk_format(iview->vk_format) : NULL;
-   const bool has_depth = iview && anv_format->has_depth;
-   const bool has_stencil = iview && anv_format->has_stencil;
+   const bool has_depth = image && (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
+   const bool has_stencil =
+      image && (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
  
     /* FIXME: Implement the PMA stall W/A */
     /* FIXME: Width and Height are wrong */
  
     /* Emit 3DSTATE_DEPTH_BUFFER */
     if (has_depth) {
-      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
-         .SurfaceType = SURFTYPE_2D,
-         .DepthWriteEnable = true,
-         .StencilWriteEnable = has_stencil,
-         .HierarchicalDepthBufferEnable = false,
-         .SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev,
-                                                    &image->depth_surface.isl),
-         .SurfacePitch = image->depth_surface.isl.row_pitch - 1,
-         .SurfaceBaseAddress = {
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
+         db.SurfaceType                   = SURFTYPE_2D;
+         db.DepthWriteEnable              = true;
+         db.StencilWriteEnable            = has_stencil;
+         db.HierarchicalDepthBufferEnable = false;
+
+         db.SurfaceFormat = isl_surf_get_depth_format(&device->isl_dev,
+                                                      &image->depth_surface.isl);
+
+         db.SurfaceBaseAddress = (struct anv_address) {
              .bo = image->bo,
              .offset = image->offset + image->depth_surface.offset,
-         },
-         .Height = fb->height - 1,
-         .Width = fb->width - 1,
-         .LOD = 0,
-         .Depth = 1 - 1,
-         .MinimumArrayElement = 0,
-         .DepthBufferObjectControlState = GENX(MOCS),
+         };
+         db.DepthBufferObjectControlState = GENX(MOCS),
+
+         db.SurfacePitch         = image->depth_surface.isl.row_pitch - 1;
+         db.Height               = fb->height - 1;
+         db.Width                = fb->width - 1;
+         db.LOD                  = 0;
+         db.Depth                = 1 - 1;
+         db.MinimumArrayElement  = 0;
+
  #if GEN_GEN >= 8
-         .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2,
+         db.SurfaceQPitch =
+            isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2,
  #endif
-         .RenderTargetViewExtent = 1 - 1);
+         db.RenderTargetViewExtent = 1 - 1;
+      }
     } else {
        /* Even when no depth buffer is present, the hardware requires that
         * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says:
@@ -855,45 +972,47 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
         * nor stencil buffer is present.  Also, D16_UNORM is not allowed to
         * be combined with a stencil buffer so we use D32_FLOAT instead.
         */
-      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
-         .SurfaceType = SURFTYPE_2D,
-         .SurfaceFormat = D32_FLOAT,
-         .Width = fb->width - 1,
-         .Height = fb->height - 1,
-         .StencilWriteEnable = has_stencil);
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
+         db.SurfaceType          = SURFTYPE_2D;
+         db.SurfaceFormat        = D32_FLOAT;
+         db.Width                = fb->width - 1;
+         db.Height               = fb->height - 1;
+         db.StencilWriteEnable   = has_stencil;
+      }
     }
  
     /* Emit 3DSTATE_STENCIL_BUFFER */
     if (has_stencil) {
-      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER),
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), sb) {
  #if GEN_GEN >= 8 || GEN_IS_HASWELL
-         .StencilBufferEnable = true,
+         sb.StencilBufferEnable = true,
  #endif
-         .StencilBufferObjectControlState = GENX(MOCS),
+         sb.StencilBufferObjectControlState = GENX(MOCS),
  
           /* Stencil buffers have strange pitch. The PRM says:
            *
            *    The pitch must be set to 2x the value computed based on width,
            *    as the stencil buffer is stored with two rows interleaved.
            */
-         .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1,
+         sb.SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1,
  
  #if GEN_GEN >= 8
-         .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2,
+         sb.SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2,
  #endif
-         .SurfaceBaseAddress = {
+         sb.SurfaceBaseAddress = (struct anv_address) {
              .bo = image->bo,
              .offset = image->offset + image->stencil_surface.offset,
-         });
+         };
+      }
     } else {
-      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER));
+      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
     }
  
     /* Disable hierarchial depth buffers. */
-   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER));
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hz);
  
     /* Clear the clear params. */
-   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS));
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS), cp);
  }
  
  /**
@@ -927,15 +1046,16 @@ void genX(CmdBeginRenderPass)(
  
     const VkRect2D *render_area = &pRenderPassBegin->renderArea;
  
-   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE),
-                  .ClippedDrawingRectangleYMin = MAX2(render_area->offset.y, 0),
-                  .ClippedDrawingRectangleXMin = MAX2(render_area->offset.x, 0),
-                  .ClippedDrawingRectangleYMax =
-                     render_area->offset.y + render_area->extent.height - 1,
-                  .ClippedDrawingRectangleXMax =
-                     render_area->offset.x + render_area->extent.width - 1,
-                  .DrawingRectangleOriginY = 0,
-                  .DrawingRectangleOriginX = 0);
+   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE), r) {
+      r.ClippedDrawingRectangleYMin = MAX2(render_area->offset.y, 0);
+      r.ClippedDrawingRectangleXMin = MAX2(render_area->offset.x, 0);
+      r.ClippedDrawingRectangleYMax =
+         render_area->offset.y + render_area->extent.height - 1;
+      r.ClippedDrawingRectangleXMax =
+         render_area->offset.x + render_area->extent.width - 1;
+      r.DrawingRectangleOriginY     = 0;
+      r.DrawingRectangleOriginX     = 0;
+   }
  
     genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses);
     anv_cmd_buffer_clear_subpass(cmd_buffer);
@@ -966,22 +1086,24 @@ static void
  emit_ps_depth_count(struct anv_batch *batch,
                      struct anv_bo *bo, uint32_t offset)
  {
-   anv_batch_emit(batch, GENX(PIPE_CONTROL),
-                  .DestinationAddressType = DAT_PPGTT,
-                  .PostSyncOperation = WritePSDepthCount,
-                  .DepthStallEnable = true,
-                  .Address = { bo, offset });
+   anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
+      pc.DestinationAddressType  = DAT_PPGTT;
+      pc.PostSyncOperation       = WritePSDepthCount;
+      pc.DepthStallEnable        = true;
+      pc.Address                 = (struct anv_address) { bo, offset };
+   }
  }
  
  static void
  emit_query_availability(struct anv_batch *batch,
                          struct anv_bo *bo, uint32_t offset)
  {
-   anv_batch_emit(batch, GENX(PIPE_CONTROL),
-                  .DestinationAddressType = DAT_PPGTT,
-                  .PostSyncOperation = WriteImmediateData,
-                  .Address = { bo, offset },
-                  .ImmediateData = 1);
+   anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
+      pc.DestinationAddressType  = DAT_PPGTT;
+      pc.PostSyncOperation       = WriteImmediateData;
+      pc.Address                 = (struct anv_address) { bo, offset };
+      pc.ImmediateData           = 1;
+   }
  }
  
  void genX(CmdBeginQuery)(
@@ -1001,9 +1123,10 @@ void genX(CmdBeginQuery)(
      */
     if (cmd_buffer->state.need_query_wa) {
        cmd_buffer->state.need_query_wa = false;
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .DepthCacheFlushEnable = true,
-                     .DepthStallEnable = true);
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.DepthCacheFlushEnable   = true;
+         pc.DepthStallEnable        = true;
+      }
     }
  
     switch (pool->type) {
@@ -1057,20 +1180,23 @@ void genX(CmdWriteTimestamp)(
  
     switch (pipelineStage) {
     case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
-                     .RegisterAddress = TIMESTAMP,
-                     .MemoryAddress = { &pool->bo, offset });
-      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
-                     .RegisterAddress = TIMESTAMP + 4,
-                     .MemoryAddress = { &pool->bo, offset + 4 });
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
+         srm.RegisterAddress  = TIMESTAMP;
+         srm.MemoryAddress    = (struct anv_address) { &pool->bo, offset };
+      }
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), srm) {
+         srm.RegisterAddress  = TIMESTAMP + 4;
+         srm.MemoryAddress    = (struct anv_address) { &pool->bo, offset + 4 };
+      }
        break;
  
     default:
        /* Everything else is bottom-of-pipe */
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .DestinationAddressType = DAT_PPGTT,
-                     .PostSyncOperation = WriteTimestamp,
-                     .Address = { &pool->bo, offset });
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.DestinationAddressType  = DAT_PPGTT,
+         pc.PostSyncOperation       = WriteTimestamp,
+         pc.Address = (struct anv_address) { &pool->bo, offset };
+      }
        break;
     }
  
@@ -1115,26 +1241,31 @@ static void
  emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
                        struct anv_bo *bo, uint32_t offset)
  {
-   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
-                  .RegisterAddress = reg,
-                  .MemoryAddress = { bo, offset });
-   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
-                  .RegisterAddress = reg + 4,
-                  .MemoryAddress = { bo, offset + 4 });
+   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
+      lrm.RegisterAddress  = reg,
+      lrm.MemoryAddress    = (struct anv_address) { bo, offset };
+   }
+   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
+      lrm.RegisterAddress  = reg + 4;
+      lrm.MemoryAddress    = (struct anv_address) { bo, offset + 4 };
+   }
  }
  
  static void
  store_query_result(struct anv_batch *batch, uint32_t reg,
                     struct anv_bo *bo, uint32_t offset, VkQueryResultFlags flags)
  {
-      anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM),
-                     .RegisterAddress = reg,
-                     .MemoryAddress = { bo, offset });
-
-      if (flags & VK_QUERY_RESULT_64_BIT)
-         anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM),
-                        .RegisterAddress = reg + 4,
-                        .MemoryAddress = { bo, offset + 4 });
+   anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
+      srm.RegisterAddress  = reg;
+      srm.MemoryAddress    = (struct anv_address) { bo, offset };
+   }
+
+   if (flags & VK_QUERY_RESULT_64_BIT) {
+      anv_batch_emit(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
+         srm.RegisterAddress  = reg + 4;
+         srm.MemoryAddress    = (struct anv_address) { bo, offset + 4 };
+      }
+   }
  }
  
  void genX(CmdCopyQueryPoolResults)(
@@ -1152,10 +1283,12 @@ void genX(CmdCopyQueryPoolResults)(
     ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
     uint32_t slot_offset, dst_offset;
  
-   if (flags & VK_QUERY_RESULT_WAIT_BIT)
-      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
-                     .CommandStreamerStallEnable = true,
-                     .StallAtPixelScoreboard = true);
+   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+         pc.CommandStreamerStallEnable = true;
+         pc.StallAtPixelScoreboard     = true;
+      }
+   }
  
     dst_offset = buffer->offset + destOffset;
     for (uint32_t i = 0; i < queryCount; i++) {