src/intel/vulkan/gen8_cmd_buffer.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <assert.h>
  25 #include <stdbool.h>
  26 #include <string.h>
  27 #include <unistd.h>
  28 #include <fcntl.h>
  29
  30 #include "anv_private.h"
  31
  32 #include "genxml/gen_macros.h"
  33 #include "genxml/genX_pack.h"
  34
  35 #if GEN_GEN == 8
  36 static void
  37 emit_viewport_state(struct anv_cmd_buffer *cmd_buffer,
  38                     uint32_t count, const VkViewport *viewports)
  39 {
  40    struct anv_state sf_clip_state =
  41       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
  42    struct anv_state cc_state =
  43       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
  44
  45    for (uint32_t i = 0; i < count; i++) {
  46       const VkViewport *vp = &viewports[i];
  47
  48       /* The gen7 state struct has just the matrix and guardband fields, the
  49        * gen8 struct adds the min/max viewport fields. */
  50       struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = {
  51          .ViewportMatrixElementm00 = vp->width / 2,
  52          .ViewportMatrixElementm11 = vp->height / 2,
  53          .ViewportMatrixElementm22 = 1.0,
  54          .ViewportMatrixElementm30 = vp->x + vp->width / 2,
  55          .ViewportMatrixElementm31 = vp->y + vp->height / 2,
  56          .ViewportMatrixElementm32 = 0.0,
  57          .XMinClipGuardband = -1.0f,
  58          .XMaxClipGuardband = 1.0f,
  59          .YMinClipGuardband = -1.0f,
  60          .YMaxClipGuardband = 1.0f,
  61          .XMinViewPort = vp->x,
  62          .XMaxViewPort = vp->x + vp->width - 1,
  63          .YMinViewPort = vp->y,
  64          .YMaxViewPort = vp->y + vp->height - 1,
  65       };
  66
  67       struct GENX(CC_VIEWPORT) cc_viewport = {
  68          .MinimumDepth = vp->minDepth,
  69          .MaximumDepth = vp->maxDepth
  70       };
  71
  72       GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
  73                                  &sf_clip_viewport);
  74       GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
  75    }
  76
  77    if (!cmd_buffer->device->info.has_llc) {
  78       anv_state_clflush(sf_clip_state);
  79       anv_state_clflush(cc_state);
  80    }
  81
  82    anv_batch_emit(&cmd_buffer->batch,
  83                   GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC),
  84                   .CCViewportPointer = cc_state.offset);
  85    anv_batch_emit(&cmd_buffer->batch,
  86                   GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP),
  87                   .SFClipViewportPointer = sf_clip_state.offset);
  88 }
  89
  90 void
  91 gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
  92 {
  93    if (cmd_buffer->state.dynamic.viewport.count > 0) {
  94       emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count,
  95                           cmd_buffer->state.dynamic.viewport.viewports);
  96    } else {
  97       /* If viewport count is 0, this is taken to mean "use the default" */
  98       emit_viewport_state(cmd_buffer, 1,
  99                           &(VkViewport) {
 100                              .x = 0.0f,
 101                              .y = 0.0f,
 102                              .width = cmd_buffer->state.framebuffer->width,
 103                              .height = cmd_buffer->state.framebuffer->height,
 104                              .minDepth = 0.0f,
 105                              .maxDepth = 1.0f,
 106                           });
 107    }
 108 }
 109 #endif
 110
 111 #define emit_lri(batch, reg, imm)                       \
 112    anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM),    \
 113                   .RegisterOffset = __anv_reg_num(reg), \
 114                   .DataDWord = imm)
 115
 116 void
 117 genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
 118 {
 119    /* References for GL state:
 120     *
 121     * - commits e307cfa..228d5a3
 122     * - src/mesa/drivers/dri/i965/gen7_l3_state.c
 123     */
 124
 125    uint32_t l3cr_slm, l3cr_noslm;
 126    anv_pack_struct(&l3cr_noslm, GENX(L3CNTLREG),
 127                    .URBAllocation = 48,
 128                    .AllAllocation = 48);
 129    anv_pack_struct(&l3cr_slm, GENX(L3CNTLREG),
 130                    .SLMEnable = 1,
 131                    .URBAllocation = 16,
 132                    .AllAllocation = 48);
 133    const uint32_t l3cr_val = enable_slm ? l3cr_slm : l3cr_noslm;
 134    bool changed = cmd_buffer->state.current_l3_config != l3cr_val;
 135
 136    if (changed) {
 137       /* According to the hardware docs, the L3 partitioning can only be
 138        * changed while the pipeline is completely drained and the caches are
 139        * flushed, which involves a first PIPE_CONTROL flush which stalls the
 140        * pipeline...
 141        */
 142       anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
 143                      .DCFlushEnable = true,
 144                      .PostSyncOperation = NoWrite,
 145                      .CommandStreamerStallEnable = true);
 146
 147       /* ...followed by a second pipelined PIPE_CONTROL that initiates
 148        * invalidation of the relevant caches. Note that because RO
 149        * invalidation happens at the top of the pipeline (i.e. right away as
 150        * the PIPE_CONTROL command is processed by the CS) we cannot combine it
 151        * with the previous stalling flush as the hardware documentation
 152        * suggests, because that would cause the CS to stall on previous
 153        * rendering *after* RO invalidation and wouldn't prevent the RO caches
 154        * from being polluted by concurrent rendering before the stall
 155        * completes. This intentionally doesn't implement the SKL+ hardware
 156        * workaround suggesting to enable CS stall on PIPE_CONTROLs with the
 157        * texture cache invalidation bit set for GPGPU workloads because the
 158        * previous and subsequent PIPE_CONTROLs already guarantee that there is
 159        * no concurrent GPGPU kernel execution (see SKL HSD 2132585).
 160        */
 161       anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
 162                      .TextureCacheInvalidationEnable = true,
 163                      .ConstantCacheInvalidationEnable = true,
 164                      .InstructionCacheInvalidateEnable = true,
 165                      .StateCacheInvalidationEnable = true,
 166                      .PostSyncOperation = NoWrite);
 167
 168       /* Now send a third stalling flush to make sure that invalidation is
 169        * complete when the L3 configuration registers are modified.
 170        */
 171       anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
 172                      .DCFlushEnable = true,
 173                      .PostSyncOperation = NoWrite,
 174                      .CommandStreamerStallEnable = true);
 175
 176       emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG), l3cr_val);
 177       cmd_buffer->state.current_l3_config = l3cr_val;
 178    }
 179 }
 180
 181 static void
 182 __emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer)
 183 {
 184       uint32_t sf_dw[GENX(3DSTATE_SF_length)];
 185       struct GENX(3DSTATE_SF) sf = {
 186          GENX(3DSTATE_SF_header),
 187          .LineWidth = cmd_buffer->state.dynamic.line_width,
 188       };
 189       GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
 190       /* FIXME: gen9.fs */
 191       anv_batch_emit_merge(&cmd_buffer->batch, sf_dw,
 192                            cmd_buffer->state.pipeline->gen8.sf);
 193 }
 194
 195 #include "genxml/gen9_pack.h"
 196 static void
 197 __emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer)
 198 {
 199       uint32_t sf_dw[GENX(3DSTATE_SF_length)];
 200       struct GEN9_3DSTATE_SF sf = {
 201          GEN9_3DSTATE_SF_header,
 202          .LineWidth = cmd_buffer->state.dynamic.line_width,
 203       };
 204       GEN9_3DSTATE_SF_pack(NULL, sf_dw, &sf);
 205       /* FIXME: gen9.fs */
 206       anv_batch_emit_merge(&cmd_buffer->batch, sf_dw,
 207                            cmd_buffer->state.pipeline->gen8.sf);
 208 }
 209
 210 static void
 211 __emit_sf_state(struct anv_cmd_buffer *cmd_buffer)
 212 {
 213    if (cmd_buffer->device->info.is_cherryview)
 214       __emit_gen9_sf_state(cmd_buffer);
 215    else
 216       __emit_genx_sf_state(cmd_buffer);
 217 }
 218
 219 void
 220 genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
 221 {
 222    struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
 223
 224    if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
 225                                   ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {
 226       __emit_sf_state(cmd_buffer);
 227    }
 228
 229    if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
 230                                   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){
 231       uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
 232       struct GENX(3DSTATE_RASTER) raster = {
 233          GENX(3DSTATE_RASTER_header),
 234          .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
 235          .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
 236          .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
 237       };
 238       GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
 239       anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
 240                            pipeline->gen8.raster);
 241    }
 242
 243    /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to
 244     * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split
 245     * across different state packets for gen8 and gen9. We handle that by
 246     * using a big old #if switch here.
 247     */
 248 #if GEN_GEN == 8
 249    if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
 250                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
 251       struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
 252       struct anv_state cc_state =
 253          anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
 254                                             GENX(COLOR_CALC_STATE_length) * 4,
 255                                             64);
 256       struct GENX(COLOR_CALC_STATE) cc = {
 257          .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
 258          .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
 259          .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
 260          .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
 261          .StencilReferenceValue = d->stencil_reference.front & 0xff,
 262          .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff,
 263       };
 264       GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
 265
 266       if (!cmd_buffer->device->info.has_llc)
 267          anv_state_clflush(cc_state);
 268
 269       anv_batch_emit(&cmd_buffer->batch,
 270                      GENX(3DSTATE_CC_STATE_POINTERS),
 271                      .ColorCalcStatePointer = cc_state.offset,
 272                      .ColorCalcStatePointerValid = true);
 273    }
 274
 275    if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
 276                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
 277                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
 278       uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
 279       struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
 280
 281       struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = {
 282          GENX(3DSTATE_WM_DEPTH_STENCIL_header),
 283
 284          .StencilTestMask = d->stencil_compare_mask.front & 0xff,
 285          .StencilWriteMask = d->stencil_write_mask.front & 0xff,
 286
 287          .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
 288          .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
 289       };
 290       GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw,
 291                                           &wm_depth_stencil);
 292
 293       anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
 294                            pipeline->gen8.wm_depth_stencil);
 295    }
 296 #else
 297    if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {
 298       struct anv_state cc_state =
 299          anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
 300                                             GEN9_COLOR_CALC_STATE_length * 4,
 301                                             64);
 302       struct GEN9_COLOR_CALC_STATE cc = {
 303          .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
 304          .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
 305          .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
 306          .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
 307       };
 308       GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
 309
 310       if (!cmd_buffer->device->info.has_llc)
 311          anv_state_clflush(cc_state);
 312
 313       anv_batch_emit(&cmd_buffer->batch,
 314                      GEN9_3DSTATE_CC_STATE_POINTERS,
 315                      .ColorCalcStatePointer = cc_state.offset,
 316                      .ColorCalcStatePointerValid = true);
 317    }
 318
 319    if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
 320                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
 321                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
 322                                   ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
 323       uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length];
 324       struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
 325       struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
 326          GEN9_3DSTATE_WM_DEPTH_STENCIL_header,
 327
 328          .StencilTestMask = d->stencil_compare_mask.front & 0xff,
 329          .StencilWriteMask = d->stencil_write_mask.front & 0xff,
 330
 331          .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
 332          .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
 333
 334          .StencilReferenceValue = d->stencil_reference.front & 0xff,
 335          .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,
 336       };
 337       GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil);
 338
 339       anv_batch_emit_merge(&cmd_buffer->batch, dwords,
 340                            pipeline->gen9.wm_depth_stencil);
 341    }
 342 #endif
 343
 344    if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
 345                                   ANV_CMD_DIRTY_INDEX_BUFFER)) {
 346       anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF),
 347          .IndexedDrawCutIndexEnable = pipeline->primitive_restart,
 348          .CutIndex = cmd_buffer->state.restart_index,
 349       );
 350    }
 351
 352    cmd_buffer->state.dirty = 0;
 353 }
 354
 355 void genX(CmdBindIndexBuffer)(
 356     VkCommandBuffer                             commandBuffer,
 357     VkBuffer                                    _buffer,
 358     VkDeviceSize                                offset,
 359     VkIndexType                                 indexType)
 360 {
 361    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 362    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
 363
 364    static const uint32_t vk_to_gen_index_type[] = {
 365       [VK_INDEX_TYPE_UINT16]                    = INDEX_WORD,
 366       [VK_INDEX_TYPE_UINT32]                    = INDEX_DWORD,
 367    };
 368
 369    static const uint32_t restart_index_for_type[] = {
 370       [VK_INDEX_TYPE_UINT16]                    = UINT16_MAX,
 371       [VK_INDEX_TYPE_UINT32]                    = UINT32_MAX,
 372    };
 373
 374    cmd_buffer->state.restart_index = restart_index_for_type[indexType];
 375
 376    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER),
 377                   .IndexFormat = vk_to_gen_index_type[indexType],
 378                   .MemoryObjectControlState = GENX(MOCS),
 379                   .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
 380                   .BufferSize = buffer->size - offset);
 381
 382    cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
 383 }
 384
 385 static VkResult
 386 flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
 387 {
 388    struct anv_device *device = cmd_buffer->device;
 389    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
 390    struct anv_state surfaces = { 0, }, samplers = { 0, };
 391    VkResult result;
 392
 393    result = anv_cmd_buffer_emit_samplers(cmd_buffer,
 394                                          MESA_SHADER_COMPUTE, &samplers);
 395    if (result != VK_SUCCESS)
 396       return result;
 397    result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
 398                                               MESA_SHADER_COMPUTE, &surfaces);
 399    if (result != VK_SUCCESS)
 400       return result;
 401
 402    struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
 403
 404    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
 405    const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
 406
 407    unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
 408    unsigned push_constant_data_size =
 409       (prog_data->nr_params + local_id_dwords) * 4;
 410    unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
 411    unsigned push_constant_regs = reg_aligned_constant_size / 32;
 412
 413    if (push_state.alloc_size) {
 414       anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD),
 415                      .CURBETotalDataLength = push_state.alloc_size,
 416                      .CURBEDataStartAddress = push_state.offset);
 417    }
 418
 419    assert(prog_data->total_shared <= 64 * 1024);
 420    uint32_t slm_size = 0;
 421    if (prog_data->total_shared > 0) {
 422       /* slm_size is in 4k increments, but must be a power of 2. */
 423       slm_size = 4 * 1024;
 424       while (slm_size < prog_data->total_shared)
 425          slm_size <<= 1;
 426       slm_size /= 4 * 1024;
 427    }
 428
 429    struct anv_state state =
 430       anv_state_pool_emit(&device->dynamic_state_pool,
 431                           GENX(INTERFACE_DESCRIPTOR_DATA), 64,
 432                           .KernelStartPointer = pipeline->cs_simd,
 433                           .KernelStartPointerHigh = 0,
 434                           .BindingTablePointer = surfaces.offset,
 435                           .BindingTableEntryCount = 0,
 436                           .SamplerStatePointer = samplers.offset,
 437                           .SamplerCount = 0,
 438                           .ConstantIndirectURBEntryReadLength = push_constant_regs,
 439                           .ConstantURBEntryReadOffset = 0,
 440                           .BarrierEnable = cs_prog_data->uses_barrier,
 441                           .SharedLocalMemorySize = slm_size,
 442                           .NumberofThreadsinGPGPUThreadGroup =
 443                              pipeline->cs_thread_width_max);
 444
 445    uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
 446    anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
 447                   .InterfaceDescriptorTotalLength = size,
 448                   .InterfaceDescriptorDataStartAddress = state.offset);
 449
 450    return VK_SUCCESS;
 451 }
 452
 453 void
 454 genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
 455 {
 456    struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
 457    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
 458    VkResult result;
 459
 460    assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
 461
 462    bool needs_slm = cs_prog_data->base.total_shared > 0;
 463    genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm);
 464
 465    genX(flush_pipeline_select_gpgpu)(cmd_buffer);
 466
 467    if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
 468       anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
 469
 470    if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
 471        (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
 472       result = flush_compute_descriptor_set(cmd_buffer);
 473       assert(result == VK_SUCCESS);
 474       cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
 475    }
 476
 477    cmd_buffer->state.compute_dirty = 0;
 478 }
 479
 480 void genX(CmdSetEvent)(
 481     VkCommandBuffer                             commandBuffer,
 482     VkEvent                                     _event,
 483     VkPipelineStageFlags                        stageMask)
 484 {
 485    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 486    ANV_FROM_HANDLE(anv_event, event, _event);
 487
 488    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
 489                   .DestinationAddressType = DAT_PPGTT,
 490                   .PostSyncOperation = WriteImmediateData,
 491                   .Address = {
 492                      &cmd_buffer->device->dynamic_state_block_pool.bo,
 493                      event->state.offset
 494                    },
 495                   .ImmediateData = VK_EVENT_SET);
 496 }
 497
 498 void genX(CmdResetEvent)(
 499     VkCommandBuffer                             commandBuffer,
 500     VkEvent                                     _event,
 501     VkPipelineStageFlags                        stageMask)
 502 {
 503    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 504    ANV_FROM_HANDLE(anv_event, event, _event);
 505
 506    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
 507                   .DestinationAddressType = DAT_PPGTT,
 508                   .PostSyncOperation = WriteImmediateData,
 509                   .Address = {
 510                      &cmd_buffer->device->dynamic_state_block_pool.bo,
 511                      event->state.offset
 512                    },
 513                   .ImmediateData = VK_EVENT_RESET);
 514 }
 515
 516 void genX(CmdWaitEvents)(
 517     VkCommandBuffer                             commandBuffer,
 518     uint32_t                                    eventCount,
 519     const VkEvent*                              pEvents,
 520     VkPipelineStageFlags                        srcStageMask,
 521     VkPipelineStageFlags                        destStageMask,
 522     uint32_t                                    memoryBarrierCount,
 523     const VkMemoryBarrier*                      pMemoryBarriers,
 524     uint32_t                                    bufferMemoryBarrierCount,
 525     const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
 526     uint32_t                                    imageMemoryBarrierCount,
 527     const VkImageMemoryBarrier*                 pImageMemoryBarriers)
 528 {
 529    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 530    for (uint32_t i = 0; i < eventCount; i++) {
 531       ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
 532
 533       anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT),
 534                      .WaitMode = PollingMode,
 535                      .CompareOperation = COMPARE_SAD_EQUAL_SDD,
 536                      .SemaphoreDataDword = VK_EVENT_SET,
 537                      .SemaphoreAddress = {
 538                         &cmd_buffer->device->dynamic_state_block_pool.bo,
 539                         event->state.offset
 540                      });
 541    }
 542
 543    genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
 544                             false, /* byRegion */
 545                             memoryBarrierCount, pMemoryBarriers,
 546                             bufferMemoryBarrierCount, pBufferMemoryBarriers,
 547                             imageMemoryBarrierCount, pImageMemoryBarriers);
 548 }