* we do any more rendering or clearing.
*/
cmd_buffer->state.pending_pipe_bits |=
- ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
for (uint32_t l = 0; l < level_count; l++) {
uint32_t level = base_level + l;
}
cmd_buffer->state.pending_pipe_bits |=
- ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
}
/**
if (cmd_buffer->device->physical->always_flush_cache)
bits |= ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS;
- /* Flushes are pipelined while invalidations are handled immediately.
- * Therefore, if we're flushing anything then we need to schedule a stall
- * before any invalidations can happen.
+ /*
+ * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
+ *
+ * Write synchronization is a special case of end-of-pipe
+ * synchronization that requires that the render cache and/or depth
+ * related caches are flushed to memory, where the data will become
+ * globally visible. This type of synchronization is required prior to
+ * SW (CPU) actually reading the result data from memory, or initiating
+ * an operation that will use as a read surface (such as a texture
+ * surface) a previous render target and/or depth/stencil buffer
+ *
+ *
+ * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
+ *
+ * Exercising the write cache flush bits (Render Target Cache Flush
+ * Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
+ * ensures the write caches are flushed and doesn't guarantee the data
+ * is globally visible.
+ *
+ * SW can track the completion of the end-of-pipe-synchronization by
+ * using "Notify Enable" and "PostSync Operation - Write Immediate
+ * Data" in the PIPE_CONTROL command.
+ *
+ * In other words, flushes are pipelined while invalidations are handled
+ * immediately. Therefore, if we're flushing anything then we need to
+ * schedule an end-of-pipe sync before any invalidations can happen.
*/
if (bits & ANV_PIPE_FLUSH_BITS)
- bits |= ANV_PIPE_NEEDS_CS_STALL_BIT;
+ bits |= ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT;
+
- /* If we're going to do an invalidate and we have a pending CS stall that
- * has yet to be resolved, we do the CS stall now.
+   /* HSD 1209978178: docs say that before programming the aux table:
+    *
+    *    "Driver must ensure that the engine is IDLE but ensure it doesn't
+    *    add extra flushes in the case it knows that the engine is already
+    *    IDLE."
+    *
+    * Only request the end-of-pipe sync when an aux-table invalidate is
+    * actually pending in `bits`.  Testing the bare flag constant (without
+    * masking it against `bits`) is a compile-time truth value, which would
+    * request the sync on every gen12 flush and add exactly the extra
+    * flushes the quote above warns against.
+    */
+   if (GEN_GEN == 12 && (bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT))
+      bits |= ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT;
+
+ /* If we're going to do an invalidate and we have a pending end-of-pipe
+ * sync that has yet to be resolved, we do the end-of-pipe sync now.
*/
if ((bits & ANV_PIPE_INVALIDATE_BITS) &&
- (bits & ANV_PIPE_NEEDS_CS_STALL_BIT)) {
- bits |= ANV_PIPE_CS_STALL_BIT;
- bits &= ~ANV_PIPE_NEEDS_CS_STALL_BIT;
+ (bits & ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT)) {
+ bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+ bits &= ~ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT;
}
if (GEN_GEN >= 12 &&
bits &= ~ANV_PIPE_POST_SYNC_BIT;
}
- if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT)) {
+ if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT |
+ ANV_PIPE_END_OF_PIPE_SYNC_BIT)) {
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) {
#if GEN_GEN >= 12
pipe.TileCacheFlushEnable = bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT;
pipe.CommandStreamerStallEnable = bits & ANV_PIPE_CS_STALL_BIT;
pipe.StallAtPixelScoreboard = bits & ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
+ /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
+ *
+ * "The most common action to perform upon reaching a
+ * synchronization point is to write a value out to memory. An
+ * immediate value (included with the synchronization command) may
+ * be written."
+ *
+ *
+ * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
+ *
+ * "In case the data flushed out by the render engine is to be
+ * read back in to the render engine in coherent manner, then the
+ * render engine has to wait for the fence completion before
+ * accessing the flushed data. This can be achieved by following
+ * means on various products: PIPE_CONTROL command with CS Stall
+ * and the required write caches flushed with Post-Sync-Operation
+ * as Write Immediate Data.
+ *
+ * Example:
+ * - Workload-1 (3D/GPGPU/MEDIA)
+ * - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write
+ * Immediate Data, Required Write Cache Flush bits set)
+ * - Workload-2 (Can use the data produce or output by
+ * Workload-1)
+ */
+ if (bits & ANV_PIPE_END_OF_PIPE_SYNC_BIT) {
+ pipe.CommandStreamerStallEnable = true;
+ pipe.PostSyncOperation = WriteImmediateData;
+ pipe.Address = (struct anv_address) {
+ .bo = cmd_buffer->device->workaround_bo,
+ .offset = 0
+ };
+ }
+
/*
* According to the Broadwell documentation, any PIPE_CONTROL with the
* "Command Streamer Stall" bit set must also have another bit set,
* I chose "Stall at Pixel Scoreboard" since that's what we use in
* mesa and it seems to work fine. The choice is fairly arbitrary.
*/
- if ((bits & ANV_PIPE_CS_STALL_BIT) &&
- !(bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_DEPTH_STALL_BIT |
- ANV_PIPE_STALL_AT_SCOREBOARD_BIT)))
+ if (pipe.CommandStreamerStallEnable &&
+ !pipe.RenderTargetCacheFlushEnable &&
+ !pipe.DepthCacheFlushEnable &&
+ !pipe.StallAtPixelScoreboard &&
+ !pipe.PostSyncOperation &&
+ !pipe.DepthStallEnable &&
+ !pipe.DCFlushEnable)
pipe.StallAtPixelScoreboard = true;
}
if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
bits &= ~(ANV_PIPE_RENDER_TARGET_BUFFER_WRITES);
- bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT);
+ if (GEN_IS_HASWELL) {
+ /* Haswell needs additional work-arounds:
+ *
+ * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
+ *
+ * Option 1:
+ * PIPE_CONTROL command with the CS Stall and the required write
+ * caches flushed with Post-SyncOperation as Write Immediate Data
+ * followed by eight dummy MI_STORE_DATA_IMM (write to scratch
+ * space) commands.
+ *
+ * Example:
+ * - Workload-1
+ * - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write
+ * Immediate Data, Required Write Cache Flush bits set)
+ * - MI_STORE_DATA_IMM (8 times) (Dummy data, Scratch Address)
+ * - Workload-2 (Can use the data produce or output by
+ * Workload-1)
+ *
+ * Unfortunately, both the PRMs and the internal docs are a bit
+ * out-of-date in this regard. What the windows driver does (and
+ * this appears to actually work) is to emit a register read from the
+ * memory address written by the pipe control above.
+ *
+ * What register we load into doesn't matter. We choose an indirect
+ * rendering register because we know it always exists and it's one
+ * of the first registers the command parser allows us to write. If
+ * you don't have command parser support in your kernel (pre-4.2),
+ * this will get turned into MI_NOOP and you won't get the
+ * workaround. Unfortunately, there's just not much we can do in
+ * that case. This register is perfectly safe to write since we
+ * always re-load all of the indirect draw registers right before
+ * 3DPRIMITIVE when needed anyway.
+ */
+ anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
+ lrm.RegisterAddress = 0x243C; /* GEN7_3DPRIM_START_INSTANCE */
+ lrm.MemoryAddress = (struct anv_address) {
+ .bo = cmd_buffer->device->workaround_bo,
+ .offset = 0
+ };
+ }
+ }
+
+ bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT |
+ ANV_PIPE_END_OF_PIPE_SYNC_BIT);
}
if (bits & ANV_PIPE_INVALIDATE_BITS) {
static VkResult
emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_cmd_pipeline_state *pipe_state,
gl_shader_stage stage,
struct anv_state *bt_state)
{
struct anv_subpass *subpass = cmd_buffer->state.subpass;
- struct anv_cmd_pipeline_state *pipe_state;
- struct anv_pipeline *pipeline;
uint32_t state_offset;
- switch (stage) {
- case MESA_SHADER_COMPUTE:
- pipe_state = &cmd_buffer->state.compute.base;
- break;
- default:
- pipe_state = &cmd_buffer->state.gfx.base;
- break;
- }
- pipeline = pipe_state->pipeline;
-
- if (!anv_pipeline_has_stage(pipeline, stage)) {
- *bt_state = (struct anv_state) { 0, };
- return VK_SUCCESS;
- }
+ struct anv_pipeline *pipeline = pipe_state->pipeline;
struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
if (map->surface_count == 0) {
/* Clamp the range to the buffer size */
uint32_t range = MIN2(desc->range, desc->buffer->size - offset);
+ /* Align the range for consistency */
+ if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)
+ range = align_u32(range, ANV_UBO_BOUNDS_CHECK_ALIGNMENT);
+
struct anv_address address =
anv_address_add(desc->buffer->address, offset);
static VkResult
emit_samplers(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_cmd_pipeline_state *pipe_state,
gl_shader_stage stage,
struct anv_state *state)
{
- struct anv_cmd_pipeline_state *pipe_state =
- stage == MESA_SHADER_COMPUTE ? &cmd_buffer->state.compute.base :
- &cmd_buffer->state.gfx.base;
struct anv_pipeline *pipeline = pipe_state->pipeline;
- if (!anv_pipeline_has_stage(pipeline, stage)) {
- *state = (struct anv_state) { 0, };
- return VK_SUCCESS;
- }
-
struct anv_pipeline_bind_map *map = &pipeline->shaders[stage]->bind_map;
if (map->sampler_count == 0) {
*state = (struct anv_state) { 0, };
static uint32_t
flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
- struct anv_pipeline *pipeline)
+ struct anv_cmd_pipeline_state *pipe_state)
{
VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty &
- pipeline->active_stages;
+ pipe_state->pipeline->active_stages;
VkResult result = VK_SUCCESS;
anv_foreach_stage(s, dirty) {
- result = emit_samplers(cmd_buffer, s, &cmd_buffer->state.samplers[s]);
+ result = emit_samplers(cmd_buffer, pipe_state, s,
+ &cmd_buffer->state.samplers[s]);
if (result != VK_SUCCESS)
break;
- result = emit_binding_table(cmd_buffer, s,
+ result = emit_binding_table(cmd_buffer, pipe_state, s,
&cmd_buffer->state.binding_tables[s]);
if (result != VK_SUCCESS)
break;
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
/* Re-emit all active binding tables */
- dirty |= pipeline->active_stages;
+ dirty |= pipe_state->pipeline->active_stages;
anv_foreach_stage(s, dirty) {
- result = emit_samplers(cmd_buffer, s, &cmd_buffer->state.samplers[s]);
+ result = emit_samplers(cmd_buffer, pipe_state, s,
+ &cmd_buffer->state.samplers[s]);
if (result != VK_SUCCESS) {
anv_batch_set_error(&cmd_buffer->batch, result);
return 0;
}
- result = emit_binding_table(cmd_buffer, s,
+ result = emit_binding_table(cmd_buffer, pipe_state, s,
&cmd_buffer->state.binding_tables[s]);
if (result != VK_SUCCESS) {
anv_batch_set_error(&cmd_buffer->batch, result);
}
}
-#if GEN_GEN >= 8 || GEN_IS_HASWELL
static struct anv_address
get_push_range_address(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage,
struct anv_descriptor_set *set =
gfx_state->base.descriptors[range->index];
return anv_descriptor_set_address(cmd_buffer, set);
- break;
}
case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: {
.bo = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
.offset = state.offset,
};
- break;
}
default: {
}
}
}
-#endif
+
+
+/** Returns the size in bytes of the bound buffer relative to range->start
+ *
+ * This may be smaller than range->length * 32.
+ *
+ * range->start and range->length are scaled by 32 everywhere below to
+ * convert them to bytes.  The result depends on what the range refers to:
+ *
+ *  - ANV_DESCRIPTOR_SET_DESCRIPTORS: the bytes remaining in the descriptor
+ *    set's desc_mem allocation past range->start.
+ *  - ANV_DESCRIPTOR_SET_PUSH_CONSTANTS: always the full range->length * 32.
+ *  - otherwise range->set indexes a bound descriptor set and range->index a
+ *    uniform buffer descriptor in it; the result is the bytes of that
+ *    binding remaining past range->start, or 0 if the range starts beyond
+ *    the end of the binding.
+ *
+ * Only valid for graphics stages (asserted).
+ */
+static uint32_t
+get_push_range_bound_size(struct anv_cmd_buffer *cmd_buffer,
+                          gl_shader_stage stage,
+                          const struct anv_push_range *range)
+{
+   assert(stage != MESA_SHADER_COMPUTE);
+   const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
+   switch (range->set) {
+   case ANV_DESCRIPTOR_SET_DESCRIPTORS: {
+      struct anv_descriptor_set *set =
+         gfx_state->base.descriptors[range->index];
+      /* The range must start inside the allocation... */
+      assert(range->start * 32 < set->desc_mem.alloc_size);
+      /* ...and may end exactly at its end, hence <= rather than <. */
+      assert((range->start + range->length) * 32 <= set->desc_mem.alloc_size);
+      return set->desc_mem.alloc_size - range->start * 32;
+   }
+
+   case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
+      return range->length * 32;
+
+   default: {
+      assert(range->set < MAX_SETS);
+      struct anv_descriptor_set *set =
+         gfx_state->base.descriptors[range->set];
+      const struct anv_descriptor *desc =
+         &set->descriptors[range->index];
+
+      if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+         /* Range starts past the end of the view: nothing is in bounds. */
+         if (range->start * 32 > desc->buffer_view->range)
+            return 0;
+
+         return desc->buffer_view->range - range->start * 32;
+      } else {
+         assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
+         /* Compute the offset within the buffer */
+         struct anv_push_constants *push =
+            &cmd_buffer->state.push_constants[stage];
+         uint32_t dynamic_offset =
+            push->dynamic_offsets[range->dynamic_offset_index];
+         uint64_t offset = desc->offset + dynamic_offset;
+         /* Clamp to the buffer size */
+         offset = MIN2(offset, desc->buffer->size);
+         /* Clamp the range to the buffer size */
+         uint32_t bound_range = MIN2(desc->range, desc->buffer->size - offset);
+
+         /* Align the range for consistency */
+         bound_range = align_u32(bound_range, ANV_UBO_BOUNDS_CHECK_ALIGNMENT);
+
+         /* Range starts past the end of the binding: nothing is in bounds. */
+         if (range->start * 32 > bound_range)
+            return 0;
+
+         return bound_range - range->start * 32;
+      }
+   }
+   }
+}
static void
cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
- gl_shader_stage stage, unsigned buffer_count)
+ gl_shader_stage stage,
+ struct anv_address *buffers,
+ unsigned buffer_count)
{
const struct anv_cmd_graphics_state *gfx_state = &cmd_buffer->state.gfx;
const struct anv_pipeline *pipeline = gfx_state->base.pipeline;
const struct anv_pipeline_bind_map *bind_map =
&pipeline->shaders[stage]->bind_map;
+#if GEN_GEN >= 12
+ c.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
+#endif
+
#if GEN_GEN >= 8 || GEN_IS_HASWELL
/* The Skylake PRM contains the following restriction:
*
*/
assert((GEN_GEN >= 8 || GEN_IS_HASWELL) || i == 0);
- const struct anv_address addr =
- get_push_range_address(cmd_buffer, stage, range);
c.ConstantBody.ReadLength[i + shift] = range->length;
c.ConstantBody.Buffer[i + shift] =
- anv_address_add(addr, range->start * 32);
+ anv_address_add(buffers[i], range->start * 32);
}
#else
/* For Ivy Bridge, push constants are relative to dynamic state
* base address and we only ever push actual push constants.
*/
if (bind_map->push_ranges[0].length > 0) {
+ assert(buffer_count == 1);
assert(bind_map->push_ranges[0].set ==
ANV_DESCRIPTOR_SET_PUSH_CONSTANTS);
- struct anv_state state =
- anv_cmd_buffer_push_constants(cmd_buffer, stage);
+ assert(buffers[0].bo ==
+ cmd_buffer->device->dynamic_state_pool.block_pool.bo);
c.ConstantBody.ReadLength[0] = bind_map->push_ranges[0].length;
c.ConstantBody.Buffer[0].bo = NULL;
- c.ConstantBody.Buffer[0].offset = state.offset;
+ c.ConstantBody.Buffer[0].offset = buffers[0].offset;
}
assert(bind_map->push_ranges[1].length == 0);
assert(bind_map->push_ranges[2].length == 0);
#if GEN_GEN >= 12
static void
cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
- uint32_t shader_mask, uint32_t count)
+ uint32_t shader_mask,
+ struct anv_address *buffers,
+ uint32_t buffer_count)
{
- if (count == 0) {
+ if (buffer_count == 0) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) {
c.ShaderUpdateEnable = shader_mask;
+ c.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
}
return;
}
&pipeline->shaders[stage]->bind_map;
uint32_t *dw;
- const uint32_t buffers = (1 << count) - 1;
- const uint32_t num_dwords = 2 + 2 * count;
+ const uint32_t buffer_mask = (1 << buffer_count) - 1;
+ const uint32_t num_dwords = 2 + 2 * buffer_count;
dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
GENX(3DSTATE_CONSTANT_ALL),
.ShaderUpdateEnable = shader_mask,
- .PointerBufferMask = buffers);
+ .PointerBufferMask = buffer_mask,
+ .MOCS = cmd_buffer->device->isl_dev.mocs.internal);
- for (int i = 0; i < count; i++) {
+ for (int i = 0; i < buffer_count; i++) {
const struct anv_push_range *range = &bind_map->push_ranges[i];
- const struct anv_address addr =
- get_push_range_address(cmd_buffer, stage, range);
-
GENX(3DSTATE_CONSTANT_ALL_DATA_pack)(
&cmd_buffer->batch, dw + 2 + i * 2,
&(struct GENX(3DSTATE_CONSTANT_ALL_DATA)) {
- .PointerToConstantBuffer = anv_address_add(addr, range->start * 32),
+ .PointerToConstantBuffer =
+ anv_address_add(buffers[i], range->start * 32),
.ConstantBufferReadLength = range->length,
});
}
anv_foreach_stage(stage, dirty_stages) {
unsigned buffer_count = 0;
flushed |= mesa_to_vk_shader_stage(stage);
- uint32_t max_push_range = 0;
+ UNUSED uint32_t max_push_range = 0;
+ struct anv_address buffers[4] = {};
if (anv_pipeline_has_stage(pipeline, stage)) {
const struct anv_pipeline_bind_map *bind_map =
&pipeline->shaders[stage]->bind_map;
+ struct anv_push_constants *push =
+ &cmd_buffer->state.push_constants[stage];
+ if (cmd_buffer->device->robust_buffer_access) {
+ for (unsigned i = 0; i < 4; i++) {
+ const struct anv_push_range *range = &bind_map->push_ranges[i];
+ if (range->length == 0) {
+ push->push_ubo_sizes[i] = 0;
+ } else {
+ push->push_ubo_sizes[i] =
+ get_push_range_bound_size(cmd_buffer, stage, range);
+ }
+ cmd_buffer->state.push_constants_dirty |=
+ mesa_to_vk_shader_stage(stage);
+ }
+ }
+
+ /* We have to gather buffer addresses as a second step because the
+ * loop above puts data into the push constant area and the call to
+ * get_push_range_address is what locks our push constants and copies
+ * them into the actual GPU buffer. If we did the two loops at the
+ * same time, we'd risk only having some of the sizes in the push
+ * constant buffer when we did the copy.
+ */
for (unsigned i = 0; i < 4; i++) {
const struct anv_push_range *range = &bind_map->push_ranges[i];
- if (range->length > 0) {
- buffer_count++;
- if (GEN_GEN >= 12 && range->length > max_push_range)
- max_push_range = range->length;
- }
+ if (range->length == 0)
+ break;
+
+ buffers[i] = get_push_range_address(cmd_buffer, stage, range);
+ max_push_range = MAX2(max_push_range, range->length);
+ buffer_count++;
}
+
+ /* We have at most 4 buffers but they should be tightly packed */
+ for (unsigned i = buffer_count; i < 4; i++)
+ assert(bind_map->push_ranges[i].length == 0);
}
#if GEN_GEN >= 12
*/
if (max_push_range < 32) {
cmd_buffer_emit_push_constant_all(cmd_buffer, 1 << stage,
- buffer_count);
+ buffers, buffer_count);
continue;
}
#endif
- cmd_buffer_emit_push_constant(cmd_buffer, stage, buffer_count);
+ cmd_buffer_emit_push_constant(cmd_buffer, stage, buffers, buffer_count);
}
#if GEN_GEN >= 12
if (nobuffer_stages)
- cmd_buffer_emit_push_constant_all(cmd_buffer, nobuffer_stages, 0);
+ cmd_buffer_emit_push_constant_all(cmd_buffer, nobuffer_stages, NULL, 0);
#endif
cmd_buffer->state.push_constants_dirty &= ~flushed;
assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
- genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config);
+ genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->l3_config);
genX(cmd_buffer_emit_hashing_mode)(cmd_buffer, UINT_MAX, UINT_MAX, 1);
*/
uint32_t dirty = 0;
if (cmd_buffer->state.descriptors_dirty)
- dirty = flush_descriptor_sets(cmd_buffer, pipeline);
+ dirty = flush_descriptor_sets(cmd_buffer, &cmd_buffer->state.gfx.base);
if (dirty || cmd_buffer->state.push_constants_dirty) {
/* Because we're pushing UBOs, we have to push whenever either
assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
- genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config);
+ genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->l3_config);
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
cmd_buffer->state.compute.pipeline_dirty) {
- flush_descriptor_sets(cmd_buffer, pipeline);
+ flush_descriptor_sets(cmd_buffer, &cmd_buffer->state.compute.base);
uint32_t iface_desc_data_dw[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
att_state->pending_load_aspects = 0;
}
- cmd_buffer_emit_depth_stencil(cmd_buffer);
-
#if GEN_GEN >= 11
/* The PIPE_CONTROL command description says:
*
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
#endif
+
+#if GEN_GEN == 12
+ /* GEN:BUG:14010455700
+ *
+ * ISL will change some CHICKEN registers depending on the depth surface
+ * format, along with emitting the depth and stencil packets. In that case,
+ * we want to do a depth flush and stall, so the pipeline is not using these
+ * settings while we change the registers.
+ */
+ cmd_buffer->state.pending_pipe_bits |=
+ ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
+ ANV_PIPE_DEPTH_STALL_BIT |
+ ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+#endif
+
+ cmd_buffer_emit_depth_stencil(cmd_buffer);
}
static enum blorp_filter