X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2FgenX_cmd_buffer.c;h=51b14f2cb9d52ac165fd9ab4aaf41e5be8597b11;hb=89a3856714e2410e9ae3e0ee2cafe2fdd86e8b81;hp=9af5e9c8f5b5ea9b5a64911eed94d63e1e99051b;hpb=f8a4de6316f2b9b298a63dcb3bc6fa216d0076ad;p=mesa.git diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 9af5e9c8f5b..51b14f2cb9d 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -132,13 +132,21 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) * these fields. However, since we will be growing the BO's live, we * just set them all to the maximum. */ - sba.GeneralStateBufferSize = 0xfffff; + sba.GeneralStateBufferSize = 0xfffff; + sba.IndirectObjectBufferSize = 0xfffff; + if (device->physical->use_softpin) { + /* With softpin, we use fixed addresses so we actually know how big + * our base addresses are. + */ + sba.DynamicStateBufferSize = DYNAMIC_STATE_POOL_SIZE / 4096; + sba.InstructionBufferSize = INSTRUCTION_STATE_POOL_SIZE / 4096; + } else { + sba.DynamicStateBufferSize = 0xfffff; + sba.InstructionBufferSize = 0xfffff; + } sba.GeneralStateBufferSizeModifyEnable = true; - sba.DynamicStateBufferSize = 0xfffff; - sba.DynamicStateBufferSizeModifyEnable = true; - sba.IndirectObjectBufferSize = 0xfffff; sba.IndirectObjectBufferSizeModifyEnable = true; - sba.InstructionBufferSize = 0xfffff; + sba.DynamicStateBufferSizeModifyEnable = true; sba.InstructionBuffersizeModifyEnable = true; # else /* On gen7, we have upper bounds instead. According to the docs, @@ -308,6 +316,7 @@ color_attachment_compute_aux_usage(struct anv_device * device, att_state->aux_usage = anv_layout_to_aux_usage(&device->info, iview->image, VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); /* If we don't have aux, then we should have returned early in the layer @@ -465,6 +474,7 @@ depth_stencil_attachment_compute_aux_usage(struct anv_device *device, const enum isl_aux_usage first_subpass_aux_usage = anv_layout_to_aux_usage(&device->info, iview->image, VK_IMAGE_ASPECT_DEPTH_BIT, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, pass_att->first_subpass_layout); if (!blorp_can_hiz_clear_depth(&device->info, &iview->image->planes[0].surface.isl, @@ -520,7 +530,7 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer, { uint32_t depth_plane = anv_image_aspect_to_plane(image->aspects, VK_IMAGE_ASPECT_DEPTH_BIT); - if (image->planes[depth_plane].aux_surface.isl.size_B == 0) + if (image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_NONE) return; const enum isl_aux_state initial_state = @@ -813,7 +823,7 @@ anv_cmd_predicated_ccs_resolve(struct anv_cmd_buffer *cmd_buffer, * to do a partial resolve on a CCS_D surface. */ if (resolve_op == ISL_AUX_OP_PARTIAL_RESOLVE && - image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE) + image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_D) resolve_op = ISL_AUX_OP_FULL_RESOLVE; anv_image_ccs_op(cmd_buffer, image, format, aspect, level, @@ -988,6 +998,105 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer, } } +#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x)) + +#if GEN_GEN == 12 +static void +anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlagBits aspect, + uint32_t base_level, uint32_t level_count, + uint32_t base_layer, uint32_t layer_count) +{ + uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); + + uint64_t base_address = + anv_address_physical(image->planes[plane].address); + + const struct isl_surf *isl_surf = &image->planes[plane].surface.isl; + uint64_t format_bits = gen_aux_map_format_bits_for_isl_surf(isl_surf); + + /* We're about to live-update the AUX-TT. We really don't want anyone else + * trying to read it while we're doing this. We could probably get away + * with not having this stall in some cases if we were really careful but + * it's better to play it safe. Full stall the GPU. + */ + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + + struct gen_mi_builder b; + gen_mi_builder_init(&b, &cmd_buffer->batch); + + for (uint32_t a = 0; a < layer_count; a++) { + const uint32_t layer = base_layer + a; + + uint64_t start_offset_B = UINT64_MAX, end_offset_B = 0; + for (uint32_t l = 0; l < level_count; l++) { + const uint32_t level = base_level + l; + + uint32_t logical_array_layer, logical_z_offset_px; + if (image->type == VK_IMAGE_TYPE_3D) { + logical_array_layer = 0; + + /* If the given miplevel does not have this layer, then any higher + * miplevels won't either because miplevels only get smaller the + * higher the LOD. + */ + assert(layer < image->extent.depth); + if (layer >= anv_minify(image->extent.depth, level)) + break; + logical_z_offset_px = layer; + } else { + assert(layer < image->array_size); + logical_array_layer = layer; + logical_z_offset_px = 0; + } + + uint32_t slice_start_offset_B, slice_end_offset_B; + isl_surf_get_image_range_B_tile(isl_surf, level, + logical_array_layer, + logical_z_offset_px, + &slice_start_offset_B, + &slice_end_offset_B); + + start_offset_B = MIN2(start_offset_B, slice_start_offset_B); + end_offset_B = MAX2(end_offset_B, slice_end_offset_B); + } + + /* Aux operates 64K at a time */ + start_offset_B = align_down_u64(start_offset_B, 64 * 1024); + end_offset_B = align_u64(end_offset_B, 64 * 1024); + + for (uint64_t offset = start_offset_B; + offset < end_offset_B; offset += 64 * 1024) { + uint64_t address = base_address + offset; + + uint64_t aux_entry_addr64, *aux_entry_map; + aux_entry_map = gen_aux_map_get_entry(cmd_buffer->device->aux_map_ctx, + address, &aux_entry_addr64); + + assert(cmd_buffer->device->physical->use_softpin); + struct anv_address aux_entry_address = { + .bo = NULL, + .offset = aux_entry_addr64, + }; + + const uint64_t old_aux_entry = READ_ONCE(*aux_entry_map); + uint64_t new_aux_entry = + (old_aux_entry & GEN_AUX_MAP_ADDRESS_MASK) | format_bits; + + if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage)) + new_aux_entry |= GEN_AUX_MAP_ENTRY_VALID_BIT; + + gen_mi_store(&b, gen_mi_mem64(aux_entry_address), + gen_mi_imm(new_aux_entry)); + } + } + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_AUX_TABLE_INVALIDATE_BIT; +} +#endif /* GEN_GEN == 12 */ + /** * @brief Transitions a color buffer from one layout to another. * @@ -1008,7 +1117,8 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, VkImageLayout initial_layout, VkImageLayout final_layout) { - const struct gen_device_info *devinfo = &cmd_buffer->device->info; + struct anv_device *device = cmd_buffer->device; + const struct gen_device_info *devinfo = &device->info; /* Validate the inputs. */ assert(cmd_buffer); assert(image && image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); @@ -1057,6 +1167,16 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, if (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED || initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) { +#if GEN_GEN == 12 + if (device->physical->has_implicit_ccs && devinfo->has_aux_map) { + anv_image_init_aux_tt(cmd_buffer, image, aspect, + base_level, level_count, + base_layer, layer_count); + } +#else + assert(!(device->physical->has_implicit_ccs && devinfo->has_aux_map)); +#endif + /* A subresource in the undefined layout may have been aliased and * populated with any arrangement of bits. Therefore, we must initialize * the related aux buffer and clear buffer entry with desirable values. @@ -1139,9 +1259,9 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer, } const enum isl_aux_usage initial_aux_usage = - anv_layout_to_aux_usage(devinfo, image, aspect, initial_layout); + anv_layout_to_aux_usage(devinfo, image, aspect, 0, initial_layout); const enum isl_aux_usage final_aux_usage = - anv_layout_to_aux_usage(devinfo, image, aspect, final_layout); + anv_layout_to_aux_usage(devinfo, image, aspect, 0, final_layout); /* The current code assumes that there is no mixing of CCS_E and CCS_D. * We can handle transitions between CCS_D/E to and from NONE. What we @@ -1455,6 +1575,12 @@ genX(BeginCommandBuffer)( */ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT; + /* Re-emit the aux table register in every command buffer. This way we're + * ensured that we have the table even if this command buffer doesn't + * initialize any images. + */ + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_AUX_TABLE_INVALIDATE_BIT; + /* We send an "Indirect State Pointers Disable" packet at * EndCommandBuffer, so all push contant packets are ignored during a * context restore. Documentation says after that command, we need to @@ -1490,7 +1616,9 @@ genX(BeginCommandBuffer)( enum isl_aux_usage aux_usage = anv_layout_to_aux_usage(&cmd_buffer->device->info, iview->image, - VK_IMAGE_ASPECT_DEPTH_BIT, layout); + VK_IMAGE_ASPECT_DEPTH_BIT, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + layout); cmd_buffer->state.hiz_enabled = aux_usage == ISL_AUX_USAGE_HIZ; } @@ -1763,7 +1891,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, uint32_t l3cr; anv_pack_struct(&l3cr, L3_ALLOCATION_REG, -#if GEN_GEN < 12 +#if GEN_GEN < 11 .SLMEnable = has_slm, #endif #if GEN_GEN == 11 @@ -1865,6 +1993,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) { + UNUSED const struct gen_device_info *devinfo = &cmd_buffer->device->info; enum anv_pipe_bits bits = cmd_buffer->state.pending_pipe_bits; if (cmd_buffer->device->physical->always_flush_cache) @@ -1904,6 +2033,12 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT; } + /* GEN:BUG:1409226450, Wait for EU to be idle before pipe control which + * invalidates the instruction cache + */ + if (GEN_GEN == 12 && (bits & ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT)) + bits |= ANV_PIPE_CS_STALL_BIT | ANV_PIPE_STALL_AT_SCOREBOARD_BIT; + if ((GEN_GEN >= 8 && GEN_GEN <= 9) && (bits & ANV_PIPE_CS_STALL_BIT) && (bits & ANV_PIPE_VF_CACHE_INVALIDATE_BIT)) { @@ -1916,6 +2051,24 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) sizeof(cmd_buffer->state.gfx.ib_dirty_range)); } + /* Project: SKL / Argument: LRI Post Sync Operation [23] + * + * "PIPECONTROL command with “Command Streamer Stall Enable” must be + * programmed prior to programming a PIPECONTROL command with "LRI + * Post Sync Operation" in GPGPU mode of operation (i.e when + * PIPELINE_SELECT command is set to GPGPU mode of operation)." + * + * The same text exists a few rows below for Post Sync Op. + * + * On Gen12 this is GEN:BUG:1607156449. + */ + if (bits & ANV_PIPE_POST_SYNC_BIT) { + if ((GEN_GEN == 9 || (GEN_GEN == 12 && devinfo->revision == 0 /* A0 */)) && + cmd_buffer->state.current_pipeline == GPGPU) + bits |= ANV_PIPE_CS_STALL_BIT; + bits &= ~ANV_PIPE_POST_SYNC_BIT; + } + if (bits & (ANV_PIPE_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT)) { anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pipe) { #if GEN_GEN >= 12 @@ -2008,6 +2161,16 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) } } +#if GEN_GEN == 12 + if ((bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) && + cmd_buffer->device->info.has_aux_map) { + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) { + lri.RegisterOffset = GENX(GFX_CCS_AUX_INV_num); + lri.DataDWord = 1; + } + } +#endif + bits &= ~ANV_PIPE_INVALIDATE_BITS; } @@ -2188,25 +2351,37 @@ anv_descriptor_set_address(struct anv_cmd_buffer *cmd_buffer, } } +static struct anv_cmd_pipeline_state * +pipe_state_for_stage(struct anv_cmd_buffer *cmd_buffer, + gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_COMPUTE: + return &cmd_buffer->state.compute.base; + + case MESA_SHADER_VERTEX: + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + case MESA_SHADER_FRAGMENT: + return &cmd_buffer->state.gfx.base; + + default: + unreachable("invalid stage"); + } +} + static VkResult emit_binding_table(struct anv_cmd_buffer *cmd_buffer, gl_shader_stage stage, struct anv_state *bt_state) { struct anv_subpass *subpass = cmd_buffer->state.subpass; - struct anv_cmd_pipeline_state *pipe_state; - struct anv_pipeline *pipeline; uint32_t state_offset; - switch (stage) { - case MESA_SHADER_COMPUTE: - pipe_state = &cmd_buffer->state.compute.base; - break; - default: - pipe_state = &cmd_buffer->state.gfx.base; - break; - } - pipeline = pipe_state->pipeline; + struct anv_cmd_pipeline_state *pipe_state = + pipe_state_for_stage(cmd_buffer, stage); + struct anv_pipeline *pipeline = pipe_state->pipeline; if (!anv_pipeline_has_stage(pipeline, stage)) { *bt_state = (struct anv_state) { 0, }; @@ -2455,8 +2630,7 @@ emit_samplers(struct anv_cmd_buffer *cmd_buffer, struct anv_state *state) { struct anv_cmd_pipeline_state *pipe_state = - stage == MESA_SHADER_COMPUTE ? &cmd_buffer->state.compute.base : - &cmd_buffer->state.gfx.base; + pipe_state_for_stage(cmd_buffer, stage); struct anv_pipeline *pipeline = pipe_state->pipeline; if (!anv_pipeline_has_stage(pipeline, stage)) { @@ -2674,6 +2848,10 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer, const struct anv_pipeline_bind_map *bind_map = &pipeline->shaders[stage]->bind_map; +#if GEN_GEN >= 12 + c.MOCS = cmd_buffer->device->isl_dev.mocs.internal; +#endif + #if GEN_GEN >= 8 || GEN_IS_HASWELL /* The Skylake PRM contains the following restriction: * @@ -2734,6 +2912,7 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer, if (count == 0) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) { c.ShaderUpdateEnable = shader_mask; + c.MOCS = cmd_buffer->device->isl_dev.mocs.internal; } return; } @@ -2764,7 +2943,8 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer, dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords, GENX(3DSTATE_CONSTANT_ALL), .ShaderUpdateEnable = shader_mask, - .PointerBufferMask = buffers); + .PointerBufferMask = buffers, + .MOCS = cmd_buffer->device->isl_dev.mocs.internal); for (int i = 0; i < count; i++) { const struct anv_push_range *range = &bind_map->push_ranges[i]; @@ -2843,34 +3023,6 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.push_constants_dirty &= ~flushed; } -#if GEN_GEN >= 12 -void -genX(cmd_buffer_aux_map_state)(struct anv_cmd_buffer *cmd_buffer) -{ - void *aux_map_ctx = cmd_buffer->device->aux_map_ctx; - if (!aux_map_ctx) - return; - uint32_t aux_map_state_num = gen_aux_map_get_state_num(aux_map_ctx); - if (cmd_buffer->state.last_aux_map_state != aux_map_state_num) { - /* If the aux-map state number increased, then we need to rewrite the - * register. Rewriting the register is used to both set the aux-map - * translation table address, and also to invalidate any previously - * cached translations. - */ - uint64_t base_addr = gen_aux_map_get_base(aux_map_ctx); - anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) { - lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num); - lri.DataDWord = base_addr & 0xffffffff; - } - anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) { - lri.RegisterOffset = GENX(GFX_AUX_TABLE_BASE_ADDR_num) + 4; - lri.DataDWord = base_addr >> 32; - } - cmd_buffer->state.last_aux_map_state = aux_map_state_num; - } -} -#endif - void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) { @@ -2889,10 +3041,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) genX(flush_pipeline_select_3d)(cmd_buffer); -#if GEN_GEN >= 12 - genX(cmd_buffer_aux_map_state)(cmd_buffer); -#endif - if (vb_emit) { const uint32_t num_buffers = __builtin_popcount(vb_emit); const uint32_t num_dwords = 1 + num_buffers * 4; @@ -3782,10 +3930,6 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) genX(flush_pipeline_select_gpgpu)(cmd_buffer); -#if GEN_GEN >= 12 - genX(cmd_buffer_aux_map_state)(cmd_buffer); -#endif - if (cmd_buffer->state.compute.pipeline_dirty) { /* From the Sky Lake PRM Vol 2a, MEDIA_VFE_STATE: * @@ -4511,6 +4655,9 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer) isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info); if (GEN_GEN >= 12) { + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + /* GEN:BUG:1408224581 * * Workaround: Gen12LP Astep only An additional pipe control with @@ -4621,7 +4768,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, (att_state->fast_clear && !att_state->clear_color_is_zero_one) || att_state->input_aux_usage != att_state->aux_usage; - VkImageLayout target_layout, target_stencil_layout; + VkImageLayout target_layout; if (iview->aspect_mask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV && !input_needs_resolve) { /* Layout transitions before the final only help to enable sampling @@ -4632,9 +4779,11 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; } else { target_layout = subpass->attachments[i].layout; - target_stencil_layout = subpass->attachments[i].stencil_layout; } + VkImageLayout target_stencil_layout = + subpass->attachments[i].stencil_layout; + uint32_t base_layer, layer_count; if (image->type == VK_IMAGE_TYPE_3D) { base_layer = 0; @@ -4658,7 +4807,9 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, att_state->current_layout, target_layout); att_state->aux_usage = anv_layout_to_aux_usage(&cmd_buffer->device->info, image, - VK_IMAGE_ASPECT_DEPTH_BIT, target_layout); + VK_IMAGE_ASPECT_DEPTH_BIT, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + target_layout); } if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { @@ -4821,7 +4972,7 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, if (GEN_GEN < 10 && (att_state->pending_load_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) && - image->planes[0].aux_surface.isl.size_B > 0 && + image->planes[0].aux_usage != ISL_AUX_USAGE_NONE && iview->planes[0].isl.base_level == 0 && iview->planes[0].isl.base_array_layer == 0) { if (att_state->aux_usage != ISL_AUX_USAGE_NONE) { @@ -4904,13 +5055,9 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer, * is set due to new association of BTI, PS Scoreboard Stall bit must * be set in this packet." */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.RenderTargetCacheFlushEnable = true; - pc.StallAtPixelScoreboard = true; -#if GEN_GEN >= 12 - pc.TileCacheFlushEnable = true; -#endif - } + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | + ANV_PIPE_STALL_AT_SCOREBOARD_BIT; #endif } @@ -5047,6 +5194,7 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) src_state->aux_usage = anv_layout_to_aux_usage(&cmd_buffer->device->info, src_iview->image, VK_IMAGE_ASPECT_DEPTH_BIT, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); src_state->current_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; @@ -5071,6 +5219,7 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) dst_state->aux_usage = anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_iview->image, VK_IMAGE_ASPECT_DEPTH_BIT, + VK_IMAGE_USAGE_TRANSFER_DST_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); dst_state->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; @@ -5460,6 +5609,9 @@ void genX(CmdSetEvent)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_event, event, _event); + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { pc.StallAtPixelScoreboard = true; @@ -5484,6 +5636,9 @@ void genX(CmdResetEvent)( ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_event, event, _event); + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) { pc.StallAtPixelScoreboard = true;