X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_blorp.c;h=59ad344d9116dbe4be67c100e712af0e81b46e40;hb=a8e59b37081f169a83918de149dab7c31812577c;hp=67e8d5abb042033a3444a0be4568311d4b88bd4c;hpb=643248b66a265242ef3de3dffdae79e1283eee31;p=mesa.git diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 67e8d5abb04..59ad344d911 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -73,7 +73,8 @@ upload_blorp_shader(struct blorp_batch *batch, anv_pipeline_cache_upload_kernel(&device->default_pipeline_cache, key, key_size, kernel, kernel_size, NULL, 0, - prog_data, prog_data_size, &bind_map); + prog_data, prog_data_size, + NULL, 0, NULL, &bind_map); if (!bin) return false; @@ -116,6 +117,9 @@ anv_device_init_blorp(struct anv_device *device) case 11: device->blorp.exec = gen11_blorp_exec; break; + case 12: + device->blorp.exec = gen12_blorp_exec; + break; default: unreachable("Unknown hardware generation"); } @@ -241,13 +245,37 @@ get_blorp_surf_for_anv_image(const struct anv_device *device, * buffer. We have a single global buffer that stores the 1.0 value. */ const struct anv_address clear_color_addr = (struct anv_address) { - .bo = (struct anv_bo *)&device->hiz_clear_bo + .bo = device->hiz_clear_bo, }; blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr); } } } +static bool +get_blorp_surf_for_anv_shadow_image(const struct anv_device *device, + const struct anv_image *image, + VkImageAspectFlags aspect, + struct blorp_surf *blorp_surf) +{ + + uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); + if (image->planes[plane].shadow_surface.isl.size_B == 0) + return false; + + *blorp_surf = (struct blorp_surf) { + .surf = &image->planes[plane].shadow_surface.isl, + .addr = { + .buffer = image->planes[plane].address.bo, + .offset = image->planes[plane].address.offset + + image->planes[plane].shadow_surface.offset, + .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo), + }, + }; + + return true; +} + void anv_CmdCopyImage( VkCommandBuffer commandBuffer, VkImage srcImage, @@ -322,6 +350,20 @@ void anv_CmdCopyImage( dstOffset.x, dstOffset.y, extent.width, extent.height); } + + struct blorp_surf dst_shadow_surf; + if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, + dst_image, + 1UL << aspect_bit, + &dst_shadow_surf)) { + for (unsigned i = 0; i < layer_count; i++) { + blorp_copy(&batch, &src_surf, src_level, src_base_layer + i, + &dst_shadow_surf, dst_level, dst_base_layer + i, + srcOffset.x, srcOffset.y, + dstOffset.x, dstOffset.y, + extent.width, extent.height); + } + } } } else { struct blorp_surf src_surf, dst_surf; @@ -342,6 +384,19 @@ void anv_CmdCopyImage( dstOffset.x, dstOffset.y, extent.width, extent.height); } + + struct blorp_surf dst_shadow_surf; + if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, + dst_image, dst_mask, + &dst_shadow_surf)) { + for (unsigned i = 0; i < layer_count; i++) { + blorp_copy(&batch, &src_surf, src_level, src_base_layer + i, + &dst_shadow_surf, dst_level, dst_base_layer + i, + srcOffset.x, srcOffset.y, + dstOffset.x, dstOffset.y, + extent.width, extent.height); + } + } } } @@ -424,11 +479,18 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, buffer_row_pitch, buffer_format, &buffer.surf, &buffer_isl_surf); + bool dst_has_shadow = false; + struct blorp_surf dst_shadow_surf; if (&image == dst) { anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image, aspect, dst->surf.aux_usage, dst->level, dst->offset.z, extent.depth); + + dst_has_shadow = + get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, + anv_image, aspect, + &dst_shadow_surf); } for (unsigned z = 0; z < extent.depth; z++) { @@ -437,6 +499,14 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, src->offset.x, src->offset.y, dst->offset.x, dst->offset.y, extent.width, extent.height); + if (dst_has_shadow) { + blorp_copy(&batch, &src->surf, src->level, src->offset.z, + &dst_shadow_surf, dst->level, dst->offset.z, + src->offset.x, src->offset.y, + dst->offset.x, dst->offset.y, + extent.width, extent.height); + } + image.offset.z++; buffer.surf.addr.offset += buffer_layer_stride; } @@ -475,6 +545,8 @@ void anv_CmdCopyImageToBuffer( copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, srcImageLayout, regionCount, pRegions, false); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } static bool @@ -682,6 +754,8 @@ void anv_CmdCopyBuffer( } blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdUpdateBuffer( @@ -721,7 +795,7 @@ void anv_CmdUpdateBuffer( struct blorp_address src = { .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = tmp_data.offset, - .mocs = cmd_buffer->device->default_mocs, + .mocs = cmd_buffer->device->isl_dev.mocs.internal, }; struct blorp_address dst = { .buffer = dst_buffer->address.bo, @@ -737,6 +811,8 @@ void anv_CmdUpdateBuffer( } blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdFillBuffer( @@ -824,6 +900,8 @@ void anv_CmdFillBuffer( } blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdClearColorImage( @@ -901,7 +979,7 @@ void anv_CmdClearDepthStencilImage( struct blorp_batch batch; blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); - struct blorp_surf depth, stencil; + struct blorp_surf depth, stencil, stencil_shadow; if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { get_blorp_surf_for_anv_image(cmd_buffer->device, image, VK_IMAGE_ASPECT_DEPTH_BIT, @@ -910,10 +988,16 @@ void anv_CmdClearDepthStencilImage( memset(&depth, 0, sizeof(depth)); } + bool has_stencil_shadow = false; if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { get_blorp_surf_for_anv_image(cmd_buffer->device, image, VK_IMAGE_ASPECT_STENCIL_BIT, imageLayout, ISL_AUX_USAGE_NONE, &stencil); + + has_stencil_shadow = + get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image, + VK_IMAGE_ASPECT_STENCIL_BIT, + &stencil_shadow); } else { memset(&stencil, 0, sizeof(stencil)); } @@ -942,6 +1026,17 @@ void anv_CmdClearDepthStencilImage( clear_depth, pDepthStencil->depth, clear_stencil ? 0xff : 0, pDepthStencil->stencil); + + if (clear_stencil && has_stencil_shadow) { + union isl_color_value stencil_color = { + .u32 = { pDepthStencil->stencil, }, + }; + blorp_clear(&batch, &stencil_shadow, + ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY, + level, base_layer, layer_count, + 0, 0, level_width, level_height, + stencil_color, NULL); + } } } @@ -1004,6 +1099,7 @@ clear_color_attachment(struct anv_cmd_buffer *cmd_buffer, { const struct anv_subpass *subpass = cmd_buffer->state.subpass; const uint32_t color_att = attachment->colorAttachment; + assert(color_att < subpass->color_count); const uint32_t att_idx = subpass->color_attachments[color_att].attachment; if (att_idx == VK_ATTACHMENT_UNUSED) @@ -1065,11 +1161,11 @@ clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer, { static const union isl_color_value color_value = { .u32 = { 0, } }; const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const uint32_t att_idx = subpass->depth_stencil_attachment->attachment; - - if (att_idx == VK_ATTACHMENT_UNUSED) + if (!subpass->depth_stencil_attachment) return; + const uint32_t att_idx = subpass->depth_stencil_attachment->attachment; + assert(att_idx != VK_ATTACHMENT_UNUSED); struct anv_render_pass_attachment *pass_att = &cmd_buffer->state.pass->attachments[att_idx]; @@ -1144,8 +1240,12 @@ void anv_CmdClearAttachments( * trash our depth and stencil buffers. */ struct blorp_batch batch; - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, - BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); + enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL; + if (cmd_buffer->state.conditional_render_enabled) { + anv_cmd_emit_conditional_render_predicate(cmd_buffer); + flags |= BLORP_BATCH_PREDICATE_ENABLE; + } + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, flags); for (uint32_t a = 0; a < attachmentCount; ++a) { if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { @@ -1262,11 +1362,6 @@ void anv_CmdResolveImage( const uint32_t layer_count = anv_get_layerCount(dst_image, &pRegions[r].dstSubresource); - VkImageAspectFlags src_mask = pRegions[r].srcSubresource.aspectMask; - VkImageAspectFlags dst_mask = pRegions[r].dstSubresource.aspectMask; - - assert(anv_image_aspects_compatible(src_mask, dst_mask)); - uint32_t aspect_bit; anv_foreach_image_aspect_bit(aspect_bit, src_image, pRegions[r].srcSubresource.aspectMask) { @@ -1310,31 +1405,32 @@ fast_clear_aux_usage(const struct anv_image *image, void anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, const struct anv_image *image, + VkImageAspectFlagBits aspect, uint32_t base_level, uint32_t level_count, uint32_t base_layer, uint32_t layer_count) { struct blorp_batch batch; blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); - assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT && image->n_planes == 1); + /* We don't know who touched the main surface last so flush a bunch of + * caches to ensure we get good data. + */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | + ANV_PIPE_DATA_CACHE_FLUSH_BIT | + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; struct blorp_surf surf; get_blorp_surf_for_anv_image(cmd_buffer->device, - image, VK_IMAGE_ASPECT_COLOR_BIT, + image, aspect, VK_IMAGE_LAYOUT_GENERAL, ISL_AUX_USAGE_NONE, &surf); assert(surf.aux_usage == ISL_AUX_USAGE_NONE); - struct blorp_surf shadow_surf = { - .surf = &image->planes[0].shadow_surface.isl, - .addr = { - .buffer = image->planes[0].address.bo, - .offset = image->planes[0].address.offset + - image->planes[0].shadow_surface.offset, - .mocs = anv_mocs_for_bo(cmd_buffer->device, - image->planes[0].address.bo), - }, - }; + struct blorp_surf shadow_surf; + get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, + image, aspect, &shadow_surf); for (uint32_t l = 0; l < level_count; l++) { const uint32_t level = base_level + l; @@ -1357,6 +1453,10 @@ anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, } } + /* We just wrote to the buffer with the render cache. Flush it. */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + blorp_batch_finish(&batch); } @@ -1427,6 +1527,13 @@ anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer, ISL_AUX_USAGE_NONE, &stencil); } + /* Blorp may choose to clear stencil using RGBA32_UINT for better + * performance. If it does this, we need to flush it out of the depth + * cache before rendering to it. + */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; + blorp_clear_depth_stencil(&batch, &depth, &stencil, level, base_layer, layer_count, area.offset.x, area.offset.y, @@ -1437,6 +1544,30 @@ anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer, (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0, stencil_value); + /* Blorp may choose to clear stencil using RGBA32_UINT for better + * performance. If it does this, we need to flush it out of the render + * cache before someone starts trying to do stencil on it. + */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; + + struct blorp_surf stencil_shadow; + if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image, + VK_IMAGE_ASPECT_STENCIL_BIT, + &stencil_shadow)) { + union isl_color_value stencil_color = { + .u32 = { stencil_value }, + }; + blorp_clear(&batch, &stencil_shadow, + ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY, + level, base_layer, layer_count, + area.offset.x, area.offset.y, + area.offset.x + area.extent.width, + area.offset.y + area.extent.height, + stencil_color, NULL); + } + blorp_batch_finish(&batch); } @@ -1568,7 +1699,8 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, struct blorp_batch batch; blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, - predicate ? BLORP_BATCH_PREDICATE_ENABLE : 0); + BLORP_BATCH_PREDICATE_ENABLE * predicate + + BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value); struct blorp_surf surf; get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect, @@ -1577,17 +1709,10 @@ anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, /* Blorp will store the clear color for us if we provide the clear color * address and we are doing a fast clear. So we save the clear value into - * the blorp surface. However, in some situations we want to do a fast clear - * without changing the clear value stored in the state buffer. For those - * cases, we set the clear color address pointer to NULL, so blorp will not - * try to store a garbage color. + * the blorp surface. */ - if (mcs_op == ISL_AUX_OP_FAST_CLEAR) { - if (clear_value) - surf.clear_color = *clear_value; - else - surf.clear_color_addr.buffer = NULL; - } + if (clear_value) + surf.clear_color = *clear_value; /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": * @@ -1654,7 +1779,8 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, struct blorp_batch batch; blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, - predicate ? BLORP_BATCH_PREDICATE_ENABLE : 0); + BLORP_BATCH_PREDICATE_ENABLE * predicate + + BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value); struct blorp_surf surf; get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect, @@ -1664,17 +1790,10 @@ anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer, /* Blorp will store the clear color for us if we provide the clear color * address and we are doing a fast clear. So we save the clear value into - * the blorp surface. However, in some situations we want to do a fast clear - * without changing the clear value stored in the state buffer. For those - * cases, we set the clear color address pointer to NULL, so blorp will not - * try to store a garbage color. + * the blorp surface. */ - if (ccs_op == ISL_AUX_OP_FAST_CLEAR) { - if (clear_value) - surf.clear_color = *clear_value; - else - surf.clear_color_addr.buffer = NULL; - } + if (clear_value) + surf.clear_color = *clear_value; /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": *