X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_blorp.c;h=8570879f37abe221ee3e0edf0fd8978d9c6954e9;hb=4d44848c470c9d214c03906d8decd8056829c4ce;hp=7f51bedb76f09c7d12d0825a7f4f0ea928200b08;hpb=2c8058fb68a1e8cbc835272bd70c65eeac0779b9;p=mesa.git diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 7f51bedb76f..8570879f37a 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -24,17 +24,18 @@ #include "anv_private.h" static bool -lookup_blorp_shader(struct blorp_context *blorp, +lookup_blorp_shader(struct blorp_batch *batch, const void *key, uint32_t key_size, uint32_t *kernel_out, void *prog_data_out) { + struct blorp_context *blorp = batch->blorp; struct anv_device *device = blorp->driver_ctx; - /* The blorp cache must be a real cache */ - assert(device->blorp_shader_cache.cache); + /* The default cache must be a real cache */ + assert(device->default_pipeline_cache.cache); struct anv_shader_bin *bin = - anv_pipeline_cache_search(&device->blorp_shader_cache, key, key_size); + anv_pipeline_cache_search(&device->default_pipeline_cache, key, key_size); if (!bin) return false; @@ -50,17 +51,18 @@ lookup_blorp_shader(struct blorp_context *blorp, } static bool -upload_blorp_shader(struct blorp_context *blorp, +upload_blorp_shader(struct blorp_batch *batch, uint32_t stage, const void *key, uint32_t key_size, const void *kernel, uint32_t kernel_size, const struct brw_stage_prog_data *prog_data, uint32_t prog_data_size, uint32_t *kernel_out, void *prog_data_out) { + struct blorp_context *blorp = batch->blorp; struct anv_device *device = blorp->driver_ctx; /* The blorp cache must be a real cache */ - assert(device->blorp_shader_cache.cache); + assert(device->default_pipeline_cache.cache); struct anv_pipeline_bind_map bind_map = { .surface_count = 0, @@ -68,9 +70,11 @@ upload_blorp_shader(struct blorp_context *blorp, }; struct anv_shader_bin *bin = - anv_pipeline_cache_upload_kernel(&device->blorp_shader_cache, + anv_pipeline_cache_upload_kernel(&device->default_pipeline_cache, stage, key, key_size, kernel, kernel_size, - prog_data, prog_data_size, &bind_map); + NULL, 0, + prog_data, prog_data_size, + NULL, 0, NULL, &bind_map); if (!bin) return false; @@ -89,12 +93,8 @@ upload_blorp_shader(struct blorp_context *blorp, void anv_device_init_blorp(struct anv_device *device) { - anv_pipeline_cache_init(&device->blorp_shader_cache, device, true); blorp_init(&device->blorp, device, &device->isl_dev); - device->blorp.compiler = device->instance->physicalDevice.compiler; - device->blorp.mocs.tex = device->default_mocs; - device->blorp.mocs.rb = device->default_mocs; - device->blorp.mocs.vb = device->default_mocs; + device->blorp.compiler = device->physical->compiler; device->blorp.lookup_shader = lookup_blorp_shader; device->blorp.upload_shader = upload_blorp_shader; switch (device->info.gen) { @@ -114,6 +114,12 @@ anv_device_init_blorp(struct anv_device *device) case 10: device->blorp.exec = gen10_blorp_exec; break; + case 11: + device->blorp.exec = gen11_blorp_exec; + break; + case 12: + device->blorp.exec = gen12_blorp_exec; + break; default: unreachable("Unknown hardware generation"); } @@ -123,7 +129,6 @@ void anv_device_finish_blorp(struct anv_device *device) { blorp_finish(&device->blorp); - anv_pipeline_cache_finish(&device->blorp_shader_cache); } static void @@ -154,8 +159,9 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device, *blorp_surf = (struct blorp_surf) { .surf = isl_surf, .addr = { - .buffer = 
buffer->bo, - .offset = buffer->offset + offset, + .buffer = buffer->address.bo, + .offset = buffer->address.offset + offset, + .mocs = anv_mocs_for_bo(device, buffer->address.bo), }, }; @@ -168,44 +174,119 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device, .levels = 1, .array_len = 1, .samples = 1, - .row_pitch = row_pitch, + .row_pitch_B = row_pitch, .usage = ISL_SURF_USAGE_TEXTURE_BIT | ISL_SURF_USAGE_RENDER_TARGET_BIT, .tiling_flags = ISL_TILING_LINEAR_BIT); assert(ok); } +/* Pick something high enough that it won't be used in core and low enough it + * will never map to an extension. + */ +#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000 + +static struct blorp_address +anv_to_blorp_address(struct anv_address addr) +{ + return (struct blorp_address) { + .buffer = addr.bo, + .offset = addr.offset, + }; +} + static void -get_blorp_surf_for_anv_image(const struct anv_image *image, +get_blorp_surf_for_anv_image(const struct anv_device *device, + const struct anv_image *image, VkImageAspectFlags aspect, + VkImageUsageFlags usage, + VkImageLayout layout, enum isl_aux_usage aux_usage, struct blorp_surf *blorp_surf) { - if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT || - aux_usage == ISL_AUX_USAGE_HIZ) - aux_usage = ISL_AUX_USAGE_NONE; + uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); - const struct anv_surface *surface = - anv_image_get_surface_for_aspect_mask(image, aspect); + if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) { + assert(usage != 0); + aux_usage = anv_layout_to_aux_usage(&device->info, image, + aspect, usage, layout); + } + const struct anv_surface *surface = &image->planes[plane].surface; *blorp_surf = (struct blorp_surf) { .surf = &surface->isl, .addr = { - .buffer = image->bo, - .offset = image->offset + surface->offset, + .buffer = image->planes[plane].address.bo, + .offset = image->planes[plane].address.offset + surface->offset, + .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo), }, }; if (aux_usage != ISL_AUX_USAGE_NONE) { - blorp_surf->aux_surf = &image->aux_surface.isl, + const struct anv_surface *aux_surface = &image->planes[plane].aux_surface; + blorp_surf->aux_surf = &aux_surface->isl, blorp_surf->aux_addr = (struct blorp_address) { - .buffer = image->bo, - .offset = image->offset + image->aux_surface.offset, + .buffer = image->planes[plane].address.bo, + .offset = image->planes[plane].address.offset + aux_surface->offset, + .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo), }; blorp_surf->aux_usage = aux_usage; + + /* If we're doing a partial resolve, then we need the indirect clear + * color. If we are doing a fast clear and want to store/update the + * clear color, we also pass the address to blorp, otherwise it will only + * stomp the CCS to a particular value and won't care about format or + * clear value + */ + if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { + const struct anv_address clear_color_addr = + anv_image_get_clear_color_addr(device, image, aspect); + blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr); + } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) { + if (device->info.gen >= 10) { + /* Vulkan always clears to 1.0. On gen < 10, we set that directly + * in the state packet. For gen >= 10, must provide the clear + * value in a buffer. We have a single global buffer that stores + * the 1.0 value. 
+ */ + const struct anv_address clear_color_addr = (struct anv_address) { + .bo = device->hiz_clear_bo, + }; + blorp_surf->clear_color_addr = + anv_to_blorp_address(clear_color_addr); + } else { + blorp_surf->clear_color = (union isl_color_value) { + .f32 = { ANV_HZ_FC_VAL }, + }; + } + } } } +static bool +get_blorp_surf_for_anv_shadow_image(const struct anv_device *device, + const struct anv_image *image, + VkImageAspectFlags aspect, + struct blorp_surf *blorp_surf) +{ + + uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); + if (image->planes[plane].shadow_surface.isl.size_B == 0) + return false; + + *blorp_surf = (struct blorp_surf) { + .surf = &image->planes[plane].shadow_surface.isl, + .addr = { + .buffer = image->planes[plane].address.bo, + .offset = image->planes[plane].address.offset + + image->planes[plane].shadow_surface.offset, + .mocs = anv_mocs_for_bo(device, image->planes[plane].address.bo), + }, + }; + + return true; +} + void anv_CmdCopyImage( VkCommandBuffer commandBuffer, VkImage srcImage, @@ -230,6 +311,7 @@ void anv_CmdCopyImage( VkExtent3D extent = anv_sanitize_image_extent(src_image->type, pRegions[r].extent); + const uint32_t dst_level = pRegions[r].dstSubresource.mipLevel; unsigned dst_base_layer, layer_count; if (dst_image->type == VK_IMAGE_TYPE_3D) { dst_base_layer = pRegions[r].dstOffset.z; @@ -240,6 +322,7 @@ void anv_CmdCopyImage( anv_get_layerCount(dst_image, &pRegions[r].dstSubresource); } + const uint32_t src_level = pRegions[r].srcSubresource.mipLevel; unsigned src_base_layer; if (src_image->type == VK_IMAGE_TYPE_3D) { src_base_layer = pRegions[r].srcOffset.z; @@ -249,38 +332,115 @@ void anv_CmdCopyImage( anv_get_layerCount(src_image, &pRegions[r].srcSubresource)); } - assert(pRegions[r].srcSubresource.aspectMask == - pRegions[r].dstSubresource.aspectMask); - - uint32_t a; - for_each_bit(a, pRegions[r].dstSubresource.aspectMask) { - VkImageAspectFlagBits aspect = (1 << a); + VkImageAspectFlags src_mask = pRegions[r].srcSubresource.aspectMask, + dst_mask = pRegions[r].dstSubresource.aspectMask; + + assert(anv_image_aspects_compatible(src_mask, dst_mask)); + + if (util_bitcount(src_mask) > 1) { + uint32_t aspect_bit; + anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) { + struct blorp_surf src_surf, dst_surf; + get_blorp_surf_for_anv_image(cmd_buffer->device, + src_image, 1UL << aspect_bit, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + srcImageLayout, ISL_AUX_USAGE_NONE, + &src_surf); + get_blorp_surf_for_anv_image(cmd_buffer->device, + dst_image, 1UL << aspect_bit, + VK_IMAGE_USAGE_TRANSFER_DST_BIT, + dstImageLayout, ISL_AUX_USAGE_NONE, + &dst_surf); + anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, + 1UL << aspect_bit, + dst_surf.aux_usage, dst_level, + dst_base_layer, layer_count); + + for (unsigned i = 0; i < layer_count; i++) { + blorp_copy(&batch, &src_surf, src_level, src_base_layer + i, + &dst_surf, dst_level, dst_base_layer + i, + srcOffset.x, srcOffset.y, + dstOffset.x, dstOffset.y, + extent.width, extent.height); + } + struct blorp_surf dst_shadow_surf; + if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, + dst_image, + 1UL << aspect_bit, + &dst_shadow_surf)) { + for (unsigned i = 0; i < layer_count; i++) { + blorp_copy(&batch, &src_surf, src_level, src_base_layer + i, + &dst_shadow_surf, dst_level, dst_base_layer + i, + srcOffset.x, srcOffset.y, + dstOffset.x, dstOffset.y, + extent.width, extent.height); + } + } + } + } else { struct blorp_surf src_surf, dst_surf; - get_blorp_surf_for_anv_image(src_image, 
aspect, src_image->aux_usage, + get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + srcImageLayout, ISL_AUX_USAGE_NONE, &src_surf); - get_blorp_surf_for_anv_image(dst_image, aspect, dst_image->aux_usage, + get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask, + VK_IMAGE_USAGE_TRANSFER_DST_BIT, + dstImageLayout, ISL_AUX_USAGE_NONE, &dst_surf); + anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask, + dst_surf.aux_usage, dst_level, + dst_base_layer, layer_count); for (unsigned i = 0; i < layer_count; i++) { - blorp_copy(&batch, &src_surf, pRegions[r].srcSubresource.mipLevel, - src_base_layer + i, - &dst_surf, pRegions[r].dstSubresource.mipLevel, - dst_base_layer + i, + blorp_copy(&batch, &src_surf, src_level, src_base_layer + i, + &dst_surf, dst_level, dst_base_layer + i, srcOffset.x, srcOffset.y, dstOffset.x, dstOffset.y, extent.width, extent.height); } + + struct blorp_surf dst_shadow_surf; + if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, + dst_image, dst_mask, + &dst_shadow_surf)) { + for (unsigned i = 0; i < layer_count; i++) { + blorp_copy(&batch, &src_surf, src_level, src_base_layer + i, + &dst_shadow_surf, dst_level, dst_base_layer + i, + srcOffset.x, srcOffset.y, + dstOffset.x, dstOffset.y, + extent.width, extent.height); + } + } } } blorp_batch_finish(&batch); } +static enum isl_format +isl_format_for_size(unsigned size_B) +{ + /* Prefer 32-bit per component formats for CmdFillBuffer */ + switch (size_B) { + case 1: return ISL_FORMAT_R8_UINT; + case 2: return ISL_FORMAT_R16_UINT; + case 3: return ISL_FORMAT_R8G8B8_UINT; + case 4: return ISL_FORMAT_R32_UINT; + case 6: return ISL_FORMAT_R16G16B16_UINT; + case 8: return ISL_FORMAT_R32G32_UINT; + case 12: return ISL_FORMAT_R32G32B32_UINT; + case 16: return ISL_FORMAT_R32G32B32A32_UINT; + default: + unreachable("Unknown format size"); + } +} + static void copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, struct anv_buffer *anv_buffer, struct anv_image *anv_image, + VkImageLayout image_layout, uint32_t regionCount, const VkBufferImageCopy* pRegions, bool buffer_to_image) @@ -308,7 +468,11 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, for (unsigned r = 0; r < regionCount; r++) { const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; - get_blorp_surf_for_anv_image(anv_image, aspect, anv_image->aux_usage, + get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect, + buffer_to_image ? + VK_IMAGE_USAGE_TRANSFER_DST_BIT : + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + image_layout, ISL_AUX_USAGE_NONE, &image.surf); image.offset = anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset); @@ -322,41 +486,88 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, anv_get_layerCount(anv_image, &pRegions[r].imageSubresource); } - const enum isl_format buffer_format = + const enum isl_format linear_format = anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk_format, aspect, VK_IMAGE_TILING_LINEAR); + const struct isl_format_layout *linear_fmtl = + isl_format_get_layout(linear_format); - const VkExtent3D bufferImageExtent = { - .width = pRegions[r].bufferRowLength ? - pRegions[r].bufferRowLength : extent.width, - .height = pRegions[r].bufferImageHeight ? - pRegions[r].bufferImageHeight : extent.height, - }; + const uint32_t buffer_row_length = + pRegions[r].bufferRowLength ? 
+ pRegions[r].bufferRowLength : extent.width; - const struct isl_format_layout *buffer_fmtl = - isl_format_get_layout(buffer_format); + const uint32_t buffer_image_height = + pRegions[r].bufferImageHeight ? + pRegions[r].bufferImageHeight : extent.height; const uint32_t buffer_row_pitch = - DIV_ROUND_UP(bufferImageExtent.width, buffer_fmtl->bw) * - (buffer_fmtl->bpb / 8); + DIV_ROUND_UP(buffer_row_length, linear_fmtl->bw) * + (linear_fmtl->bpb / 8); const uint32_t buffer_layer_stride = - DIV_ROUND_UP(bufferImageExtent.height, buffer_fmtl->bh) * + DIV_ROUND_UP(buffer_image_height, linear_fmtl->bh) * buffer_row_pitch; + /* Some formats have additional restrictions which may cause ISL to + * fail to create a surface for us. Some examples include: + * + * 1. ASTC formats are not allowed to be LINEAR and must be tiled + * 2. YCbCr formats have to have 2-pixel aligned strides + * + * To avoid these issues, we always bind the buffer as if it's a + * "normal" format like RGBA32_UINT. Since we're using blorp_copy, + * the format doesn't matter as long as it has the right bpb. + */ + const VkExtent2D buffer_extent = { + .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw), + .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh), + }; + const enum isl_format buffer_format = + isl_format_for_size(linear_fmtl->bpb / 8); + struct isl_surf buffer_isl_surf; get_blorp_surf_for_anv_buffer(cmd_buffer->device, anv_buffer, pRegions[r].bufferOffset, - extent.width, extent.height, + buffer_extent.width, buffer_extent.height, buffer_row_pitch, buffer_format, &buffer.surf, &buffer_isl_surf); + bool dst_has_shadow = false; + struct blorp_surf dst_shadow_surf; + if (&image == dst) { + /* In this case, the source is the buffer and, since blorp takes its + * copy dimensions in terms of the source format, we have to use the + * scaled down version for compressed textures because the source + * format is an RGB format. 
+ */ + extent.width = buffer_extent.width; + extent.height = buffer_extent.height; + + anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image, + aspect, dst->surf.aux_usage, + dst->level, + dst->offset.z, extent.depth); + + dst_has_shadow = + get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, + anv_image, aspect, + &dst_shadow_surf); + } + for (unsigned z = 0; z < extent.depth; z++) { blorp_copy(&batch, &src->surf, src->level, src->offset.z, &dst->surf, dst->level, dst->offset.z, src->offset.x, src->offset.y, dst->offset.x, dst->offset.y, extent.width, extent.height); + if (dst_has_shadow) { + blorp_copy(&batch, &src->surf, src->level, src->offset.z, + &dst_shadow_surf, dst->level, dst->offset.z, + src->offset.x, src->offset.y, + dst->offset.x, dst->offset.y, + extent.width, extent.height); + } + image.offset.z++; buffer.surf.addr.offset += buffer_layer_stride; } @@ -377,7 +588,7 @@ void anv_CmdCopyBufferToImage( ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer); ANV_FROM_HANDLE(anv_image, dst_image, dstImage); - copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, + copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, dstImageLayout, regionCount, pRegions, true); } @@ -393,8 +604,10 @@ void anv_CmdCopyImageToBuffer( ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer); - copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, + copy_buffer_to_image(cmd_buffer, dst_buffer, src_image, srcImageLayout, regionCount, pRegions, false); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } static bool @@ -435,13 +648,13 @@ void anv_CmdBlitImage( struct blorp_surf src, dst; - uint32_t gl_filter; + enum blorp_filter blorp_filter; switch (filter) { case VK_FILTER_NEAREST: - gl_filter = 0x2600; /* GL_NEAREST */ + blorp_filter = BLORP_FILTER_NEAREST; break; case VK_FILTER_LINEAR: - gl_filter = 0x2601; /* GL_LINEAR */ + blorp_filter = BLORP_FILTER_BILINEAR; break; default: unreachable("Invalid filter"); @@ -454,93 +667,93 @@ void anv_CmdBlitImage( const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource; const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource; - get_blorp_surf_for_anv_image(src_image, src_res->aspectMask, - src_image->aux_usage, &src); - get_blorp_surf_for_anv_image(dst_image, dst_res->aspectMask, - dst_image->aux_usage, &dst); - - struct anv_format src_format = - anv_get_format(&cmd_buffer->device->info, src_image->vk_format, - src_res->aspectMask, src_image->tiling); - struct anv_format dst_format = - anv_get_format(&cmd_buffer->device->info, dst_image->vk_format, - dst_res->aspectMask, dst_image->tiling); - - unsigned dst_start, dst_end; - if (dst_image->type == VK_IMAGE_TYPE_3D) { - assert(dst_res->baseArrayLayer == 0); - dst_start = pRegions[r].dstOffsets[0].z; - dst_end = pRegions[r].dstOffsets[1].z; - } else { - dst_start = dst_res->baseArrayLayer; - dst_end = dst_start + anv_get_layerCount(dst_image, dst_res); - } + assert(anv_image_aspects_compatible(src_res->aspectMask, + dst_res->aspectMask)); + + uint32_t aspect_bit; + anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) { + get_blorp_surf_for_anv_image(cmd_buffer->device, + src_image, 1U << aspect_bit, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + srcImageLayout, ISL_AUX_USAGE_NONE, &src); + get_blorp_surf_for_anv_image(cmd_buffer->device, + dst_image, 1U << aspect_bit, + VK_IMAGE_USAGE_TRANSFER_DST_BIT, + dstImageLayout, ISL_AUX_USAGE_NONE, &dst); + + struct anv_format_plane src_format = + 
anv_get_format_plane(&cmd_buffer->device->info, src_image->vk_format, + 1U << aspect_bit, src_image->tiling); + struct anv_format_plane dst_format = + anv_get_format_plane(&cmd_buffer->device->info, dst_image->vk_format, + 1U << aspect_bit, dst_image->tiling); + + unsigned dst_start, dst_end; + if (dst_image->type == VK_IMAGE_TYPE_3D) { + assert(dst_res->baseArrayLayer == 0); + dst_start = pRegions[r].dstOffsets[0].z; + dst_end = pRegions[r].dstOffsets[1].z; + } else { + dst_start = dst_res->baseArrayLayer; + dst_end = dst_start + anv_get_layerCount(dst_image, dst_res); + } - unsigned src_start, src_end; - if (src_image->type == VK_IMAGE_TYPE_3D) { - assert(src_res->baseArrayLayer == 0); - src_start = pRegions[r].srcOffsets[0].z; - src_end = pRegions[r].srcOffsets[1].z; - } else { - src_start = src_res->baseArrayLayer; - src_end = src_start + anv_get_layerCount(src_image, src_res); - } + unsigned src_start, src_end; + if (src_image->type == VK_IMAGE_TYPE_3D) { + assert(src_res->baseArrayLayer == 0); + src_start = pRegions[r].srcOffsets[0].z; + src_end = pRegions[r].srcOffsets[1].z; + } else { + src_start = src_res->baseArrayLayer; + src_end = src_start + anv_get_layerCount(src_image, src_res); + } - bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end); - float src_z_step = (float)(src_end + 1 - src_start) / - (float)(dst_end + 1 - dst_start); + bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end); + float src_z_step = (float)(src_end + 1 - src_start) / + (float)(dst_end + 1 - dst_start); - if (flip_z) { - src_start = src_end; - src_z_step *= -1; - } + if (flip_z) { + src_start = src_end; + src_z_step *= -1; + } - unsigned src_x0 = pRegions[r].srcOffsets[0].x; - unsigned src_x1 = pRegions[r].srcOffsets[1].x; - unsigned dst_x0 = pRegions[r].dstOffsets[0].x; - unsigned dst_x1 = pRegions[r].dstOffsets[1].x; - bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1); - - unsigned src_y0 = pRegions[r].srcOffsets[0].y; - unsigned src_y1 = pRegions[r].srcOffsets[1].y; - unsigned dst_y0 = pRegions[r].dstOffsets[0].y; - unsigned dst_y1 = pRegions[r].dstOffsets[1].y; - bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1); - - const unsigned num_layers = dst_end - dst_start; - for (unsigned i = 0; i < num_layers; i++) { - unsigned dst_z = dst_start + i; - unsigned src_z = src_start + i * src_z_step; - - blorp_blit(&batch, &src, src_res->mipLevel, src_z, - src_format.isl_format, src_format.swizzle, - &dst, dst_res->mipLevel, dst_z, - dst_format.isl_format, - anv_swizzle_for_render(dst_format.swizzle), - src_x0, src_y0, src_x1, src_y1, - dst_x0, dst_y0, dst_x1, dst_y1, - gl_filter, flip_x, flip_y); + unsigned src_x0 = pRegions[r].srcOffsets[0].x; + unsigned src_x1 = pRegions[r].srcOffsets[1].x; + unsigned dst_x0 = pRegions[r].dstOffsets[0].x; + unsigned dst_x1 = pRegions[r].dstOffsets[1].x; + bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1); + + unsigned src_y0 = pRegions[r].srcOffsets[0].y; + unsigned src_y1 = pRegions[r].srcOffsets[1].y; + unsigned dst_y0 = pRegions[r].dstOffsets[0].y; + unsigned dst_y1 = pRegions[r].dstOffsets[1].y; + bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1); + + const unsigned num_layers = dst_end - dst_start; + anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, + 1U << aspect_bit, + dst.aux_usage, + dst_res->mipLevel, + dst_start, num_layers); + + for (unsigned i = 0; i < num_layers; i++) { + unsigned dst_z = dst_start + i; + unsigned src_z = src_start + i * src_z_step; + + blorp_blit(&batch, 
&src, src_res->mipLevel, src_z, + src_format.isl_format, src_format.swizzle, + &dst, dst_res->mipLevel, dst_z, + dst_format.isl_format, dst_format.swizzle, + src_x0, src_y0, src_x1, src_y1, + dst_x0, dst_y0, dst_x1, dst_y1, + blorp_filter, flip_x, flip_y); + } } - } blorp_batch_finish(&batch); } -static enum isl_format -isl_format_for_size(unsigned size_B) -{ - switch (size_B) { - case 1: return ISL_FORMAT_R8_UINT; - case 2: return ISL_FORMAT_R8G8_UINT; - case 4: return ISL_FORMAT_R8G8B8A8_UINT; - case 8: return ISL_FORMAT_R16G16B16A16_UINT; - case 16: return ISL_FORMAT_R32G32B32A32_UINT; - default: - unreachable("Not a power-of-two format size"); - } -} - /** * Returns the greatest common divisor of a and b that is a power of two. */ @@ -578,18 +791,22 @@ void anv_CmdCopyBuffer( for (unsigned r = 0; r < regionCount; r++) { struct blorp_address src = { - .buffer = src_buffer->bo, - .offset = src_buffer->offset + pRegions[r].srcOffset, + .buffer = src_buffer->address.bo, + .offset = src_buffer->address.offset + pRegions[r].srcOffset, + .mocs = anv_mocs_for_bo(cmd_buffer->device, src_buffer->address.bo), }; struct blorp_address dst = { - .buffer = dst_buffer->bo, - .offset = dst_buffer->offset + pRegions[r].dstOffset, + .buffer = dst_buffer->address.bo, + .offset = dst_buffer->address.offset + pRegions[r].dstOffset, + .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo), }; blorp_buffer_copy(&batch, src, dst, pRegions[r].size); } blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdUpdateBuffer( @@ -626,15 +843,15 @@ void anv_CmdUpdateBuffer( memcpy(tmp_data.map, pData, copy_size); - anv_state_flush(cmd_buffer->device, tmp_data); - struct blorp_address src = { - .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo, .offset = tmp_data.offset, + .mocs = cmd_buffer->device->isl_dev.mocs.internal, }; struct blorp_address dst = { - .buffer = dst_buffer->bo, - .offset = dst_buffer->offset + dstOffset, + .buffer = dst_buffer->address.bo, + .offset = dst_buffer->address.offset + dstOffset, + .mocs = anv_mocs_for_bo(cmd_buffer->device, dst_buffer->address.bo), }; blorp_buffer_copy(&batch, src, dst, copy_size); @@ -645,6 +862,8 @@ void anv_CmdUpdateBuffer( } blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdFillBuffer( @@ -732,6 +951,8 @@ void anv_CmdFillBuffer( } blorp_batch_finish(&batch); + + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES; } void anv_CmdClearColorImage( @@ -750,19 +971,22 @@ void anv_CmdClearColorImage( struct blorp_batch batch; blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); - struct blorp_surf surf; - get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, - image->aux_usage, &surf); for (unsigned r = 0; r < rangeCount; r++) { if (pRanges[r].aspectMask == 0) continue; - assert(pRanges[r].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); + assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV); - struct anv_format src_format = - anv_get_format(&cmd_buffer->device->info, image->vk_format, - VK_IMAGE_ASPECT_COLOR_BIT, image->tiling); + struct blorp_surf surf; + get_blorp_surf_for_anv_image(cmd_buffer->device, + image, pRanges[r].aspectMask, + VK_IMAGE_USAGE_TRANSFER_DST_BIT, + imageLayout, ISL_AUX_USAGE_NONE, &surf); + + struct anv_format_plane src_format = + 
anv_get_format_plane(&cmd_buffer->device->info, image->vk_format, + VK_IMAGE_ASPECT_COLOR_BIT, image->tiling); unsigned base_layer = pRanges[r].baseArrayLayer; unsigned layer_count = anv_get_layerCount(image, &pRanges[r]); @@ -777,6 +1001,11 @@ void anv_CmdClearColorImage( layer_count = anv_minify(image->extent.depth, level); } + anv_cmd_buffer_mark_image_written(cmd_buffer, image, + pRanges[r].aspectMask, + surf.aux_usage, level, + base_layer, layer_count); + blorp_clear(&batch, &surf, src_format.isl_format, src_format.swizzle, level, base_layer, layer_count, @@ -802,17 +1031,27 @@ void anv_CmdClearDepthStencilImage( struct blorp_batch batch; blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); - struct blorp_surf depth, stencil; + struct blorp_surf depth, stencil, stencil_shadow; if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { - get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT, - ISL_AUX_USAGE_NONE, &depth); + get_blorp_surf_for_anv_image(cmd_buffer->device, + image, VK_IMAGE_ASPECT_DEPTH_BIT, + VK_IMAGE_USAGE_TRANSFER_DST_BIT, + imageLayout, ISL_AUX_USAGE_NONE, &depth); } else { memset(&depth, 0, sizeof(depth)); } + bool has_stencil_shadow = false; if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { - get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT, - ISL_AUX_USAGE_NONE, &stencil); + get_blorp_surf_for_anv_image(cmd_buffer->device, + image, VK_IMAGE_ASPECT_STENCIL_BIT, + VK_IMAGE_USAGE_TRANSFER_DST_BIT, + imageLayout, ISL_AUX_USAGE_NONE, &stencil); + + has_stencil_shadow = + get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image, + VK_IMAGE_ASPECT_STENCIL_BIT, + &stencil_shadow); } else { memset(&stencil, 0, sizeof(stencil)); } @@ -841,6 +1080,17 @@ void anv_CmdClearDepthStencilImage( clear_depth, pDepthStencil->depth, clear_stencil ? 
0xff : 0, pDepthStencil->stencil); + + if (clear_stencil && has_stencil_shadow) { + union isl_color_value stencil_color = { + .u32 = { pDepthStencil->stencil, }, + }; + blorp_clear(&batch, &stencil_shadow, + ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY, + level, base_layer, layer_count, + 0, 0, level_width, level_height, + stencil_color, NULL); + } } } @@ -903,6 +1153,7 @@ clear_color_attachment(struct anv_cmd_buffer *cmd_buffer, { const struct anv_subpass *subpass = cmd_buffer->state.subpass; const uint32_t color_att = attachment->colorAttachment; + assert(color_att < subpass->color_count); const uint32_t att_idx = subpass->color_attachments[color_att].attachment; if (att_idx == VK_ATTACHMENT_UNUSED) @@ -945,6 +1196,7 @@ clear_color_attachment(struct anv_cmd_buffer *cmd_buffer, for (uint32_t r = 0; r < rectCount; ++r) { const VkOffset2D offset = pRects[r].rect.offset; const VkExtent2D extent = pRects[r].rect.extent; + assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS); blorp_clear_attachments(batch, binding_table, ISL_FORMAT_UNSUPPORTED, pass_att->samples, pRects[r].baseArrayLayer, @@ -963,11 +1215,11 @@ clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer, { static const union isl_color_value color_value = { .u32 = { 0, } }; const struct anv_subpass *subpass = cmd_buffer->state.subpass; - const uint32_t att_idx = subpass->depth_stencil_attachment.attachment; - - if (att_idx == VK_ATTACHMENT_UNUSED) + if (!subpass->depth_stencil_attachment) return; + const uint32_t att_idx = subpass->depth_stencil_attachment->attachment; + assert(att_idx != VK_ATTACHMENT_UNUSED); struct anv_render_pass_attachment *pass_att = &cmd_buffer->state.pass->attachments[att_idx]; @@ -1016,6 +1268,7 @@ clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer, const VkOffset2D offset = pRects[r].rect.offset; const VkExtent2D extent = pRects[r].rect.extent; VkClearDepthStencilValue value = attachment->clearValue.depthStencil; + assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS); blorp_clear_attachments(batch, binding_table, depth_format, pass_att->samples, pRects[r].baseArrayLayer, @@ -1041,11 +1294,16 @@ void anv_CmdClearAttachments( * trash our depth and stencil buffers. 
*/ struct blorp_batch batch; - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, - BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); + enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL; + if (cmd_buffer->state.conditional_render_enabled) { + anv_cmd_emit_conditional_render_predicate(cmd_buffer); + flags |= BLORP_BATCH_PREDICATE_ENABLE; + } + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, flags); for (uint32_t a = 0; a < attachmentCount; ++a) { - if (pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) { + if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { + assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT); clear_color_attachment(cmd_buffer, &batch, &pAttachments[a], rectCount, pRects); @@ -1065,254 +1323,73 @@ enum subpass_stage { SUBPASS_STAGE_RESOLVE, }; -static bool -subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer) -{ - const struct anv_cmd_state *cmd_state = &cmd_buffer->state; - uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment; - - for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { - uint32_t a = cmd_state->subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - assert(a < cmd_state->pass->attachment_count); - if (cmd_state->attachments[a].pending_clear_aspects) { - return true; - } - } - - if (ds != VK_ATTACHMENT_UNUSED) { - assert(ds < cmd_state->pass->attachment_count); - if (cmd_state->attachments[ds].pending_clear_aspects) - return true; - } - - return false; -} - void -anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) +anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *src_image, + enum isl_aux_usage src_aux_usage, + uint32_t src_level, uint32_t src_base_layer, + const struct anv_image *dst_image, + enum isl_aux_usage dst_aux_usage, + uint32_t dst_level, uint32_t dst_base_layer, + VkImageAspectFlagBits aspect, + uint32_t src_x, uint32_t src_y, + uint32_t dst_x, uint32_t dst_y, + uint32_t width, uint32_t height, + uint32_t layer_count, + enum blorp_filter filter) { - const struct anv_cmd_state *cmd_state = &cmd_buffer->state; - const VkRect2D render_area = cmd_buffer->state.render_area; - - - if (!subpass_needs_clear(cmd_buffer)) - return; - - /* Because this gets called within a render pass, we tell blorp not to - * trash our depth and stencil buffers. 
- */ struct blorp_batch batch; - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, - BLORP_BATCH_NO_EMIT_DEPTH_STENCIL); - - VkClearRect clear_rect = { - .rect = cmd_buffer->state.render_area, - .baseArrayLayer = 0, - .layerCount = cmd_buffer->state.framebuffer->layers, - }; - - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) { - const uint32_t a = cmd_state->subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - assert(a < cmd_state->pass->attachment_count); - struct anv_attachment_state *att_state = &cmd_state->attachments[a]; - - if (!att_state->pending_clear_aspects) - continue; - - assert(att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT); - - struct anv_image_view *iview = fb->attachments[a]; - const struct anv_image *image = iview->image; - struct blorp_surf surf; - get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, - att_state->aux_usage, &surf); - - if (att_state->fast_clear) { - surf.clear_color = vk_to_isl_color(att_state->clear_value.color); - - /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": - * - * "After Render target fast clear, pipe-control with color cache - * write-flush must be issued before sending any DRAW commands on - * that render target." - * - * This comment is a bit cryptic and doesn't really tell you what's - * going or what's really needed. It appears that fast clear ops are - * not properly synchronized with other drawing. This means that we - * cannot have a fast clear operation in the pipe at the same time as - * other regular drawing operations. We need to use a PIPE_CONTROL - * to ensure that the contents of the previous draw hit the render - * target before we resolve and then use a second PIPE_CONTROL after - * the resolve to ensure that it is completed before any additional - * drawing occurs. 
- */ - cmd_buffer->state.pending_pipe_bits |= - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; - - blorp_fast_clear(&batch, &surf, iview->isl.format, - iview->isl.base_level, - iview->isl.base_array_layer, fb->layers, - render_area.offset.x, render_area.offset.y, - render_area.offset.x + render_area.extent.width, - render_area.offset.y + render_area.extent.height); - - cmd_buffer->state.pending_pipe_bits |= - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; - } else { - blorp_clear(&batch, &surf, iview->isl.format, - anv_swizzle_for_render(iview->isl.swizzle), - iview->isl.base_level, - iview->isl.base_array_layer, fb->layers, - render_area.offset.x, render_area.offset.y, - render_area.offset.x + render_area.extent.width, - render_area.offset.y + render_area.extent.height, - vk_to_isl_color(att_state->clear_value.color), NULL); - } - - att_state->pending_clear_aspects = 0; - } - - const uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment; - assert(ds == VK_ATTACHMENT_UNUSED || ds < cmd_state->pass->attachment_count); - - if (ds != VK_ATTACHMENT_UNUSED && - cmd_state->attachments[ds].pending_clear_aspects) { - - VkClearAttachment clear_att = { - .aspectMask = cmd_state->attachments[ds].pending_clear_aspects, - .clearValue = cmd_state->attachments[ds].clear_value, - }; - - - const uint8_t gen = cmd_buffer->device->info.gen; - bool clear_with_hiz = gen >= 8 && cmd_state->attachments[ds].aux_usage == - ISL_AUX_USAGE_HIZ; - const struct anv_image_view *iview = fb->attachments[ds]; - - if (clear_with_hiz) { - const bool clear_depth = clear_att.aspectMask & - VK_IMAGE_ASPECT_DEPTH_BIT; - const bool clear_stencil = clear_att.aspectMask & - VK_IMAGE_ASPECT_STENCIL_BIT; - - /* Check against restrictions for depth buffer clearing. A great GPU - * performance benefit isn't expected when using the HZ sequence for - * stencil-only clears. Therefore, we don't emit a HZ op sequence for - * a stencil clear in addition to using the BLORP-fallback for depth. - */ - if (clear_depth) { - if (!blorp_can_hiz_clear_depth(gen, iview->isl.format, - iview->image->samples, - render_area.offset.x, - render_area.offset.y, - render_area.offset.x + - render_area.extent.width, - render_area.offset.y + - render_area.extent.height)) { - clear_with_hiz = false; - } else if (clear_att.clearValue.depthStencil.depth != - ANV_HZ_FC_VAL) { - /* Don't enable fast depth clears for any color not equal to - * ANV_HZ_FC_VAL. - */ - clear_with_hiz = false; - } else if (gen == 8 && - anv_can_sample_with_hiz(&cmd_buffer->device->info, - iview->aspect_mask, - iview->image->samples)) { - /* Only gen9+ supports returning ANV_HZ_FC_VAL when sampling a - * fast-cleared portion of a HiZ buffer. Testing has revealed - * that Gen8 only supports returning 0.0f. Gens prior to gen8 do - * not support this feature at all. - */ - clear_with_hiz = false; - } - } - - if (clear_with_hiz) { - blorp_gen8_hiz_clear_attachments(&batch, iview->image->samples, - render_area.offset.x, - render_area.offset.y, - render_area.offset.x + - render_area.extent.width, - render_area.offset.y + - render_area.extent.height, - clear_depth, clear_stencil, - clear_att.clearValue. - depthStencil.stencil); - - /* From the SKL PRM, Depth Buffer Clear: - * - * Depth Buffer Clear Workaround - * Depth buffer clear pass using any of the methods (WM_STATE, - * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a - * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits - * “set” before starting to render. 
DepthStall and DepthFlush are - * not needed between consecutive depth clear passes nor is it - * required if the depth-clear pass was done with “full_surf_clear” - * bit set in the 3DSTATE_WM_HZ_OP. - */ - if (clear_depth) { - cmd_buffer->state.pending_pipe_bits |= - ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT; - } - } - } - - if (!clear_with_hiz) { - clear_depth_stencil_attachment(cmd_buffer, &batch, - &clear_att, 1, &clear_rect); - } - - cmd_state->attachments[ds].pending_clear_aspects = 0; - } - - blorp_batch_finish(&batch); -} + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); -static void -resolve_image(struct blorp_batch *batch, - const struct anv_image *src_image, - enum isl_aux_usage src_aux_usage, - uint32_t src_level, uint32_t src_layer, - const struct anv_image *dst_image, - enum isl_aux_usage dst_aux_usage, - uint32_t dst_level, uint32_t dst_layer, - VkImageAspectFlags aspect_mask, - uint32_t src_x, uint32_t src_y, uint32_t dst_x, uint32_t dst_y, - uint32_t width, uint32_t height) -{ assert(src_image->type == VK_IMAGE_TYPE_2D); assert(src_image->samples > 1); assert(dst_image->type == VK_IMAGE_TYPE_2D); assert(dst_image->samples == 1); + assert(src_image->n_planes == dst_image->n_planes); + assert(!src_image->format->can_ycbcr); + assert(!dst_image->format->can_ycbcr); + + struct blorp_surf src_surf, dst_surf; + get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect, + 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + src_aux_usage, &src_surf); + if (src_aux_usage == ISL_AUX_USAGE_MCS) { + src_surf.clear_color_addr = anv_to_blorp_address( + anv_image_get_clear_color_addr(cmd_buffer->device, src_image, + VK_IMAGE_ASPECT_COLOR_BIT)); + } + get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect, + 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + dst_aux_usage, &dst_surf); + anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, + aspect, dst_aux_usage, + dst_level, dst_base_layer, layer_count); + + if (filter == BLORP_FILTER_NONE) { + /* If no explicit filter is provided, then it's implied by the type of + * the source image. 
+ */ + if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) || + (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) || + isl_format_has_int_channel(src_surf.surf->format)) { + filter = BLORP_FILTER_SAMPLE_0; + } else { + filter = BLORP_FILTER_AVERAGE; + } + } - uint32_t a; - for_each_bit(a, aspect_mask) { - VkImageAspectFlagBits aspect = 1 << a; - - struct blorp_surf src_surf, dst_surf; - get_blorp_surf_for_anv_image(src_image, aspect, - src_aux_usage, &src_surf); - get_blorp_surf_for_anv_image(dst_image, aspect, - dst_aux_usage, &dst_surf); - - blorp_blit(batch, - &src_surf, src_level, src_layer, + for (uint32_t l = 0; l < layer_count; l++) { + blorp_blit(&batch, + &src_surf, src_level, src_base_layer + l, ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, - &dst_surf, dst_level, dst_layer, + &dst_surf, dst_level, dst_base_layer + l, ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY, src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height, - 0x2600 /* GL_NEAREST */, false, false); + filter, false, false); } + + blorp_batch_finish(&batch); } void anv_CmdResolveImage( @@ -1328,8 +1405,7 @@ void anv_CmdResolveImage( ANV_FROM_HANDLE(anv_image, src_image, srcImage); ANV_FROM_HANDLE(anv_image, dst_image, dstImage); - struct blorp_batch batch; - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); + assert(!src_image->format->can_ycbcr); for (uint32_t r = 0; r < regionCount; r++) { assert(pRegions[r].srcSubresource.aspectMask == @@ -1340,63 +1416,69 @@ void anv_CmdResolveImage( const uint32_t layer_count = anv_get_layerCount(dst_image, &pRegions[r].dstSubresource); - for (uint32_t layer = 0; layer < layer_count; layer++) { - resolve_image(&batch, - src_image, src_image->aux_usage, - pRegions[r].srcSubresource.mipLevel, - pRegions[r].srcSubresource.baseArrayLayer + layer, - dst_image, dst_image->aux_usage, - pRegions[r].dstSubresource.mipLevel, - pRegions[r].dstSubresource.baseArrayLayer + layer, - pRegions[r].dstSubresource.aspectMask, - pRegions[r].srcOffset.x, pRegions[r].srcOffset.y, - pRegions[r].dstOffset.x, pRegions[r].dstOffset.y, - pRegions[r].extent.width, pRegions[r].extent.height); + uint32_t aspect_bit; + anv_foreach_image_aspect_bit(aspect_bit, src_image, + pRegions[r].srcSubresource.aspectMask) { + enum isl_aux_usage src_aux_usage = + anv_layout_to_aux_usage(&cmd_buffer->device->info, src_image, + (1 << aspect_bit), + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + srcImageLayout); + enum isl_aux_usage dst_aux_usage = + anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_image, + (1 << aspect_bit), + VK_IMAGE_USAGE_TRANSFER_DST_BIT, + dstImageLayout); + + anv_image_msaa_resolve(cmd_buffer, + src_image, src_aux_usage, + pRegions[r].srcSubresource.mipLevel, + pRegions[r].srcSubresource.baseArrayLayer, + dst_image, dst_aux_usage, + pRegions[r].dstSubresource.mipLevel, + pRegions[r].dstSubresource.baseArrayLayer, + (1 << aspect_bit), + pRegions[r].srcOffset.x, + pRegions[r].srcOffset.y, + pRegions[r].dstOffset.x, + pRegions[r].dstOffset.y, + pRegions[r].extent.width, + pRegions[r].extent.height, + layer_count, BLORP_FILTER_NONE); } } - - blorp_batch_finish(&batch); } void -anv_image_fast_clear(struct anv_cmd_buffer *cmd_buffer, - const struct anv_image *image, - const uint32_t base_level, const uint32_t level_count, - const uint32_t base_layer, uint32_t layer_count) +anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlagBits aspect, + uint32_t base_level, uint32_t level_count, + uint32_t 
base_layer, uint32_t layer_count) { - assert(image->type == VK_IMAGE_TYPE_3D || image->extent.depth == 1); - - if (image->type == VK_IMAGE_TYPE_3D) { - assert(base_layer == 0); - assert(layer_count == anv_minify(image->extent.depth, base_level)); - } - struct blorp_batch batch; blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); - struct blorp_surf surf; - get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, - image->aux_usage == ISL_AUX_USAGE_NONE ? - ISL_AUX_USAGE_CCS_D : image->aux_usage, - &surf); - - /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear": - * - * "After Render target fast clear, pipe-control with color cache - * write-flush must be issued before sending any DRAW commands on - * that render target." - * - * This comment is a bit cryptic and doesn't really tell you what's going - * or what's really needed. It appears that fast clear ops are not - * properly synchronized with other drawing. This means that we cannot - * have a fast clear operation in the pipe at the same time as other - * regular drawing operations. We need to use a PIPE_CONTROL to ensure - * that the contents of the previous draw hit the render target before we - * resolve and then use a second PIPE_CONTROL after the resolve to ensure - * that it is completed before any additional drawing occurs. + /* We don't know who touched the main surface last so flush a bunch of + * caches to ensure we get good data. */ cmd_buffer->state.pending_pipe_bits |= - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | + ANV_PIPE_DATA_CACHE_FLUSH_BIT | + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; + + struct blorp_surf surf; + get_blorp_surf_for_anv_image(cmd_buffer->device, + image, aspect, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + VK_IMAGE_LAYOUT_GENERAL, + ISL_AUX_USAGE_NONE, &surf); + assert(surf.aux_usage == ISL_AUX_USAGE_NONE); + + struct blorp_surf shadow_surf; + get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, + image, aspect, &shadow_surf); for (uint32_t l = 0; l < level_count; l++) { const uint32_t level = base_level + l; @@ -1410,157 +1492,402 @@ anv_image_fast_clear(struct anv_cmd_buffer *cmd_buffer, if (image->type == VK_IMAGE_TYPE_3D) layer_count = extent.depth; - assert(level < anv_image_aux_levels(image)); - assert(base_layer + layer_count <= anv_image_aux_layers(image, level)); - blorp_fast_clear(&batch, &surf, surf.surf->format, - level, base_layer, layer_count, - 0, 0, extent.width, extent.height); + for (uint32_t a = 0; a < layer_count; a++) { + const uint32_t layer = base_layer + a; + + blorp_copy(&batch, &surf, level, layer, + &shadow_surf, level, layer, + 0, 0, 0, 0, extent.width, extent.height); + } } + /* We just wrote to the buffer with the render cache. Flush it. 
*/ cmd_buffer->state.pending_pipe_bits |= - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT; + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + + blorp_batch_finish(&batch); } void -anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) +anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlagBits aspect, + enum isl_aux_usage aux_usage, + enum isl_format format, struct isl_swizzle swizzle, + uint32_t level, uint32_t base_layer, uint32_t layer_count, + VkRect2D area, union isl_color_value clear_color) { - struct anv_framebuffer *fb = cmd_buffer->state.framebuffer; - struct anv_subpass *subpass = cmd_buffer->state.subpass; - - if (subpass->has_resolve) { - struct blorp_batch batch; - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); + assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); - /* We are about to do some MSAA resolves. We need to flush so that the - * result of writes to the MSAA color attachments show up in the sampler - * when we blit to the single-sampled resolve target. - */ - cmd_buffer->state.pending_pipe_bits |= - ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; - - for (uint32_t i = 0; i < subpass->color_count; ++i) { - uint32_t src_att = subpass->color_attachments[i].attachment; - uint32_t dst_att = subpass->resolve_attachments[i].attachment; - - if (dst_att == VK_ATTACHMENT_UNUSED) - continue; - - assert(src_att < cmd_buffer->state.pass->attachment_count); - assert(dst_att < cmd_buffer->state.pass->attachment_count); - - if (cmd_buffer->state.attachments[dst_att].pending_clear_aspects) { - /* From the Vulkan 1.0 spec: - * - * If the first use of an attachment in a render pass is as a - * resolve attachment, then the loadOp is effectively ignored - * as the resolve is guaranteed to overwrite all pixels in the - * render area. 
- */ - cmd_buffer->state.attachments[dst_att].pending_clear_aspects = 0; - } + /* We don't support planar images with multisampling yet */ + assert(image->n_planes == 1); - struct anv_image_view *src_iview = fb->attachments[src_att]; - struct anv_image_view *dst_iview = fb->attachments[dst_att]; + struct blorp_batch batch; + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); - enum isl_aux_usage src_aux_usage = - cmd_buffer->state.attachments[src_att].aux_usage; - enum isl_aux_usage dst_aux_usage = - cmd_buffer->state.attachments[dst_att].aux_usage; - - const VkRect2D render_area = cmd_buffer->state.render_area; - - assert(src_iview->aspect_mask == dst_iview->aspect_mask); - - resolve_image(&batch, src_iview->image, src_aux_usage, - src_iview->isl.base_level, - src_iview->isl.base_array_layer, - dst_iview->image, dst_aux_usage, - dst_iview->isl.base_level, - dst_iview->isl.base_array_layer, - src_iview->aspect_mask, - render_area.offset.x, render_area.offset.y, - render_area.offset.x, render_area.offset.y, - render_area.extent.width, render_area.extent.height); - } + struct blorp_surf surf; + get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect, + 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + aux_usage, &surf); + anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage, + level, base_layer, layer_count); + + blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle), + level, base_layer, layer_count, + area.offset.x, area.offset.y, + area.offset.x + area.extent.width, + area.offset.y + area.extent.height, + clear_color, NULL); - blorp_batch_finish(&batch); - } + blorp_batch_finish(&batch); } void -anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer, - const struct anv_image *image, - enum blorp_hiz_op op) +anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlags aspects, + enum isl_aux_usage depth_aux_usage, + uint32_t level, + uint32_t base_layer, uint32_t layer_count, + VkRect2D area, + float depth_value, uint8_t stencil_value) { - assert(image); + assert(image->aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); - /* Don't resolve depth buffers without an auxiliary HiZ buffer and - * don't perform such a resolve on gens that don't support it. + struct blorp_batch batch; + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); + + struct blorp_surf depth = {}; + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + get_blorp_surf_for_anv_image(cmd_buffer->device, + image, VK_IMAGE_ASPECT_DEPTH_BIT, + 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + depth_aux_usage, &depth); + depth.clear_color.f32[0] = ANV_HZ_FC_VAL; + } + + struct blorp_surf stencil = {}; + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + get_blorp_surf_for_anv_image(cmd_buffer->device, + image, VK_IMAGE_ASPECT_STENCIL_BIT, + 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + ISL_AUX_USAGE_NONE, &stencil); + } + + /* Blorp may choose to clear stencil using RGBA32_UINT for better + * performance. If it does this, we need to flush it out of the depth + * cache before rendering to it. 
*/ - if (cmd_buffer->device->info.gen < 8 || - image->aux_usage != ISL_AUX_USAGE_HIZ) - return; + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT; + + blorp_clear_depth_stencil(&batch, &depth, &stencil, + level, base_layer, layer_count, + area.offset.x, area.offset.y, + area.offset.x + area.extent.width, + area.offset.y + area.extent.height, + aspects & VK_IMAGE_ASPECT_DEPTH_BIT, + depth_value, + (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0, + stencil_value); + + /* Blorp may choose to clear stencil using RGBA32_UINT for better + * performance. If it does this, we need to flush it out of the render + * cache before someone starts trying to do stencil on it. + */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT; + + struct blorp_surf stencil_shadow; + if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image, + VK_IMAGE_ASPECT_STENCIL_BIT, + &stencil_shadow)) { + union isl_color_value stencil_color = { + .u32 = { stencil_value }, + }; + blorp_clear(&batch, &stencil_shadow, + ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY, + level, base_layer, layer_count, + area.offset.x, area.offset.y, + area.offset.x + area.extent.width, + area.offset.y + area.extent.height, + stencil_color, NULL); + } - assert(op == BLORP_HIZ_OP_HIZ_RESOLVE || - op == BLORP_HIZ_OP_DEPTH_RESOLVE); + blorp_batch_finish(&batch); +} + +void +anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlagBits aspect, uint32_t level, + uint32_t base_layer, uint32_t layer_count, + enum isl_aux_op hiz_op) +{ + assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT); + assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level)); + uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect); + assert(plane == 0); struct blorp_batch batch; blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); struct blorp_surf surf; - get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT, - ISL_AUX_USAGE_NONE, &surf); + get_blorp_surf_for_anv_image(cmd_buffer->device, + image, VK_IMAGE_ASPECT_DEPTH_BIT, + 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + image->planes[plane].aux_usage, &surf); + surf.clear_color.f32[0] = ANV_HZ_FC_VAL; - /* Manually add the aux HiZ surf */ - surf.aux_surf = &image->aux_surface.isl, - surf.aux_addr = (struct blorp_address) { - .buffer = image->bo, - .offset = image->offset + image->aux_surface.offset, - }; - surf.aux_usage = ISL_AUX_USAGE_HIZ; + blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op); - surf.clear_color.f32[0] = ANV_HZ_FC_VAL; + blorp_batch_finish(&batch); +} + +void +anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + VkImageAspectFlags aspects, + uint32_t level, + uint32_t base_layer, uint32_t layer_count, + VkRect2D area, uint8_t stencil_value) +{ + assert(image->aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | + VK_IMAGE_ASPECT_STENCIL_BIT)); + + struct blorp_batch batch; + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); + + struct blorp_surf depth = {}; + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + uint32_t plane = anv_image_aspect_to_plane(image->aspects, + VK_IMAGE_ASPECT_DEPTH_BIT); + assert(base_layer + layer_count <= + anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level)); + get_blorp_surf_for_anv_image(cmd_buffer->device, + image, VK_IMAGE_ASPECT_DEPTH_BIT, + 0, 
ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + image->planes[plane].aux_usage, &depth); + depth.clear_color.f32[0] = ANV_HZ_FC_VAL; + } + + struct blorp_surf stencil = {}; + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + uint32_t plane = anv_image_aspect_to_plane(image->aspects, + VK_IMAGE_ASPECT_STENCIL_BIT); + get_blorp_surf_for_anv_image(cmd_buffer->device, + image, VK_IMAGE_ASPECT_STENCIL_BIT, + 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX, + image->planes[plane].aux_usage, &stencil); + } + + /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear": + * + * "The following is required when performing a depth buffer clear with + * using the WM_STATE or 3DSTATE_WM: + * + * * If other rendering operations have preceded this clear, a + * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit + * enabled must be issued before the rectangle primitive used for + * the depth buffer clear operation. + * * [...]" + * + * Even though the PRM only says that this is required if using 3DSTATE_WM + * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional + * hangs when doing a clear with WM_HZ_OP. + */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT; + + blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil, + level, base_layer, layer_count, + area.offset.x, area.offset.y, + area.offset.x + area.extent.width, + area.offset.y + area.extent.height, + aspects & VK_IMAGE_ASPECT_DEPTH_BIT, + ANV_HZ_FC_VAL, + aspects & VK_IMAGE_ASPECT_STENCIL_BIT, + stencil_value); - blorp_hiz_op(&batch, &surf, 0, 0, 1, op); blorp_batch_finish(&batch); + + /* From the SKL PRM, Depth Buffer Clear: + * + * "Depth Buffer Clear Workaround + * + * Depth buffer clear pass using any of the methods (WM_STATE, + * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL + * command with DEPTH_STALL bit and Depth FLUSH bits “set” before + * starting to render. DepthStall and DepthFlush are not needed between + * consecutive depth clear passes nor is it required if the depth-clear + * pass was done with “full_surf_clear” bit set in the + * 3DSTATE_WM_HZ_OP." + * + * Even though the PRM provides a bunch of conditions under which this is + * supposedly unnecessary, we choose to perform the flush unconditionally + * just to be safe. + */ + cmd_buffer->state.pending_pipe_bits |= + ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | ANV_PIPE_DEPTH_STALL_BIT; } void -anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer, - const struct anv_state surface_state, - const struct anv_image * const image, - const uint8_t level, const uint32_t layer_count, - const enum blorp_fast_clear_op op) +anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer, + const struct anv_image *image, + enum isl_format format, struct isl_swizzle swizzle, + VkImageAspectFlagBits aspect, + uint32_t base_layer, uint32_t layer_count, + enum isl_aux_op mcs_op, union isl_color_value *clear_value, + bool predicate) { - assert(cmd_buffer && image); + assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT); + assert(image->samples > 1); + assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0)); - /* The resolved subresource range must have a CCS buffer. */ - assert(level < anv_image_aux_levels(image)); - assert(layer_count <= anv_image_aux_layers(image, level)); - assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT && image->samples == 1); + /* Multisampling with multi-planar formats is not supported */ + assert(image->n_planes == 1); - /* Create a binding table for this surface state. 
 void
-anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
-                const struct anv_state surface_state,
-                const struct anv_image * const image,
-                const uint8_t level, const uint32_t layer_count,
-                const enum blorp_fast_clear_op op)
+anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
+                 const struct anv_image *image,
+                 enum isl_format format, struct isl_swizzle swizzle,
+                 VkImageAspectFlagBits aspect,
+                 uint32_t base_layer, uint32_t layer_count,
+                 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
+                 bool predicate)
 {
-   assert(cmd_buffer && image);
+   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+   assert(image->samples > 1);
+   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));

-   /* The resolved subresource range must have a CCS buffer. */
-   assert(level < anv_image_aux_levels(image));
-   assert(layer_count <= anv_image_aux_layers(image, level));
-   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT && image->samples == 1);
+   /* Multisampling with multi-planar formats is not supported */
+   assert(image->n_planes == 1);

-   /* Create a binding table for this surface state. */
-   uint32_t binding_table;
-   VkResult result =
-      binding_table_for_surface_state(cmd_buffer, surface_state,
-                                      &binding_table);
-   if (result != VK_SUCCESS)
-      return;
+   struct blorp_batch batch;
+   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
+                    BLORP_BATCH_PREDICATE_ENABLE * predicate +
+                    BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);
+
+   struct blorp_surf surf;
+   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
+                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
+                                ISL_AUX_USAGE_MCS, &surf);
+
+   /* Blorp will store the clear color for us if we provide the clear color
+    * address and we are doing a fast clear.  So we save the clear value into
+    * the blorp surface.
+    */
+   if (clear_value)
+      surf.clear_color = *clear_value;
+
+   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
+    *
+    *    "After Render target fast clear, pipe-control with color cache
+    *    write-flush must be issued before sending any DRAW commands on
+    *    that render target."
+    *
+    * This comment is a bit cryptic and doesn't really tell you what's going
+    * on or what's really needed.  It appears that fast clear ops are not
+    * properly synchronized with other drawing.  This means that we cannot
+    * have a fast clear operation in the pipe at the same time as other
+    * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
+    * that the contents of the previous draw hit the render target before we
+    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
+    * that it is completed before any additional drawing occurs.
+    */
+   cmd_buffer->state.pending_pipe_bits |=
+      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+
+   switch (mcs_op) {
+   case ISL_AUX_OP_FAST_CLEAR:
+      blorp_fast_clear(&batch, &surf, format, swizzle,
+                       0, base_layer, layer_count,
+                       0, 0, image->extent.width, image->extent.height);
+      break;
+   case ISL_AUX_OP_PARTIAL_RESOLVE:
+      blorp_mcs_partial_resolve(&batch, &surf, format,
+                                base_layer, layer_count);
+      break;
+   case ISL_AUX_OP_FULL_RESOLVE:
+   case ISL_AUX_OP_AMBIGUATE:
+   default:
+      unreachable("Unsupported MCS operation");
+   }
+
+   cmd_buffer->state.pending_pipe_bits |=
+      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+
+   blorp_batch_finish(&batch);
+}
+
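The blorp_batch_init() calls above select their flags branchlessly: each flag is a single disjoint bit, and multiplying it by a 0-or-1 condition either keeps or drops it. An equivalent explicit spelling, shown only as a reading aid:

   /* Branchy equivalent of:
    *    BLORP_BATCH_PREDICATE_ENABLE * predicate +
    *    BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value
    */
   uint32_t flags = 0;
   if (predicate)
      flags |= BLORP_BATCH_PREDICATE_ENABLE;
   if (clear_value == NULL)
      flags |= BLORP_BATCH_NO_UPDATE_CLEAR_COLOR;
   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, flags);
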
+void
+anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
+                 const struct anv_image *image,
+                 enum isl_format format, struct isl_swizzle swizzle,
+                 VkImageAspectFlagBits aspect, uint32_t level,
+                 uint32_t base_layer, uint32_t layer_count,
+                 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
+                 bool predicate)
+{
+   assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
+   assert(image->samples == 1);
+   assert(level < anv_image_aux_levels(image, aspect));
+   /* Multi-LOD YCbCr is not allowed */
+   assert(image->n_planes == 1 || level == 0);
+   assert(base_layer + layer_count <=
+          anv_image_aux_layers(image, aspect, level));
+
+   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
+   uint32_t width_div = image->format->planes[plane].denominator_scales[0];
+   uint32_t height_div = image->format->planes[plane].denominator_scales[1];
+   uint32_t level_width = anv_minify(image->extent.width, level) / width_div;
+   uint32_t level_height = anv_minify(image->extent.height, level) / height_div;

    struct blorp_batch batch;
    blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
-                    BLORP_BATCH_PREDICATE_ENABLE);
+                    BLORP_BATCH_PREDICATE_ENABLE * predicate +
+                    BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);

    struct blorp_surf surf;
-   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
-                                image->aux_usage == ISL_AUX_USAGE_CCS_E ?
-                                ISL_AUX_USAGE_CCS_E : ISL_AUX_USAGE_CCS_D,
+   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
+                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
+                                image->planes[plane].aux_usage,
                                 &surf);

-   blorp_ccs_resolve_attachment(&batch, binding_table, &surf, level,
-                                layer_count, image->color_surface.isl.format,
-                                op);
+   /* Blorp will store the clear color for us if we provide the clear color
+    * address and we are doing a fast clear.  So we save the clear value into
+    * the blorp surface.
+    */
+   if (clear_value)
+      surf.clear_color = *clear_value;
+
+   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
+    *
+    *    "After Render target fast clear, pipe-control with color cache
+    *    write-flush must be issued before sending any DRAW commands on
+    *    that render target."
+    *
+    * This comment is a bit cryptic and doesn't really tell you what's going
+    * on or what's really needed.  It appears that fast clear ops are not
+    * properly synchronized with other drawing.  This means that we cannot
+    * have a fast clear operation in the pipe at the same time as other
+    * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
+    * that the contents of the previous draw hit the render target before we
+    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
+    * that it is completed before any additional drawing occurs.
+    */
+   cmd_buffer->state.pending_pipe_bits |=
+      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+
+   switch (ccs_op) {
+   case ISL_AUX_OP_FAST_CLEAR:
+      blorp_fast_clear(&batch, &surf, format, swizzle,
+                       level, base_layer, layer_count,
+                       0, 0, level_width, level_height);
+      break;
+   case ISL_AUX_OP_FULL_RESOLVE:
+   case ISL_AUX_OP_PARTIAL_RESOLVE:
+      blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count,
+                        format, ccs_op);
+      break;
+   case ISL_AUX_OP_AMBIGUATE:
+      for (uint32_t a = 0; a < layer_count; a++) {
+         const uint32_t layer = base_layer + a;
+         blorp_ccs_ambiguate(&batch, &surf, level, layer);
+      }
+      break;
+   default:
+      unreachable("Unsupported CCS operation");
+   }
+
+   cmd_buffer->state.pending_pipe_bits |=
+      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_END_OF_PIPE_SYNC_BIT;

    blorp_batch_finish(&batch);
 }
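Finally, a hypothetical caller of anv_image_ccs_op(); sketch only, the helper name example_ccs_ambiguate_all is invented and a single-plane color image is assumed:

   /* Reset (ambiguate) the CCS for every miplevel and layer, as a driver
    * might on a transition out of VK_IMAGE_LAYOUT_UNDEFINED.  The format and
    * swizzle arguments are unused by the ambiguate case in the switch above,
    * so the surface's own format is simply passed through.
    */
   static void
   example_ccs_ambiguate_all(struct anv_cmd_buffer *cmd_buffer,
                             const struct anv_image *image)
   {
      const uint32_t levels =
         anv_image_aux_levels(image, VK_IMAGE_ASPECT_COLOR_BIT);

      for (uint32_t level = 0; level < levels; level++) {
         const uint32_t layers =
            anv_image_aux_layers(image, VK_IMAGE_ASPECT_COLOR_BIT, level);
         anv_image_ccs_op(cmd_buffer, image,
                          image->planes[0].surface.isl.format,
                          ISL_SWIZZLE_IDENTITY,
                          VK_IMAGE_ASPECT_COLOR_BIT, level,
                          0 /* base_layer */, layers,
                          ISL_AUX_OP_AMBIGUATE, NULL /* clear_value */,
                          false /* predicate */);
      }
   }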