#include <string.h>
#include <unistd.h>
#include <fcntl.h>
+#include <sys/mman.h>
#include "anv_private.h"
#include "util/debug.h"
+#include "vk_util.h"
#include "vk_format_info.h"
* Exactly one bit must be set in \a aspect.
*/
static isl_surf_usage_flags_t
-choose_isl_surf_usage(VkImageUsageFlags vk_usage,
+choose_isl_surf_usage(VkImageCreateFlags vk_create_flags,
+ VkImageUsageFlags vk_usage,
VkImageAspectFlags aspect)
{
isl_surf_usage_flags_t isl_usage = 0;
if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
- if (vk_usage & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
+ if (vk_create_flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
isl_usage |= ISL_SURF_USAGE_CUBE_BIT;
/* Even if we're only using it for transfer operations, clears to depth and
image->alignment = MAX2(image->alignment, surf->isl.alignment);
}
+
+/**
+ * Decide whether CCS_E can be used with every format the image described by
+ * \a vk_info may be accessed with.
+ *
+ * The image's own format must support CCS_E. For an image created without
+ * VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT that is sufficient. For a mutable-format
+ * image the result is true only when a non-empty
+ * VkImageFormatListCreateInfoKHR is chained into pNext and every listed view
+ * format is CCS_E-compatible with the image format.
+ */
+static bool
+all_formats_ccs_e_compatible(const struct gen_device_info *devinfo,
+                             const struct VkImageCreateInfo *vk_info)
+{
+   const enum isl_format image_format =
+      anv_get_isl_format(devinfo, vk_info->format,
+                         VK_IMAGE_ASPECT_COLOR_BIT, vk_info->tiling);
+
+   if (!isl_format_supports_ccs_e(devinfo, image_format))
+      return false;
+
+   /* No MUTABLE_FORMAT_BIT: only the image format itself has to qualify. */
+   const bool format_is_mutable =
+      (vk_info->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) != 0;
+   if (!format_is_mutable)
+      return true;
+
+   const VkImageFormatListCreateInfoKHR *view_formats =
+      vk_find_struct_const(vk_info->pNext, IMAGE_FORMAT_LIST_CREATE_INFO_KHR);
+
+   /* A missing or empty list gives us nothing to check against. */
+   if (view_formats == NULL || view_formats->viewFormatCount == 0)
+      return false;
+
+   for (uint32_t idx = 0; idx < view_formats->viewFormatCount; ++idx) {
+      const enum isl_format candidate =
+         anv_get_isl_format(devinfo, view_formats->pViewFormats[idx],
+                            VK_IMAGE_ASPECT_COLOR_BIT, vk_info->tiling);
+
+      if (!isl_formats_are_ccs_e_compatible(devinfo, image_format, candidate))
+         return false;
+   }
+
+   return true;
+}
+
+/**
+ * Reserve space at the end of a color image that has an auxiliary surface
+ * for an extra buffer whose main job is to hold fast-clear values. With this
+ * buffer, subresources can be accessed with knowledge of their fast-clear
+ * values even in non-trivial situations (for instance, outside of the render
+ * pass in which the fast clear happened).
+ *
+ * The algorithm that manages the buffer is documented here so that it is
+ * easy to find. Whenever a subresource is fast-cleared, its clear value in
+ * this buffer is updated. For a later memory access to honor that value, one
+ * of two synchronization operations is performed:
+ *
+ *    a. Copy the value from the buffer into the surface state object used
+ *       for reading. When the value equals the clear value predetermined to
+ *       be the default in other surface state objects, this happens
+ *       implicitly. The only explicit use today is as part of (b).
+ *    b. Do (a), then resolve the subresource using that surface state
+ *       object. To keep performance decent, this only happens during layout
+ *       transitions.
+ *
+ * This scheme permits a fast clear whenever the hardware allows one, except
+ * in two cases where synchronization is impossible or undesirable:
+ *
+ *    * The subresource is in the GENERAL layout and the clear value is not
+ *      the special default value.
+ *
+ *      Synchronizing before a read is undesirable here. (b) is unavailable
+ *      because no layout transition is required between a write and a read
+ *      of an image in the GENERAL layout. Doing (a) explicitly is
+ *      undesirable because it would need large infrastructural changes. The
+ *      Vulkan API backs the decision not to optimize this layout by stating
+ *      that using it may cause suboptimal performance. NOTE: the auxiliary
+ *      buffer must always be enabled to support (a) implicitly.
+ *
+ *    * Only some of the layers of a given miplevel are cleared at once.
+ *
+ *      If the user clears each layer to a different value and then renders
+ *      to several layers at once, there is no opportunity to synchronize in
+ *      between. (a) does not help because the object can hold only one
+ *      clear value, and (b) is unavailable because no layout transition is
+ *      required in this case.
+ */
+static void
+add_fast_clear_state_buffer(struct anv_image *image,
+                            const struct anv_device *device)
+{
+   assert(image && device);
+   assert(image->aux_surface.isl.size > 0 &&
+          image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+
+   /* GPU memcpy operations require the clear-value buffer to start at a
+    * dword-aligned offset. The buffer is placed directly behind the
+    * auxiliary surface, so the three checks below establish that alignment.
+    */
+
+   /* Tiled images are 4K aligned, hence the image alignment is a multiple
+    * of a dword as well.
+    */
+   assert(image->alignment % 4 == 0);
+
+   /* Auxiliary surfaces are expected to be sized in multiples of 4K, which
+    * leaves their end dword-aligned.
+    */
+   assert(image->aux_surface.isl.size % 4 == 0);
+
+   /* The auxiliary surface has to be the last thing in the image so far;
+    * the new buffer is appended right after it.
+    */
+   assert(image->size ==
+          image->aux_surface.offset + image->aux_surface.isl.size);
+
+   /* Entries are packed back to back without padding, so each one must be a
+    * whole number of dwords for GPU memcpy to work on any of them.
+    */
+   const unsigned entry_size = anv_fast_clear_state_entry_size(device);
+   assert(entry_size % 4 == 0);
+
+   /* Grow the image by one entry per level reported by
+    * anv_image_aux_levels().
+    */
+   image->size = image->size + entry_size * anv_image_aux_levels(image);
+}
+
/**
* Initialize the anv_image::*_surface selected by \a aspect. Then update the
* image's memory requirements (that is, the image's size and alignment).
aspect, vk_info->tiling);
assert(format != ISL_FORMAT_UNSUPPORTED);
+ /* If an image is created as BLOCK_TEXEL_VIEW_COMPATIBLE, then we need to
+ * fall back to linear on Broadwell and earlier because we aren't
+ * guaranteed that we can handle offsets correctly. On Sky Lake, the
+ * horizontal and vertical alignments are sufficiently high that we can
+ * just use RENDER_SURFACE_STATE::X/Y Offset.
+ */
+ bool needs_shadow = false;
+ if (dev->info.gen <= 8 &&
+ (vk_info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT_KHR) &&
+ vk_info->tiling == VK_IMAGE_TILING_OPTIMAL) {
+ assert(isl_format_is_compressed(format));
+ tiling_flags = ISL_TILING_LINEAR_BIT;
+ needs_shadow = true;
+ }
+
ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl,
.dim = vk_to_isl_surf_dim[vk_info->imageType],
.format = format,
.array_len = vk_info->arrayLayers,
.samples = vk_info->samples,
.min_alignment = 0,
- .min_pitch = anv_info->stride,
- .usage = choose_isl_surf_usage(image->usage, aspect),
+ .row_pitch = anv_info->stride,
+ .usage = choose_isl_surf_usage(vk_info->flags, image->usage, aspect),
.tiling_flags = tiling_flags);
/* isl_surf_init() will fail only if provided invalid input. Invalid input
add_surface(image, anv_surf);
+   /* If an image is created as BLOCK_TEXEL_VIEW_COMPATIBLE, then we need to
+    * create an identical tiled shadow surface for use while texturing so we
+    * don't get garbage performance.
+    */
+   if (needs_shadow) {
+      assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
+      assert(tiling_flags == ISL_TILING_LINEAR_BIT);
+
+      /* choose_isl_surf_usage() takes the image *create* flags first and the
+       * usage flags second. Passing usage bits in the first slot would have
+       * them tested against VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT, so use the
+       * same arguments as the primary surface.
+       */
+      ok = isl_surf_init(&dev->isl_dev, &image->shadow_surface.isl,
+                         .dim = vk_to_isl_surf_dim[vk_info->imageType],
+                         .format = format,
+                         .width = image->extent.width,
+                         .height = image->extent.height,
+                         .depth = image->extent.depth,
+                         .levels = vk_info->mipLevels,
+                         .array_len = vk_info->arrayLayers,
+                         .samples = vk_info->samples,
+                         .min_alignment = 0,
+                         .row_pitch = anv_info->stride,
+                         .usage = choose_isl_surf_usage(vk_info->flags,
+                                                        image->usage, aspect),
+                         .tiling_flags = ISL_TILING_ANY_MASK);
+
+      /* isl_surf_init() will fail only if provided invalid input. Invalid input
+       * is illegal in Vulkan.
+       */
+      assert(ok);
+
+      add_surface(image, &image->shadow_surface);
+   }
+
/* Add a HiZ surface to a depth buffer that will be used for rendering.
*/
if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ /* We don't advertise that depth buffers could be used as storage
+ * images.
+ */
+ assert(!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT));
+
/* Allow the user to control HiZ enabling. Disable by default on gen7
* because resolves are not currently implemented pre-BDW.
*/
if (!(image->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
/* It will never be used as an attachment, HiZ is pointless. */
- } else if (image->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
- /* From the 1.0.37 spec:
- *
- * "An attachment used as an input attachment and depth/stencil
- * attachment must be in either VK_IMAGE_LAYOUT_GENERAL or
- * VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL."
- *
- * It will never have a layout of
- * VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, so HiZ is
- * currently pointless. If transfer operations learn to use the HiZ
- * buffer, we can enable HiZ for VK_IMAGE_LAYOUT_GENERAL and support
- * input attachments.
- */
- anv_finishme("Implement HiZ for input attachments");
- } else if (!env_var_as_boolean("INTEL_VK_HIZ", dev->info.gen >= 8)) {
- anv_finishme("Implement gen7 HiZ");
+ } else if (dev->info.gen == 7) {
+ anv_perf_warn(dev->instance, image, "Implement gen7 HiZ");
} else if (vk_info->mipLevels > 1) {
- anv_finishme("Test multi-LOD HiZ");
+ anv_perf_warn(dev->instance, image, "Enable multi-LOD HiZ");
} else if (vk_info->arrayLayers > 1) {
- anv_finishme("Implement multi-arrayLayer HiZ clears and resolves");
+ anv_perf_warn(dev->instance, image,
+ "Implement multi-arrayLayer HiZ clears and resolves");
} else if (dev->info.gen == 8 && vk_info->samples > 1) {
- anv_finishme("Test gen8 multisampled HiZ");
- } else {
+ anv_perf_warn(dev->instance, image, "Enable gen8 multisampled HiZ");
+ } else if (!unlikely(INTEL_DEBUG & DEBUG_NO_HIZ)) {
assert(image->aux_surface.isl.size == 0);
ok = isl_surf_get_hiz_surf(&dev->isl_dev, &image->depth_surface.isl,
&image->aux_surface.isl);
if (!unlikely(INTEL_DEBUG & DEBUG_NO_RBC)) {
assert(image->aux_surface.isl.size == 0);
ok = isl_surf_get_ccs_surf(&dev->isl_dev, &anv_surf->isl,
- &image->aux_surface.isl);
+ &image->aux_surface.isl, 0);
if (ok) {
+
+ /* Disable CCS when it is not useful (i.e., when you can't render
+ * to the image with CCS enabled).
+ */
+ if (!isl_format_supports_rendering(&dev->info, format)) {
+ /* While it may be technically possible to enable CCS for this
+ * image, we currently don't have things hooked up to get it
+ * working.
+ */
+ anv_perf_warn(dev->instance, image,
+ "This image format doesn't support rendering. "
+ "Not allocating an CCS buffer.");
+ image->aux_surface.isl.size = 0;
+ return VK_SUCCESS;
+ }
+
add_surface(image, &image->aux_surface);
+ add_fast_clear_state_buffer(image, dev);
/* For images created without MUTABLE_FORMAT_BIT set, we know that
* they will always be used with the original format. In
* compression on at all times for these formats.
*/
if (!(vk_info->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
- !(vk_info->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
- isl_format_supports_ccs_e(&dev->info, format)) {
+ all_formats_ccs_e_compatible(&dev->info, vk_info)) {
image->aux_usage = ISL_AUX_USAGE_CCS_E;
}
}
&image->aux_surface.isl);
if (ok) {
add_surface(image, &image->aux_surface);
+ add_fast_clear_state_buffer(image, dev);
image->aux_usage = ISL_AUX_USAGE_MCS;
}
}
anv_assert(pCreateInfo->extent.height > 0);
anv_assert(pCreateInfo->extent.depth > 0);
- image = vk_alloc2(&device->alloc, alloc, sizeof(*image), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ image = vk_zalloc2(&device->alloc, alloc, sizeof(*image), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!image)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
- memset(image, 0, sizeof(*image));
image->type = pCreateInfo->imageType;
image->extent = pCreateInfo->extent;
image->vk_format = pCreateInfo->format;
vk_free2(&device->alloc, pAllocator, image);
}
-VkResult anv_BindImageMemory(
- VkDevice _device,
- VkImage _image,
- VkDeviceMemory _memory,
- VkDeviceSize memoryOffset)
+/**
+ * Record on the image named by \a pBindInfo which memory object backs it.
+ *
+ * When the memory handle resolves to NULL the image is left unbound (bo NULL,
+ * offset 0). Otherwise the image takes the memory object's bo and the
+ * memoryOffset from \a pBindInfo. Shared by anv_BindImageMemory() and
+ * anv_BindImageMemory2KHR().
+ */
+static void
+anv_bind_image_memory(const VkBindImageMemoryInfoKHR *pBindInfo)
{
- ANV_FROM_HANDLE(anv_device, device, _device);
- ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
- ANV_FROM_HANDLE(anv_image, image, _image);
+ ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
+ ANV_FROM_HANDLE(anv_image, image, pBindInfo->image);
- if (mem) {
- image->bo = &mem->bo;
- image->offset = memoryOffset;
- } else {
+ assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR);
+
+ /* No memory object: clear any binding and stop here. */
+ if (mem == NULL) {
image->bo = NULL;
image->offset = 0;
+ return;
}
- if (image->aux_surface.isl.size > 0) {
-
- /* The offset and size must be a multiple of 4K or else the
- * anv_gem_mmap call below will return NULL.
- */
- assert((image->offset + image->aux_surface.offset) % 4096 == 0);
- assert(image->aux_surface.isl.size % 4096 == 0);
-
- /* Auxiliary surfaces need to have their memory cleared to 0 before they
- * can be used. For CCS surfaces, this puts them in the "resolved"
- * state so they can be used with CCS enabled before we ever touch it
- * from the GPU. For HiZ, we need something valid or else we may get
- * GPU hangs on some hardware and 0 works fine.
- */
- void *map = anv_gem_mmap(device, image->bo->gem_handle,
- image->offset + image->aux_surface.offset,
- image->aux_surface.isl.size,
- device->info.has_llc ? 0 : I915_MMAP_WC);
+ image->bo = mem->bo;
+ image->offset = pBindInfo->memoryOffset;
+}
- /* If anv_gem_mmap returns NULL, it's likely that the kernel was
- * not able to find space on the host to create a proper mapping.
- */
- if (map == NULL)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+/**
+ * Single-image bind entry point. Wraps its arguments in a temporary
+ * VkBindImageMemoryInfoKHR and forwards to anv_bind_image_memory(). The
+ * \a device argument is not used here. Always returns VK_SUCCESS.
+ */
+VkResult anv_BindImageMemory(
+ VkDevice device,
+ VkImage image,
+ VkDeviceMemory memory,
+ VkDeviceSize memoryOffset)
+{
+ anv_bind_image_memory(
+ &(VkBindImageMemoryInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR,
+ .image = image,
+ .memory = memory,
+ .memoryOffset = memoryOffset,
+ });
- memset(map, 0, image->aux_surface.isl.size);
+ return VK_SUCCESS;
+}
- anv_gem_munmap(map, image->aux_surface.isl.size);
- }
+/**
+ * Batched bind entry point. Applies anv_bind_image_memory() to each of the
+ * \a bindInfoCount entries of \a pBindInfos in order. The \a device argument
+ * is not used here. Always returns VK_SUCCESS.
+ */
+VkResult anv_BindImageMemory2KHR(
+ VkDevice device,
+ uint32_t bindInfoCount,
+ const VkBindImageMemoryInfoKHR* pBindInfos)
+{
+ for (uint32_t i = 0; i < bindInfoCount; i++)
+ anv_bind_image_memory(&pBindInfos[i]);
return VK_SUCCESS;
}
}
}
+/**
+ * This function determines the optimal buffer to use for a given
+ * VkImageLayout and other pieces of information needed to make that
+ * determination. This does not determine the optimal buffer to use
+ * during a resolve operation.
+ *
+ * Images without an auxiliary surface always yield ISL_AUX_USAGE_NONE. For
+ * the color aspect of an image with an auxiliary surface the layout is
+ * currently ignored and image->aux_usage is returned.
+ *
+ * @param devinfo The device information of the Intel GPU.
+ * @param image The image that may contain a collection of buffers.
+ * @param aspects The aspect(s) of the image to be accessed. Must be a
+ *                non-empty subset of image->aspects.
+ * @param layout The current layout of the image aspect(s).
+ *
+ * @return The primary buffer that should be used for the given layout.
+ */
+enum isl_aux_usage
+anv_layout_to_aux_usage(const struct gen_device_info * const devinfo,
+                        const struct anv_image * const image,
+                        const VkImageAspectFlags aspects,
+                        const VkImageLayout layout)
+{
+   /* Validate the inputs. */
+
+   /* The devinfo is needed as the optimal buffer varies across generations. */
+   assert(devinfo != NULL);
+
+   /* The layout of a NULL image is not properly defined. */
+   assert(image != NULL);
+
+   /* The aspects must be a non-empty subset of the image aspects. This has
+    * to be a bitmask test: a numeric comparison would accept, e.g.,
+    * COLOR|DEPTH for a DEPTH|STENCIL image.
+    */
+   assert(aspects != 0 && (aspects & ~image->aspects) == 0);
+
+   /* Determine the optimal buffer. */
+
+   /* If there is no auxiliary surface allocated, we must use the one and only
+    * main buffer.
+    */
+   if (image->aux_surface.isl.size == 0)
+      return ISL_AUX_USAGE_NONE;
+
+   /* All images that use an auxiliary surface are required to be tiled. */
+   assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
+
+   /* On BDW+, when clearing the stencil aspect of a depth stencil image,
+    * the HiZ buffer allows us to record the clear with a relatively small
+    * number of packets. Prior to BDW, the HiZ buffer provides no known benefit
+    * to the stencil aspect.
+    */
+   if (devinfo->gen < 8 && aspects == VK_IMAGE_ASPECT_STENCIL_BIT)
+      return ISL_AUX_USAGE_NONE;
+
+   const bool color_aspect = aspects == VK_IMAGE_ASPECT_COLOR_BIT;
+
+   /* The following switch currently only handles depth stencil aspects.
+    * TODO: Handle the color aspect.
+    */
+   if (color_aspect)
+      return image->aux_usage;
+
+   /* No default case on purpose: every VkImageLayout member is listed so
+    * that an unhandled value falls through to the unreachable() below.
+    */
+   switch (layout) {
+
+   /* Invalid Layouts */
+   case VK_IMAGE_LAYOUT_RANGE_SIZE:
+   case VK_IMAGE_LAYOUT_MAX_ENUM:
+      unreachable("Invalid image layout.");
+
+   /* Undefined layouts
+    *
+    * The pre-initialized layout is equivalent to the undefined layout for
+    * optimally-tiled images. We can only do color compression (CCS or HiZ)
+    * on tiled images.
+    */
+   case VK_IMAGE_LAYOUT_UNDEFINED:
+   case VK_IMAGE_LAYOUT_PREINITIALIZED:
+      return ISL_AUX_USAGE_NONE;
+
+
+   /* Transfer Layouts
+    *
+    * This buffer could be a depth buffer used in a transfer operation. BLORP
+    * currently doesn't use HiZ for transfer operations so we must use the main
+    * buffer for this layout. TODO: Enable HiZ in BLORP.
+    */
+   case VK_IMAGE_LAYOUT_GENERAL:
+   case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+   case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
+      return ISL_AUX_USAGE_NONE;
+
+
+   /* Sampling Layouts */
+   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+      assert(!color_aspect);
+      /* Fall-through */
+   case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
+   case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL_KHR:
+      if (anv_can_sample_with_hiz(devinfo, aspects, image->samples))
+         return ISL_AUX_USAGE_HIZ;
+      else
+         return ISL_AUX_USAGE_NONE;
+
+   case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR:
+      assert(color_aspect);
+
+      /* On SKL+, the render buffer can be decompressed by the presentation
+       * engine. Support for this feature has not yet landed in the wider
+       * ecosystem. TODO: Update this code when support lands.
+       *
+       * From the BDW PRM, Vol 7, Render Target Resolve:
+       *
+       *    If the MCS is enabled on a non-multisampled render target, the
+       *    render target must be resolved before being used for other
+       *    purposes (display, texture, CPU lock) The clear value from
+       *    SURFACE_STATE is written into pixels in the render target
+       *    indicated as clear in the MCS.
+       *
+       * Pre-SKL, the render buffer must be resolved before being used for
+       * presentation. We can infer that the auxiliary buffer is not used.
+       */
+      return ISL_AUX_USAGE_NONE;
+
+
+   /* Rendering Layouts */
+   case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
+      assert(color_aspect);
+      unreachable("Color images are not yet supported.");
+
+   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+   case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL_KHR:
+      assert(!color_aspect);
+      return ISL_AUX_USAGE_HIZ;
+
+   case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
+      unreachable("VK_KHR_shared_presentable_image is unsupported");
+   }
+
+   /* If the layout isn't recognized in the exhaustive switch above, the
+    * VkImageLayout value is not defined in vulkan.h.
+    */
+   unreachable("layout is not a VkImageLayout enumeration member.");
+}
+
+
static struct anv_state
alloc_surface_state(struct anv_device *device)
{
}
}
+/**
+ * Fill out a surface state for one aspect of an image.
+ *
+ * @param device          Device providing isl_dev, device info and the
+ *                        default MOCS.
+ * @param image           Image the state refers to.
+ * @param aspect          Aspect used to pick the image surface.
+ * @param view_in         View description; it is copied and the copy may be
+ *                        adjusted, the caller's struct is left untouched.
+ * @param view_usage      Usage bits OR'ed into the copied view's usage.
+ * @param aux_usage       Auxiliary usage to program. Must be
+ *                        ISL_AUX_USAGE_NONE for storage states that are not
+ *                        write-only (asserted).
+ * @param clear_color     Clear color to program, or NULL for the default
+ *                        (all zero, with ANV_HZ_FC_VAL in the first channel
+ *                        for HiZ on gen9+).
+ * @param flags           ANV_IMAGE_VIEW_STATE_* modifiers.
+ * @param state_inout     state.map receives the packed surface state;
+ *                        address and aux_address are recorded alongside.
+ * @param image_param_out If non-NULL, receives the brw_image_param for the
+ *                        view; only allowed for storage usage (asserted).
+ */
+void
+anv_image_fill_surface_state(struct anv_device *device,
+                             const struct anv_image *image,
+                             VkImageAspectFlagBits aspect,
+                             const struct isl_view *view_in,
+                             isl_surf_usage_flags_t view_usage,
+                             enum isl_aux_usage aux_usage,
+                             const union isl_color_value *clear_color,
+                             enum anv_image_view_state_flags flags,
+                             struct anv_surface_state *state_inout,
+                             struct brw_image_param *image_param_out)
+{
+   const struct anv_surface *surface =
+      anv_image_get_surface_for_aspect_mask(image, aspect);
+
+   struct isl_view view = *view_in;
+   view.usage |= view_usage;
+
+   /* For texturing with VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL from a
+    * compressed surface with a shadow surface, we use the shadow instead of
+    * the primary surface. The shadow surface will be tiled, unlike the main
+    * surface, so it should get significantly better performance.
+    */
+   if (image->shadow_surface.isl.size > 0 &&
+       isl_format_is_compressed(view.format) &&
+       (flags & ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL)) {
+      assert(isl_format_is_compressed(surface->isl.format));
+      assert(surface->isl.tiling == ISL_TILING_LINEAR);
+      assert(image->shadow_surface.isl.tiling != ISL_TILING_LINEAR);
+      surface = &image->shadow_surface;
+   }
+
+   if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT)
+      view.swizzle = anv_swizzle_for_render(view.swizzle);
+
+   /* If this is a HiZ buffer we can sample from with a programmable clear
+    * value (SKL+), define the clear value to the optimal constant.
+    */
+   union isl_color_value default_clear_color = { .u32 = { 0, } };
+   if (device->info.gen >= 9 && aux_usage == ISL_AUX_USAGE_HIZ)
+      default_clear_color.f32[0] = ANV_HZ_FC_VAL;
+   if (!clear_color)
+      clear_color = &default_clear_color;
+
+   const uint64_t address = image->offset + surface->offset;
+   const uint64_t aux_address = (aux_usage == ISL_AUX_USAGE_NONE) ? 0 :
+      image->offset + image->aux_surface.offset;
+
+   if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
+       !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY) &&
+       !isl_has_matching_typed_storage_image_format(&device->info,
+                                                    view.format)) {
+      /* In this case, we are a writeable storage buffer which needs to be
+       * lowered to linear. All tiling and offset calculations will be done in
+       * the shader.
+       */
+      assert(aux_usage == ISL_AUX_USAGE_NONE);
+      isl_buffer_fill_state(&device->isl_dev, state_inout->state.map,
+                            .address = address,
+                            .size = surface->isl.size,
+                            .format = ISL_FORMAT_RAW,
+                            .stride = 1,
+                            .mocs = device->default_mocs);
+      state_inout->address = address;
+      state_inout->aux_address = 0;
+   } else {
+      if (view_usage == ISL_SURF_USAGE_STORAGE_BIT &&
+          !(flags & ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY)) {
+         /* Typed surface reads support a very limited subset of the shader
+          * image formats. Translate it into the closest format the hardware
+          * supports.
+          */
+         assert(aux_usage == ISL_AUX_USAGE_NONE);
+         view.format = isl_lower_storage_image_format(&device->info,
+                                                      view.format);
+      }
+
+      const struct isl_surf *isl_surf = &surface->isl;
+
+      struct isl_surf tmp_surf;
+      uint32_t offset_B = 0, tile_x_sa = 0, tile_y_sa = 0;
+      if (isl_format_is_compressed(surface->isl.format) &&
+          !isl_format_is_compressed(view.format)) {
+         /* We're creating an uncompressed view of a compressed surface. This
+          * is allowed but only for a single level/layer.
+          */
+         assert(surface->isl.samples == 1);
+         assert(view.levels == 1);
+         assert(view.array_len == 1);
+
+         isl_surf_get_image_surf(&device->isl_dev, isl_surf,
+                                 view.base_level,
+                                 surface->isl.dim == ISL_SURF_DIM_3D ?
+                                    0 : view.base_array_layer,
+                                 surface->isl.dim == ISL_SURF_DIM_3D ?
+                                    view.base_array_layer : 0,
+                                 &tmp_surf,
+                                 &offset_B, &tile_x_sa, &tile_y_sa);
+
+         /* The newly created image represents the one subimage we're
+          * referencing with this view so it only has one array slice and
+          * miplevel.
+          */
+         view.base_array_layer = 0;
+         view.base_level = 0;
+
+         /* We're making an uncompressed view here. The image dimensions need
+          * to be scaled down by the block size.
+          */
+         const struct isl_format_layout *fmtl =
+            isl_format_get_layout(surface->isl.format);
+         tmp_surf.format = view.format;
+         tmp_surf.logical_level0_px.width =
+            DIV_ROUND_UP(tmp_surf.logical_level0_px.width, fmtl->bw);
+         tmp_surf.logical_level0_px.height =
+            DIV_ROUND_UP(tmp_surf.logical_level0_px.height, fmtl->bh);
+         tmp_surf.phys_level0_sa.width /= fmtl->bw;
+         tmp_surf.phys_level0_sa.height /= fmtl->bh;
+         tile_x_sa /= fmtl->bw;
+         tile_y_sa /= fmtl->bh;
+
+         isl_surf = &tmp_surf;
+
+         if (device->info.gen <= 8) {
+            assert(surface->isl.tiling == ISL_TILING_LINEAR);
+            assert(tile_x_sa == 0);
+            assert(tile_y_sa == 0);
+         }
+      }
+
+      isl_surf_fill_state(&device->isl_dev, state_inout->state.map,
+                          .surf = isl_surf,
+                          .view = &view,
+                          .address = address + offset_B,
+                          .clear_color = *clear_color,
+                          .aux_surf = &image->aux_surface.isl,
+                          .aux_usage = aux_usage,
+                          .aux_address = aux_address,
+                          .mocs = device->default_mocs,
+                          .x_offset_sa = tile_x_sa,
+                          .y_offset_sa = tile_y_sa);
+      state_inout->address = address + offset_B;
+      if (device->info.gen >= 8) {
+         state_inout->aux_address = aux_address;
+      } else {
+         /* On gen7 and prior, the bottom 12 bits of the MCS base address are
+          * used to store other information. This should be ok, however,
+          * because surface buffer addresses are always 4K page aligned.
+          */
+         uint32_t *aux_addr_dw = state_inout->state.map +
+            device->isl_dev.ss.aux_addr_offset;
+         assert((aux_address & 0xfff) == 0);
+         assert(aux_address == (*aux_addr_dw & 0xfffff000));
+         state_inout->aux_address = *aux_addr_dw;
+      }
+   }
+
+   anv_state_flush(device, state_inout->state);
+
+   if (image_param_out) {
+      assert(view_usage == ISL_SURF_USAGE_STORAGE_BIT);
+      isl_surf_fill_image_param(&device->isl_dev, image_param_out,
+                                &surface->isl, &view);
+   }
+}
VkResult
anv_CreateImageView(VkDevice _device,
ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image);
struct anv_image_view *iview;
- iview = vk_alloc2(&device->alloc, pAllocator, sizeof(*iview), 8,
+ iview = vk_zalloc2(&device->alloc, pAllocator, sizeof(*iview), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (iview == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
assert(range->layerCount > 0);
assert(range->baseMipLevel < image->levels);
- assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_STORAGE_BIT |
- VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT));
+
+ const VkImageViewUsageCreateInfoKHR *usage_info =
+ vk_find_struct_const(pCreateInfo, IMAGE_VIEW_USAGE_CREATE_INFO_KHR);
+ VkImageUsageFlags view_usage = usage_info ? usage_info->usage : image->usage;
+ /* View usage should be a subset of image usage */
+ assert((view_usage & ~image->usage) == 0);
+ assert(view_usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT |
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT));
switch (image->type) {
default:
break;
}
- const struct anv_surface *surface =
- anv_image_get_surface_for_aspect_mask(image, range->aspectMask);
-
iview->image = image;
- iview->bo = image->bo;
- iview->offset = image->offset + surface->offset;
iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
iview->vk_format = pCreateInfo->format;
iview->isl.usage = 0;
}
- /* If the HiZ buffer can be sampled from, set the constant clear color.
- * If it cannot, disable the isl aux usage flag.
- */
- float red_clear_color = 0.0f;
- enum isl_aux_usage surf_usage = image->aux_usage;
- if (image->aux_usage == ISL_AUX_USAGE_HIZ) {
- if (iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT &&
- anv_can_sample_with_hiz(device->info.gen, image->samples)) {
- /* When a HiZ buffer is sampled on gen9+, ensure that
- * the constant fast clear value is set in the surface state.
- */
- if (device->info.gen >= 9)
- red_clear_color = ANV_HZ_FC_VAL;
- } else {
- surf_usage = ISL_AUX_USAGE_NONE;
- }
- }
-
/* Input attachment surfaces for color are allocated and filled
* out at BeginRenderPass time because they need compression information.
* Compression is not yet enabled for depth textures and stencil doesn't
* allow compression so we can just use the texture surface state from the
* view.
*/
- if (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT ||
- (image->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT &&
+ if (view_usage & VK_IMAGE_USAGE_SAMPLED_BIT ||
+ (view_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT &&
!(iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT))) {
- iview->sampler_surface_state = alloc_surface_state(device);
-
- struct isl_view view = iview->isl;
- view.usage |= ISL_SURF_USAGE_TEXTURE_BIT;
- isl_surf_fill_state(&device->isl_dev,
- iview->sampler_surface_state.map,
- .surf = &surface->isl,
- .view = &view,
- .clear_color.f32 = { red_clear_color,},
- .aux_surf = &image->aux_surface.isl,
- .aux_usage = surf_usage,
- .mocs = device->default_mocs);
-
- anv_state_flush(device, iview->sampler_surface_state);
- } else {
- iview->sampler_surface_state.alloc_size = 0;
+ iview->optimal_sampler_surface_state.state = alloc_surface_state(device);
+ iview->general_sampler_surface_state.state = alloc_surface_state(device);
+
+ enum isl_aux_usage general_aux_usage =
+ anv_layout_to_aux_usage(&device->info, image, iview->aspect_mask,
+ VK_IMAGE_LAYOUT_GENERAL);
+ enum isl_aux_usage optimal_aux_usage =
+ anv_layout_to_aux_usage(&device->info, image, iview->aspect_mask,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+
+ anv_image_fill_surface_state(device, image, iview->aspect_mask,
+ &iview->isl, ISL_SURF_USAGE_TEXTURE_BIT,
+ optimal_aux_usage, NULL,
+ ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL,
+ &iview->optimal_sampler_surface_state,
+ NULL);
+
+ anv_image_fill_surface_state(device, image, iview->aspect_mask,
+ &iview->isl, ISL_SURF_USAGE_TEXTURE_BIT,
+ general_aux_usage, NULL,
+ 0,
+ &iview->general_sampler_surface_state,
+ NULL);
}
/* NOTE: This one needs to go last since it may stomp isl_view.format */
- if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
- iview->storage_surface_state = alloc_surface_state(device);
- iview->writeonly_storage_surface_state = alloc_surface_state(device);
-
- struct isl_view view = iview->isl;
- view.usage |= ISL_SURF_USAGE_STORAGE_BIT;
-
- /* Write-only accesses always used a typed write instruction and should
- * therefore use the real format.
- */
- isl_surf_fill_state(&device->isl_dev,
- iview->writeonly_storage_surface_state.map,
- .surf = &surface->isl,
- .view = &view,
- .aux_surf = &image->aux_surface.isl,
- .aux_usage = surf_usage,
- .mocs = device->default_mocs);
-
- if (isl_has_matching_typed_storage_image_format(&device->info,
- format.isl_format)) {
- /* Typed surface reads support a very limited subset of the shader
- * image formats. Translate it into the closest format the hardware
- * supports.
- */
- view.format = isl_lower_storage_image_format(&device->info,
- format.isl_format);
-
- isl_surf_fill_state(&device->isl_dev,
- iview->storage_surface_state.map,
- .surf = &surface->isl,
- .view = &view,
- .aux_surf = &image->aux_surface.isl,
- .aux_usage = surf_usage,
- .mocs = device->default_mocs);
- } else {
- anv_fill_buffer_surface_state(device, iview->storage_surface_state,
- ISL_FORMAT_RAW,
- iview->offset,
- iview->bo->size - iview->offset, 1);
- }
-
- isl_surf_fill_image_param(&device->isl_dev,
- &iview->storage_image_param,
- &surface->isl, &iview->isl);
-
- anv_state_flush(device, iview->storage_surface_state);
- anv_state_flush(device, iview->writeonly_storage_surface_state);
- } else {
- iview->storage_surface_state.alloc_size = 0;
- iview->writeonly_storage_surface_state.alloc_size = 0;
+ if (view_usage & VK_IMAGE_USAGE_STORAGE_BIT) {
+ iview->storage_surface_state.state = alloc_surface_state(device);
+ iview->writeonly_storage_surface_state.state = alloc_surface_state(device);
+
+ anv_image_fill_surface_state(device, image, iview->aspect_mask,
+ &iview->isl, ISL_SURF_USAGE_STORAGE_BIT,
+ ISL_AUX_USAGE_NONE, NULL,
+ 0,
+ &iview->storage_surface_state,
+ &iview->storage_image_param);
+
+ anv_image_fill_surface_state(device, image, iview->aspect_mask,
+ &iview->isl, ISL_SURF_USAGE_STORAGE_BIT,
+ ISL_AUX_USAGE_NONE, NULL,
+ ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY,
+ &iview->writeonly_storage_surface_state,
+ NULL);
}
*pView = anv_image_view_to_handle(iview);
if (!iview)
return;
- if (iview->sampler_surface_state.alloc_size > 0) {
+ if (iview->optimal_sampler_surface_state.state.alloc_size > 0) {
+ anv_state_pool_free(&device->surface_state_pool,
+ iview->optimal_sampler_surface_state.state);
+ }
+
+ if (iview->general_sampler_surface_state.state.alloc_size > 0) {
anv_state_pool_free(&device->surface_state_pool,
- iview->sampler_surface_state);
+ iview->general_sampler_surface_state.state);
}
- if (iview->storage_surface_state.alloc_size > 0) {
+ if (iview->storage_surface_state.state.alloc_size > 0) {
anv_state_pool_free(&device->surface_state_pool,
- iview->storage_surface_state);
+ iview->storage_surface_state.state);
}
- if (iview->writeonly_storage_surface_state.alloc_size > 0) {
+ if (iview->writeonly_storage_surface_state.state.alloc_size > 0) {
anv_state_pool_free(&device->surface_state_pool,
- iview->writeonly_storage_surface_state);
+ iview->writeonly_storage_surface_state.state);
}
vk_free2(&device->alloc, pAllocator, iview);
const uint32_t format_bs = isl_format_get_layout(view->format)->bpb / 8;
view->bo = buffer->bo;
view->offset = buffer->offset + pCreateInfo->offset;
- view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
- buffer->size - pCreateInfo->offset : pCreateInfo->range;
+ view->range = anv_buffer_get_range(buffer, pCreateInfo->offset,
+ pCreateInfo->range);
view->range = align_down_npot_u32(view->range, format_bs);
if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) {