#include "iris_context.h"
#include "iris_resource.h"
#include "iris_screen.h"
+#include "intel/common/gen_aux_map.h"
#include "intel/dev/gen_debug.h"
#include "isl/isl.h"
#include "drm-uapi/drm_fourcc.h"
if (!isl_format_supports_ccs_e(devinfo, linear_format))
return false;
- return true;
+ return devinfo->gen >= 9 && devinfo->gen <= 11;
}
case I915_FORMAT_MOD_Y_TILED:
case I915_FORMAT_MOD_X_TILED:
iris_resource_disable_aux(struct iris_resource *res)
{
iris_bo_unreference(res->aux.bo);
+ iris_bo_unreference(res->aux.extra_aux.bo);
iris_bo_unreference(res->aux.clear_color_bo);
free(res->aux.state);
res->aux.usage = ISL_AUX_USAGE_NONE;
res->aux.possible_usages = 1 << ISL_AUX_USAGE_NONE;
res->aux.sampler_usages = 1 << ISL_AUX_USAGE_NONE;
+ res->aux.has_hiz = 0;
res->aux.surf.size_B = 0;
res->aux.bo = NULL;
+ res->aux.extra_aux.surf.size_B = 0;
+ res->aux.extra_aux.bo = NULL;
res->aux.clear_color_bo = NULL;
res->aux.state = NULL;
}
return devinfo->gen >= 10 ? screen->isl_dev.ss.clear_color_state_size : 0;
}
+/**
+ * Register the CCS location of a resource with the aux-map context.
+ *
+ * This only does work when devinfo->gen >= 12 and the resource's current
+ * aux usage includes CCS; in every other case it returns without touching
+ * anything.  The CCS address passed to gen_aux_map_add_image() is taken from
+ * the extra aux surface when one exists (extra_aux.surf.size_B > 0) and from
+ * the primary aux surface otherwise.
+ */
+static void
+map_aux_addresses(struct iris_screen *screen, struct iris_resource *res)
+{
+ const struct gen_device_info *devinfo = &screen->devinfo;
+ if (devinfo->gen >= 12 && isl_aux_usage_has_ccs(res->aux.usage)) {
+ void *aux_map_ctx = iris_bufmgr_get_aux_map_context(screen->bufmgr);
+ assert(aux_map_ctx);
+ /* Use the extra aux buffer and offset when the extra aux surface is
+ * non-empty; fall back to the primary aux buffer and offset otherwise.
+ */
+ const bool has_extra_ccs = res->aux.extra_aux.surf.size_B > 0;
+ struct iris_bo *aux_bo = has_extra_ccs ?
+ res->aux.extra_aux.bo : res->aux.bo;
+ const unsigned aux_offset = has_extra_ccs ?
+ res->aux.extra_aux.offset : res->aux.offset;
+ gen_aux_map_add_image(aux_map_ctx, &res->surf, res->bo->gtt_offset,
+ aux_bo->gtt_offset + aux_offset);
+ /* NOTE(review): this records the aux BO's base GTT offset, without
+ * aux_offset, unlike the address passed to gen_aux_map_add_image()
+ * above -- confirm that is what users of aux_map_address expect.
+ */
+ res->bo->aux_map_address = aux_bo->gtt_offset;
+ }
+}
+
+/**
+ * Decide whether CCS_E should be offered for a given format.
+ *
+ * Returns false when isl_format_supports_ccs_e() rejects the format, and
+ * also when the format's red channel is a 32-bit ISL_SFLOAT (see the
+ * performance note in the body).  Returns true for every other format.
+ * Only the red channel's layout is inspected.
+ */
+static bool
+want_ccs_e_for_format(const struct gen_device_info *devinfo,
+ enum isl_format format)
+{
+ if (!isl_format_supports_ccs_e(devinfo, format))
+ return false;
+
+ const struct isl_format_layout *fmtl = isl_format_get_layout(format);
+
+ /* CCS_E seems to significantly hurt performance with 32-bit floating
+ * point formats. For example, Paraview's "Wavelet Volume" case uses
+ * both R32_FLOAT and R32G32B32A32_FLOAT, and enabling CCS_E for those
+ * formats causes a 62% FPS drop.
+ *
+ * However, many benchmarks seem to use 16-bit float with no issues.
+ */
+ if (fmtl->channels.r.bits == 32 && fmtl->channels.r.type == ISL_SFLOAT)
+ return false;
+
+ return true;
+}
+
/**
* Configure aux for the resource, but don't allocate it. For images which
* might be shared with modifiers, we must allocate the image and aux data in
uint64_t *aux_size_B,
uint32_t *alloc_flags)
{
- struct isl_device *isl_dev = &screen->isl_dev;
- enum isl_aux_state initial_state;
- UNUSED bool ok = false;
+ const struct gen_device_info *devinfo = &screen->devinfo;
+
+ /* Try to create the auxiliary surfaces allowed by the modifier or by
+ * the user if no modifier is specified.
+ */
+ assert(!res->mod_info || res->mod_info->aux_usage == ISL_AUX_USAGE_NONE ||
+ res->mod_info->aux_usage == ISL_AUX_USAGE_CCS_E);
+
+ const bool has_mcs = !res->mod_info &&
+ isl_surf_get_mcs_surf(&screen->isl_dev, &res->surf, &res->aux.surf);
+
+ const bool has_hiz = !res->mod_info && !(INTEL_DEBUG & DEBUG_NO_HIZ) &&
+ isl_surf_get_hiz_surf(&screen->isl_dev, &res->surf, &res->aux.surf);
+
+ const bool has_ccs =
+ ((!res->mod_info && !(INTEL_DEBUG & DEBUG_NO_RBC)) ||
+ (res->mod_info && res->mod_info->aux_usage != ISL_AUX_USAGE_NONE)) &&
+ isl_surf_get_ccs_surf(&screen->isl_dev, &res->surf, &res->aux.surf,
+ &res->aux.extra_aux.surf, 0);
+
+ /* Having both HIZ and MCS is impossible. */
+ assert(!has_mcs || !has_hiz);
+
+ /* Ensure aux surface creation for MCS_CCS and HIZ_CCS is correct. */
+ if (has_ccs && (has_mcs || has_hiz)) {
+ assert(res->aux.extra_aux.surf.size_B > 0 &&
+ res->aux.extra_aux.surf.usage & ISL_SURF_USAGE_CCS_BIT);
+ assert(res->aux.surf.size_B > 0 &&
+ res->aux.surf.usage &
+ (ISL_SURF_USAGE_HIZ_BIT | ISL_SURF_USAGE_MCS_BIT));
+ }
+ if (res->mod_info && has_ccs) {
+ /* Only allow a CCS modifier if the aux was created successfully. */
+ res->aux.possible_usages |= 1 << res->mod_info->aux_usage;
+ } else if (has_mcs) {
+ res->aux.possible_usages |= 1 << ISL_AUX_USAGE_MCS;
+ } else if (has_hiz) {
+ res->aux.possible_usages |=
+ 1 << (has_ccs ? ISL_AUX_USAGE_HIZ_CCS : ISL_AUX_USAGE_HIZ);
+ } else if (has_ccs) {
+ if (want_ccs_e_for_format(devinfo, res->surf.format))
+ res->aux.possible_usages |= 1 << ISL_AUX_USAGE_CCS_E;
+
+ if (isl_format_supports_ccs_d(devinfo, res->surf.format))
+ res->aux.possible_usages |= 1 << ISL_AUX_USAGE_CCS_D;
+ }
+
+ res->aux.usage = util_last_bit(res->aux.possible_usages) - 1;
+
+ res->aux.sampler_usages = res->aux.possible_usages;
+
+ /* We don't always support sampling with hiz. But when we do, it must be
+ * single sampled.
+ */
+ if (!devinfo->has_sample_with_hiz || res->surf.samples > 1)
+ res->aux.sampler_usages &= ~(1 << ISL_AUX_USAGE_HIZ);
+
+ /* We don't always support sampling with HIZ_CCS. But when we do, treat it
+ * as CCS_E.
+ */
+ res->aux.sampler_usages &= ~(1 << ISL_AUX_USAGE_HIZ_CCS);
+ if (isl_surf_supports_hiz_ccs_wt(devinfo, &res->surf, res->aux.usage))
+ res->aux.sampler_usages |= 1 << ISL_AUX_USAGE_CCS_E;
+
+ enum isl_aux_state initial_state;
*aux_size_B = 0;
*alloc_flags = 0;
assert(!res->aux.bo);
switch (res->aux.usage) {
case ISL_AUX_USAGE_NONE:
- res->aux.surf.size_B = 0;
- ok = true;
- break;
+ /* Having no aux buffer is only okay if there's no modifier with aux. */
+ return !res->mod_info || res->mod_info->aux_usage == ISL_AUX_USAGE_NONE;
case ISL_AUX_USAGE_HIZ:
+ case ISL_AUX_USAGE_HIZ_CCS:
initial_state = ISL_AUX_STATE_AUX_INVALID;
- ok = isl_surf_get_hiz_surf(isl_dev, &res->surf, &res->aux.surf);
break;
case ISL_AUX_USAGE_MCS:
+ case ISL_AUX_USAGE_MCS_CCS:
/* The Ivybridge PRM, Vol 2 Part 1 p326 says:
*
* "When MCS buffer is enabled and bound to MSRT, it is required
* 1's, so we simply memset it to 0xff.
*/
initial_state = ISL_AUX_STATE_CLEAR;
- ok = isl_surf_get_mcs_surf(isl_dev, &res->surf, &res->aux.surf);
break;
case ISL_AUX_USAGE_CCS_D:
case ISL_AUX_USAGE_CCS_E:
else
initial_state = ISL_AUX_STATE_PASS_THROUGH;
*alloc_flags |= BO_ALLOC_ZEROED;
- ok = isl_surf_get_ccs_surf(isl_dev, &res->surf, &res->aux.surf, 0);
break;
}
- /* We should have a valid aux_surf. */
- if (!ok)
- return false;
-
- /* No work is needed for a zero-sized auxiliary buffer. */
- if (res->aux.surf.size_B == 0)
- return true;
-
if (!res->aux.state) {
/* Create the aux_state for the auxiliary buffer. */
res->aux.state = create_aux_state_map(res, initial_state);
uint64_t size = res->aux.surf.size_B;
+ /* Allocate space in the buffer for storing the CCS. */
+ if (res->aux.extra_aux.surf.size_B > 0) {
+ res->aux.extra_aux.offset =
+ ALIGN(size, res->aux.extra_aux.surf.alignment_B);
+ size = res->aux.extra_aux.offset + res->aux.extra_aux.surf.size_B;
+ }
+
/* Allocate space in the buffer for storing the clear color. On modern
* platforms (gen > 9), we can read it directly from such buffer.
*
size += iris_get_aux_clear_color_state_size(screen);
*aux_size_B = size;
- if (res->aux.usage == ISL_AUX_USAGE_HIZ) {
+ if (isl_aux_usage_has_hiz(res->aux.usage)) {
for (unsigned level = 0; level < res->surf.levels; ++level) {
uint32_t width = u_minify(res->surf.phys_level0_sa.width, level);
uint32_t height = u_minify(res->surf.phys_level0_sa.height, level);
}
if (iris_resource_get_aux_state(res, 0, 0) != ISL_AUX_STATE_AUX_INVALID) {
- uint8_t memset_value = res->aux.usage == ISL_AUX_USAGE_MCS ? 0xFF : 0;
+ uint8_t memset_value = isl_aux_usage_has_mcs(res->aux.usage) ? 0xFF : 0;
memset((char*)map + res->aux.offset, memset_value,
res->aux.surf.size_B);
}
+ /* The Bspec section titled "MCS/CCS Buffers for Render Target(s)" states:
+ * - If Software wants to enable Color Compression without Fast clear,
+ * Software needs to initialize MCS with zeros.
+ * - Lossless compression and CCS initialized to all F (using HW Fast
+ * Clear or SW direct Clear)
+ *
+ * We think the first bullet point above is referring to the CCS aux
+ * surface. Since we initialize the MCS in the clear state, we also
+ * initialize the CCS in the clear state (via SW direct clear) to keep
+ * the two in sync.
+ */
+ memset((char*)map + res->aux.extra_aux.offset,
+ isl_aux_usage_has_mcs(res->aux.usage) ? 0xFF : 0,
+ res->aux.extra_aux.surf.size_B);
+
/* Zero the indirect clear color to match ::fast_clear_color. */
memset((char *)map + res->aux.clear_color_offset, 0,
clear_color_state_size);
iris_bo_unmap(res->aux.bo);
}
+ if (res->aux.extra_aux.surf.size_B > 0) {
+ res->aux.extra_aux.bo = res->aux.bo;
+ iris_bo_reference(res->aux.extra_aux.bo);
+ }
+
if (clear_color_state_size > 0) {
res->aux.clear_color_bo = res->aux.bo;
iris_bo_reference(res->aux.clear_color_bo);
* block sizes.
*/
res->aux.bo = iris_bo_alloc_tiled(screen->bufmgr, "aux buffer", size, 4096,
- IRIS_MEMZONE_OTHER, I915_TILING_Y,
+ IRIS_MEMZONE_OTHER,
+ isl_tiling_to_i915_tiling(res->aux.surf.tiling),
res->aux.surf.row_pitch_B, alloc_flags);
if (!res->aux.bo) {
return false;
iris_get_aux_clear_color_state_size(screen)))
return false;
+ map_aux_addresses(screen, res);
+
return true;
}
res->base.next = NULL;
}
-static bool
-supports_mcs(const struct isl_surf *surf)
-{
- /* MCS compression only applies to multisampled resources. */
- if (surf->samples <= 1)
- return false;
-
- /* Depth and stencil buffers use the IMS (interleaved) layout. */
- if (isl_surf_usage_is_depth_or_stencil(surf->usage))
- return false;
-
- return true;
-}
-
-static bool
-supports_ccs(const struct gen_device_info *devinfo,
- const struct isl_surf *surf)
-{
- /* CCS only supports singlesampled resources. */
- if (surf->samples > 1)
- return false;
-
- /* Note: still need to check the format! */
-
- return true;
-}
-
-static bool
-want_ccs_e_for_format(const struct gen_device_info *devinfo,
- enum isl_format format)
-{
- if (!isl_format_supports_ccs_e(devinfo, format))
- return false;
-
- const struct isl_format_layout *fmtl = isl_format_get_layout(format);
-
- /* CCS_E seems to significantly hurt performance with 32-bit floating
- * point formats. For example, Paraview's "Wavelet Volume" case uses
- * both R32_FLOAT and R32G32B32A32_FLOAT, and enabling CCS_E for those
- * formats causes a 62% FPS drop.
- *
- * However, many benchmarks seem to use 16-bit float with no issues.
- */
- if (fmtl->channels.r.bits == 32 && fmtl->channels.r.type == ISL_SFLOAT)
- return false;
-
- return true;
-}
-
static struct pipe_resource *
iris_resource_create_for_buffer(struct pipe_screen *pscreen,
const struct pipe_resource *templ)
} else {
if (modifiers_count > 0) {
fprintf(stderr, "Unsupported modifier, resource creation failed.\n");
- return NULL;
- }
-
- /* No modifiers - we can select our own tiling. */
-
- if (has_depth) {
- /* Depth must be Y-tiled */
- tiling_flags = ISL_TILING_Y0_BIT;
- } else if (templ->format == PIPE_FORMAT_S8_UINT) {
- /* Stencil must be W-tiled */
- tiling_flags = ISL_TILING_W_BIT;
- } else if (templ->target == PIPE_BUFFER ||
- templ->target == PIPE_TEXTURE_1D ||
- templ->target == PIPE_TEXTURE_1D_ARRAY) {
- /* Use linear for buffers and 1D textures */
- tiling_flags = ISL_TILING_LINEAR_BIT;
+ goto fail;
}
/* Use linear for staging buffers */
.tiling_flags = tiling_flags);
assert(isl_surf_created_successfully);
- if (res->mod_info) {
- res->aux.possible_usages |= 1 << res->mod_info->aux_usage;
- } else if (supports_mcs(&res->surf)) {
- res->aux.possible_usages |= 1 << ISL_AUX_USAGE_MCS;
- } else if (has_depth) {
- if (likely(!(INTEL_DEBUG & DEBUG_NO_HIZ)))
- res->aux.possible_usages |= 1 << ISL_AUX_USAGE_HIZ;
- } else if (likely(!(INTEL_DEBUG & DEBUG_NO_RBC)) &&
- supports_ccs(devinfo, &res->surf)) {
- if (want_ccs_e_for_format(devinfo, res->surf.format))
- res->aux.possible_usages |= 1 << ISL_AUX_USAGE_CCS_E;
-
- if (isl_format_supports_ccs_d(devinfo, res->surf.format))
- res->aux.possible_usages |= 1 << ISL_AUX_USAGE_CCS_D;
- }
-
- res->aux.usage = util_last_bit(res->aux.possible_usages) - 1;
-
- res->aux.sampler_usages = res->aux.possible_usages;
-
- /* We don't always support sampling with hiz. But when we do, it must be
- * single sampled.
- */
- if (!devinfo->has_sample_with_hiz || res->surf.samples > 1) {
- res->aux.sampler_usages &= ~(1 << ISL_AUX_USAGE_HIZ);
- }
-
const char *name = "miptree";
enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
bo_size = res->surf.size_B;
}
- res->bo = iris_bo_alloc_tiled(screen->bufmgr, name, bo_size, 4096, memzone,
+ uint32_t alignment = MAX2(4096, res->surf.alignment_B);
+ res->bo = iris_bo_alloc_tiled(screen->bufmgr, name, bo_size, alignment,
+ memzone,
isl_tiling_to_i915_tiling(res->surf.tiling),
res->surf.row_pitch_B, flags);
res->aux.clear_color_offset += aux_offset;
if (!iris_resource_init_aux_buf(res, flags, clear_color_state_size))
aux_enabled = false;
+ map_aux_addresses(screen, res);
}
}
- if (!aux_enabled)
- iris_resource_disable_aux(res);
+ if (!aux_enabled) {
+ if (res->mod_info && res->mod_info->aux_usage != ISL_AUX_USAGE_NONE)
+ goto fail;
+ else
+ iris_resource_disable_aux(res);
+ }
return &res->base;
if (res->mod_info->aux_usage != ISL_AUX_USAGE_NONE) {
uint32_t alloc_flags;
uint64_t size;
- res->aux.usage = res->mod_info->aux_usage;
- res->aux.possible_usages = 1 << res->mod_info->aux_usage;
- res->aux.sampler_usages = res->aux.possible_usages;
bool ok = iris_resource_configure_aux(screen, res, true, &size,
&alloc_flags);
assert(ok);
bool mod_with_aux =
res->mod_info && res->mod_info->aux_usage != ISL_AUX_USAGE_NONE;
bool wants_aux = mod_with_aux && plane > 0;
- struct iris_bo *bo = wants_aux ? res->aux.bo : res->bo;
bool result;
unsigned handle;
+ if (iris_resource_unfinished_aux_import(res))
+ iris_resource_finish_aux_import(screen, res);
+
+ struct iris_bo *bo = wants_aux ? res->aux.bo : res->bo;
+
iris_resource_disable_aux_on_first_query(resource, handle_usage);
switch (param) {