+unsigned
+iris_get_num_logical_layers(const struct iris_resource *res, unsigned level)
+{
+ if (res->surf.dim == ISL_SURF_DIM_3D)
+ return minify(res->surf.logical_level0_px.depth, level);
+ else
+ return res->surf.logical_level0_px.array_len;
+}
+
+static enum isl_aux_state **
+create_aux_state_map(struct iris_resource *res, enum isl_aux_state initial)
+{
+ uint32_t total_slices = 0;
+ for (uint32_t level = 0; level < res->surf.levels; level++)
+ total_slices += iris_get_num_logical_layers(res, level);
+
+ const size_t per_level_array_size =
+ res->surf.levels * sizeof(enum isl_aux_state *);
+
+ /* We're going to allocate a single chunk of data for both the per-level
+ * reference array and the arrays of aux_state. This makes cleanup
+ * significantly easier.
+ */
+ const size_t total_size =
+ per_level_array_size + total_slices * sizeof(enum isl_aux_state);
+
+ void *data = malloc(total_size);
+ if (!data)
+ return NULL;
+
+ enum isl_aux_state **per_level_arr = data;
+ enum isl_aux_state *s = data + per_level_array_size;
+ for (uint32_t level = 0; level < res->surf.levels; level++) {
+ per_level_arr[level] = s;
+ const unsigned level_layers = iris_get_num_logical_layers(res, level);
+ for (uint32_t a = 0; a < level_layers; a++)
+ *(s++) = initial;
+ }
+ assert((void *)s == data + total_size);
+
+ return per_level_arr;
+}
+
+static unsigned
+iris_get_aux_clear_color_state_size(struct iris_screen *screen)
+{
+ const struct gen_device_info *devinfo = &screen->devinfo;
+ return devinfo->gen >= 10 ? screen->isl_dev.ss.clear_color_state_size : 0;
+}
+
+static void
+map_aux_addresses(struct iris_screen *screen, struct iris_resource *res)
+{
+ const struct gen_device_info *devinfo = &screen->devinfo;
+ if (devinfo->gen >= 12 && isl_aux_usage_has_ccs(res->aux.usage)) {
+ void *aux_map_ctx = iris_bufmgr_get_aux_map_context(screen->bufmgr);
+ assert(aux_map_ctx);
+ const bool has_extra_ccs = res->aux.extra_aux.surf.size_B > 0;
+ struct iris_bo *aux_bo = has_extra_ccs ?
+ res->aux.extra_aux.bo : res->aux.bo;
+ const unsigned aux_offset = has_extra_ccs ?
+ res->aux.extra_aux.offset : res->aux.offset;
+ gen_aux_map_add_image(aux_map_ctx, &res->surf, res->bo->gtt_offset,
+ aux_bo->gtt_offset + aux_offset);
+ res->bo->aux_map_address = aux_bo->gtt_offset;
+ }
+}
+
+static bool
+want_ccs_e_for_format(const struct gen_device_info *devinfo,
+ enum isl_format format)
+{
+ if (!isl_format_supports_ccs_e(devinfo, format))
+ return false;
+
+ const struct isl_format_layout *fmtl = isl_format_get_layout(format);
+
+ /* CCS_E seems to significantly hurt performance with 32-bit floating
+ * point formats. For example, Paraview's "Wavelet Volume" case uses
+ * both R32_FLOAT and R32G32B32A32_FLOAT, and enabling CCS_E for those
+ * formats causes a 62% FPS drop.
+ *
+ * However, many benchmarks seem to use 16-bit float with no issues.
+ */
+ if (fmtl->channels.r.bits == 32 && fmtl->channels.r.type == ISL_SFLOAT)
+ return false;
+
+ return true;
+}
+
+/**
+ * Configure aux for the resource, but don't allocate it. For images which
+ * might be shared with modifiers, we must allocate the image and aux data in
+ * a single bo.
+ */
+static bool
+iris_resource_configure_aux(struct iris_screen *screen,
+ struct iris_resource *res, bool imported,
+ uint64_t *aux_size_B,
+ uint32_t *alloc_flags)
+{
+ const struct gen_device_info *devinfo = &screen->devinfo;
+
+ /* Try to create the auxiliary surfaces allowed by the modifier or by
+ * the user if no modifier is specified.
+ */
+ assert(!res->mod_info || res->mod_info->aux_usage == ISL_AUX_USAGE_NONE ||
+ res->mod_info->aux_usage == ISL_AUX_USAGE_CCS_E);
+
+ const bool has_mcs = !res->mod_info &&
+ isl_surf_get_mcs_surf(&screen->isl_dev, &res->surf, &res->aux.surf);
+
+ const bool has_hiz = !res->mod_info && !(INTEL_DEBUG & DEBUG_NO_HIZ) &&
+ isl_surf_get_hiz_surf(&screen->isl_dev, &res->surf, &res->aux.surf);
+
+ const bool has_ccs =
+ ((!res->mod_info && !(INTEL_DEBUG & DEBUG_NO_RBC)) ||
+ (res->mod_info && res->mod_info->aux_usage != ISL_AUX_USAGE_NONE)) &&
+ isl_surf_get_ccs_surf(&screen->isl_dev, &res->surf, &res->aux.surf,
+ &res->aux.extra_aux.surf, 0);
+
+ /* Having both HIZ and MCS is impossible. */
+ assert(!has_mcs || !has_hiz);
+
+ /* Ensure aux surface creation for MCS_CCS and HIZ_CCS is correct. */
+ if (has_ccs && (has_mcs || has_hiz)) {
+ assert(res->aux.extra_aux.surf.size_B > 0 &&
+ res->aux.extra_aux.surf.usage & ISL_SURF_USAGE_CCS_BIT);
+ assert(res->aux.surf.size_B > 0 &&
+ res->aux.surf.usage &
+ (ISL_SURF_USAGE_HIZ_BIT | ISL_SURF_USAGE_MCS_BIT));
+ }
+
+ if (res->mod_info && has_ccs) {
+ /* Only allow a CCS modifier if the aux was created successfully. */
+ res->aux.possible_usages |= 1 << res->mod_info->aux_usage;
+ } else if (has_mcs) {
+ res->aux.possible_usages |= 1 << ISL_AUX_USAGE_MCS;
+ } else if (has_hiz) {
+ res->aux.possible_usages |=
+ 1 << (has_ccs ? ISL_AUX_USAGE_HIZ_CCS : ISL_AUX_USAGE_HIZ);
+ } else if (has_ccs) {
+ if (want_ccs_e_for_format(devinfo, res->surf.format))
+ res->aux.possible_usages |= 1 << ISL_AUX_USAGE_CCS_E;
+
+ if (isl_format_supports_ccs_d(devinfo, res->surf.format))
+ res->aux.possible_usages |= 1 << ISL_AUX_USAGE_CCS_D;
+ }
+
+ res->aux.usage = util_last_bit(res->aux.possible_usages) - 1;
+
+ res->aux.sampler_usages = res->aux.possible_usages;
+
+ /* We don't always support sampling with hiz. But when we do, it must be
+ * single sampled.
+ */
+ if (!devinfo->has_sample_with_hiz || res->surf.samples > 1)
+ res->aux.sampler_usages &= ~(1 << ISL_AUX_USAGE_HIZ);
+
+ /* We don't always support sampling with HIZ_CCS. But when we do, treat it
+ * as CCS_E.*/
+ res->aux.sampler_usages &= ~(1 << ISL_AUX_USAGE_HIZ_CCS);
+ if (isl_surf_supports_hiz_ccs_wt(devinfo, &res->surf, res->aux.usage))
+ res->aux.sampler_usages |= 1 << ISL_AUX_USAGE_CCS_E;
+
+ enum isl_aux_state initial_state;
+ *aux_size_B = 0;
+ *alloc_flags = 0;
+ assert(!res->aux.bo);
+
+ switch (res->aux.usage) {
+ case ISL_AUX_USAGE_NONE:
+ /* Having no aux buffer is only okay if there's no modifier with aux. */
+ return !res->mod_info || res->mod_info->aux_usage == ISL_AUX_USAGE_NONE;
+ case ISL_AUX_USAGE_HIZ:
+ case ISL_AUX_USAGE_HIZ_CCS:
+ initial_state = ISL_AUX_STATE_AUX_INVALID;
+ break;
+ case ISL_AUX_USAGE_MCS:
+ case ISL_AUX_USAGE_MCS_CCS:
+ /* The Ivybridge PRM, Vol 2 Part 1 p326 says:
+ *
+ * "When MCS buffer is enabled and bound to MSRT, it is required
+ * that it is cleared prior to any rendering."
+ *
+ * Since we only use the MCS buffer for rendering, we just clear it
+ * immediately on allocation. The clear value for MCS buffers is all
+ * 1's, so we simply memset it to 0xff.
+ */
+ initial_state = ISL_AUX_STATE_CLEAR;
+ break;
+ case ISL_AUX_USAGE_CCS_D:
+ case ISL_AUX_USAGE_CCS_E:
+ /* When CCS_E is used, we need to ensure that the CCS starts off in
+ * a valid state. From the Sky Lake PRM, "MCS Buffer for Render
+ * Target(s)":
+ *
+ * "If Software wants to enable Color Compression without Fast
+ * clear, Software needs to initialize MCS with zeros."
+ *
+ * A CCS value of 0 indicates that the corresponding block is in the
+ * pass-through state which is what we want.
+ *
+ * For CCS_D, do the same thing. On Gen9+, this avoids having any
+ * undefined bits in the aux buffer.
+ */
+ if (imported)
+ initial_state =
+ isl_drm_modifier_get_default_aux_state(res->mod_info->modifier);
+ else
+ initial_state = ISL_AUX_STATE_PASS_THROUGH;
+ *alloc_flags |= BO_ALLOC_ZEROED;
+ break;
+ }
+
+ if (!res->aux.state) {
+ /* Create the aux_state for the auxiliary buffer. */
+ res->aux.state = create_aux_state_map(res, initial_state);
+ if (!res->aux.state)
+ return false;
+ }
+
+ uint64_t size = res->aux.surf.size_B;
+
+ /* Allocate space in the buffer for storing the CCS. */
+ if (res->aux.extra_aux.surf.size_B > 0) {
+ res->aux.extra_aux.offset =
+ ALIGN(size, res->aux.extra_aux.surf.alignment_B);
+ size = res->aux.extra_aux.offset + res->aux.extra_aux.surf.size_B;
+ }
+
+ /* Allocate space in the buffer for storing the clear color. On modern
+ * platforms (gen > 9), we can read it directly from such buffer.
+ *
+ * On gen <= 9, we are going to store the clear color on the buffer
+ * anyways, and copy it back to the surface state during state emission.
+ */
+ res->aux.clear_color_offset = size;
+ size += iris_get_aux_clear_color_state_size(screen);
+ *aux_size_B = size;
+
+ if (isl_aux_usage_has_hiz(res->aux.usage)) {
+ for (unsigned level = 0; level < res->surf.levels; ++level) {
+ uint32_t width = u_minify(res->surf.phys_level0_sa.width, level);
+ uint32_t height = u_minify(res->surf.phys_level0_sa.height, level);
+
+ /* Disable HiZ for LOD > 0 unless the width/height are 8x4 aligned.
+ * For LOD == 0, we can grow the dimensions to make it work.
+ */
+ if (level == 0 || ((width & 7) == 0 && (height & 3) == 0))
+ res->aux.has_hiz |= 1 << level;
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Initialize the aux buffer contents.
+ */
+static bool
+iris_resource_init_aux_buf(struct iris_resource *res, uint32_t alloc_flags,
+ unsigned clear_color_state_size)
+{
+ if (!(alloc_flags & BO_ALLOC_ZEROED)) {
+ void *map = iris_bo_map(NULL, res->aux.bo, MAP_WRITE | MAP_RAW);
+
+ if (!map) {
+ iris_resource_disable_aux(res);
+ return false;
+ }
+
+ if (iris_resource_get_aux_state(res, 0, 0) != ISL_AUX_STATE_AUX_INVALID) {
+ uint8_t memset_value = isl_aux_usage_has_mcs(res->aux.usage) ? 0xFF : 0;
+ memset((char*)map + res->aux.offset, memset_value,
+ res->aux.surf.size_B);
+ }
+
+ /* Bspec section titled : MCS/CCS Buffers for Render Target(s) states:
+ * - If Software wants to enable Color Compression without Fast clear,
+ * Software needs to initialize MCS with zeros.
+ * - Lossless compression and CCS initialized to all F (using HW Fast
+ * Clear or SW direct Clear)
+ *
+ * We think, the first bullet point above is referring to CCS aux
+ * surface. Since we initialize the MCS in the clear state, we also
+ * initialize the CCS in the clear state (via SW direct clear) to keep
+ * the two in sync.
+ */
+ memset((char*)map + res->aux.extra_aux.offset,
+ isl_aux_usage_has_mcs(res->aux.usage) ? 0xFF : 0,
+ res->aux.extra_aux.surf.size_B);
+
+ /* Zero the indirect clear color to match ::fast_clear_color. */
+ memset((char *)map + res->aux.clear_color_offset, 0,
+ clear_color_state_size);
+
+ iris_bo_unmap(res->aux.bo);
+ }
+
+ if (res->aux.extra_aux.surf.size_B > 0) {
+ res->aux.extra_aux.bo = res->aux.bo;
+ iris_bo_reference(res->aux.extra_aux.bo);
+ }
+
+ if (clear_color_state_size > 0) {
+ res->aux.clear_color_bo = res->aux.bo;
+ iris_bo_reference(res->aux.clear_color_bo);
+ }
+
+ return true;
+}
+
+/**
+ * Allocate the initial aux surface for a resource based on aux.usage
+ */
+static bool
+iris_resource_alloc_separate_aux(struct iris_screen *screen,
+ struct iris_resource *res)
+{
+ uint32_t alloc_flags;
+ uint64_t size;
+ if (!iris_resource_configure_aux(screen, res, false, &size, &alloc_flags))
+ return false;
+
+ if (size == 0)
+ return true;
+
+ /* Allocate the auxiliary buffer. ISL has stricter set of alignment rules
+ * the drm allocator. Therefore, one can pass the ISL dimensions in terms
+ * of bytes instead of trying to recalculate based on different format
+ * block sizes.
+ */
+ res->aux.bo = iris_bo_alloc_tiled(screen->bufmgr, "aux buffer", size, 4096,
+ IRIS_MEMZONE_OTHER,
+ isl_tiling_to_i915_tiling(res->aux.surf.tiling),
+ res->aux.surf.row_pitch_B, alloc_flags);
+ if (!res->aux.bo) {
+ return false;
+ }
+
+ if (!iris_resource_init_aux_buf(res, alloc_flags,
+ iris_get_aux_clear_color_state_size(screen)))
+ return false;
+
+ map_aux_addresses(screen, res);
+
+ return true;
+}
+
+void
+iris_resource_finish_aux_import(struct pipe_screen *pscreen,
+ struct iris_resource *res)
+{
+ struct iris_screen *screen = (struct iris_screen *)pscreen;
+ assert(iris_resource_unfinished_aux_import(res));
+ assert(!res->mod_info->supports_clear_color);
+
+ struct iris_resource *aux_res = (void *) res->base.next;
+ assert(aux_res->aux.surf.row_pitch_B && aux_res->aux.offset &&
+ aux_res->aux.bo);
+
+ assert(res->bo == aux_res->aux.bo);
+ iris_bo_reference(aux_res->aux.bo);
+ res->aux.bo = aux_res->aux.bo;
+
+ res->aux.offset = aux_res->aux.offset;
+
+ assert(res->bo->size >= (res->aux.offset + res->aux.surf.size_B));
+ assert(res->aux.clear_color_bo == NULL);
+ res->aux.clear_color_offset = 0;
+
+ assert(aux_res->aux.surf.row_pitch_B == res->aux.surf.row_pitch_B);
+
+ unsigned clear_color_state_size =
+ iris_get_aux_clear_color_state_size(screen);
+
+ if (clear_color_state_size > 0) {
+ res->aux.clear_color_bo =
+ iris_bo_alloc(screen->bufmgr, "clear color buffer",
+ clear_color_state_size, IRIS_MEMZONE_OTHER);
+ res->aux.clear_color_offset = 0;
+ }
+
+ iris_resource_destroy(&screen->base, res->base.next);
+ res->base.next = NULL;
+}
+