X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Firis%2Firis_resolve.c;h=37af7920ada90f3f26af2b08e2c4c68f5de8389e;hb=a03d17ede778610f2c66099d0d5342cf09ef12a2;hp=752eeef20846e353e50a34456457f5ca025cc767;hpb=a4da6008b6a0d8876eaf5a67c95d88038bbf35e6;p=mesa.git diff --git a/src/gallium/drivers/iris/iris_resolve.c b/src/gallium/drivers/iris/iris_resolve.c index 752eeef2084..37af7920ada 100644 --- a/src/gallium/drivers/iris/iris_resolve.c +++ b/src/gallium/drivers/iris/iris_resolve.c @@ -54,7 +54,8 @@ disable_rb_aux_buffer(struct iris_context *ice, /* We only need to worry about color compression and fast clears. */ if (tex_res->aux.usage != ISL_AUX_USAGE_CCS_D && - tex_res->aux.usage != ISL_AUX_USAGE_CCS_E) + tex_res->aux.usage != ISL_AUX_USAGE_CCS_E && + tex_res->aux.usage != ISL_AUX_USAGE_GEN12_CCS_E) return false; for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { @@ -90,8 +91,6 @@ resolve_sampler_views(struct iris_context *ice, { uint32_t views = info ? (shs->bound_sampler_views & info->textures_used) : 0; - unsigned astc5x5_wa_bits = 0; // XXX: actual tracking - while (views) { const int i = u_bit_scan(&views); struct iris_sampler_view *isv = shs->textures[i]; @@ -104,14 +103,13 @@ resolve_sampler_views(struct iris_context *ice, "for sampling"); } - iris_resource_prepare_texture(ice, batch, res, isv->view.format, + iris_resource_prepare_texture(ice, res, isv->view.format, isv->view.base_level, isv->view.levels, isv->view.base_array_layer, - isv->view.array_len, - astc5x5_wa_bits); + isv->view.array_len); } - iris_cache_flush_for_read(batch, res->bo); + iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_OTHER_READ); } } @@ -119,11 +117,11 @@ static void resolve_image_views(struct iris_context *ice, struct iris_batch *batch, struct iris_shader_state *shs, + const struct shader_info *info, bool *draw_aux_buffer_disabled, bool consider_framebuffer) { - /* TODO: Consider images used by program */ - uint32_t views = shs->bound_image_views; + uint32_t views = info ? (shs->bound_image_views & info->images_used) : 0; while (views) { const int i = u_bit_scan(&views); @@ -140,14 +138,16 @@ resolve_image_views(struct iris_context *ice, unsigned num_layers = pview->u.tex.last_layer - pview->u.tex.first_layer + 1; - /* The data port doesn't understand any compression */ - iris_resource_prepare_access(ice, batch, res, + enum isl_aux_usage aux_usage = + iris_image_view_aux_usage(ice, pview, info); + + iris_resource_prepare_access(ice, res, pview->u.tex.level, 1, pview->u.tex.first_layer, num_layers, - ISL_AUX_USAGE_NONE, false); + aux_usage, false); } - iris_cache_flush_for_read(batch, res->bo); + iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_OTHER_READ); } } @@ -168,17 +168,15 @@ iris_predraw_resolve_inputs(struct iris_context *ice, struct iris_shader_state *shs = &ice->state.shaders[stage]; const struct shader_info *info = iris_get_shader_info(ice, stage); - uint64_t dirty = (IRIS_DIRTY_BINDINGS_VS << stage) | - (consider_framebuffer ? IRIS_DIRTY_BINDINGS_FS : 0); + uint64_t stage_dirty = (IRIS_STAGE_DIRTY_BINDINGS_VS << stage) | + (consider_framebuffer ? 
IRIS_STAGE_DIRTY_BINDINGS_FS : 0); - if (ice->state.dirty & dirty) { + if (ice->state.stage_dirty & stage_dirty) { resolve_sampler_views(ice, batch, shs, info, draw_aux_buffer_disabled, consider_framebuffer); - resolve_image_views(ice, batch, shs, draw_aux_buffer_disabled, + resolve_image_views(ice, batch, shs, info, draw_aux_buffer_disabled, consider_framebuffer); } - - // XXX: ASTC hacks } void @@ -207,11 +205,13 @@ iris_predraw_resolve_framebuffer(struct iris_context *ice, zs_surf->u.tex.level, zs_surf->u.tex.first_layer, num_layers); - iris_cache_flush_for_depth(batch, z_res->bo); + iris_emit_buffer_barrier_for(batch, z_res->bo, + IRIS_DOMAIN_DEPTH_WRITE); } if (s_res) { - iris_cache_flush_for_depth(batch, s_res->bo); + iris_emit_buffer_barrier_for(batch, s_res->bo, + IRIS_DOMAIN_DEPTH_WRITE); } } } @@ -222,16 +222,15 @@ iris_predraw_resolve_framebuffer(struct iris_context *ice, struct iris_surface *surf = (void *) cso_fb->cbufs[i]; struct iris_resource *res = (void *) cso_fb->cbufs[i]->texture; - iris_resource_prepare_texture(ice, batch, res, surf->view.format, + iris_resource_prepare_texture(ice, res, surf->view.format, surf->view.base_level, 1, surf->view.base_array_layer, - surf->view.array_len, - 0); + surf->view.array_len); } } } - if (ice->state.dirty & (IRIS_DIRTY_BINDINGS_FS | IRIS_DIRTY_BLEND_STATE)) { + if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_FS) { for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { struct iris_surface *surf = (void *) cso_fb->cbufs[i]; if (!surf) @@ -241,13 +240,13 @@ iris_predraw_resolve_framebuffer(struct iris_context *ice, enum isl_aux_usage aux_usage = iris_resource_render_aux_usage(ice, res, surf->view.format, - ice->state.blend_enables & (1u << i), draw_aux_buffer_disabled[i]); if (ice->state.draw_aux_usage[i] != aux_usage) { ice->state.draw_aux_usage[i] = aux_usage; /* XXX: Need to track which bindings to make dirty */ - ice->state.dirty |= IRIS_ALL_DIRTY_BINDINGS; + ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER; + ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS; } iris_resource_prepare_render(ice, batch, res, surf->view.base_level, @@ -298,9 +297,6 @@ iris_postdraw_update_resolve_tracking(struct iris_context *ice, zs_surf->u.tex.first_layer, num_layers, ice->state.depth_writes_enabled); } - - if (ice->state.depth_writes_enabled) - iris_depth_cache_add_bo(batch, z_res->bo); } if (s_res) { @@ -309,14 +305,11 @@ iris_postdraw_update_resolve_tracking(struct iris_context *ice, zs_surf->u.tex.first_layer, num_layers, s_res->aux.usage); } - - if (ice->state.stencil_writes_enabled) - iris_depth_cache_add_bo(batch, s_res->bo); } } bool may_have_resolved_color = - ice->state.dirty & (IRIS_DIRTY_BINDINGS_FS | IRIS_DIRTY_BLEND_STATE); + ice->state.stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_FS; for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { struct iris_surface *surf = (void *) cso_fb->cbufs[i]; @@ -326,9 +319,6 @@ iris_postdraw_update_resolve_tracking(struct iris_context *ice, struct iris_resource *res = (void *) surf->base.texture; enum isl_aux_usage aux_usage = ice->state.draw_aux_usage[i]; - iris_render_cache_add_bo(batch, res->bo, surf->view.format, - aux_usage); - if (may_have_resolved_color) { union pipe_surface_desc *desc = &surf->base.u; unsigned num_layers = @@ -340,57 +330,6 @@ iris_postdraw_update_resolve_tracking(struct iris_context *ice, } } -/** - * Clear the cache-tracking sets. 
- */ -void -iris_cache_sets_clear(struct iris_batch *batch) -{ - hash_table_foreach(batch->cache.render, render_entry) - _mesa_hash_table_remove(batch->cache.render, render_entry); - - set_foreach(batch->cache.depth, depth_entry) - _mesa_set_remove(batch->cache.depth, depth_entry); -} - -/** - * Emits an appropriate flush for a BO if it has been rendered to within the - * same batchbuffer as a read that's about to be emitted. - * - * The GPU has separate, incoherent caches for the render cache and the - * sampler cache, along with other caches. Usually data in the different - * caches don't interact (e.g. we don't render to our driver-generated - * immediate constant data), but for render-to-texture in FBOs we definitely - * do. When a batchbuffer is flushed, the kernel will ensure that everything - * necessary is flushed before another use of that BO, but for reuse from - * different caches within a batchbuffer, it's all our responsibility. - */ -void -iris_flush_depth_and_render_caches(struct iris_batch *batch) -{ - iris_emit_pipe_control_flush(batch, - "cache tracker: render-to-texture", - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_FLUSH | - PIPE_CONTROL_CS_STALL); - - iris_emit_pipe_control_flush(batch, - "cache tracker: render-to-texture", - PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | - PIPE_CONTROL_CONST_CACHE_INVALIDATE); - - iris_cache_sets_clear(batch); -} - -void -iris_cache_flush_for_read(struct iris_batch *batch, - struct iris_bo *bo) -{ - if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo) || - _mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo)) - iris_flush_depth_and_render_caches(batch); -} - static void * format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage) { @@ -403,8 +342,7 @@ iris_cache_flush_for_render(struct iris_batch *batch, enum isl_format format, enum isl_aux_usage aux_usage) { - if (_mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo)) - iris_flush_depth_and_render_caches(batch); + iris_emit_buffer_barrier_for(batch, bo, IRIS_DOMAIN_RENDER_WRITE); /* Check to see if this bo has been used by a previous rendering operation * but with a different format or aux usage. If it has, flush the render @@ -431,43 +369,16 @@ iris_cache_flush_for_render(struct iris_batch *batch, */ struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo); - if (entry && entry->data != format_aux_tuple(format, aux_usage)) - iris_flush_depth_and_render_caches(batch); -} - -void -iris_render_cache_add_bo(struct iris_batch *batch, - struct iris_bo *bo, - enum isl_format format, - enum isl_aux_usage aux_usage) -{ -#ifndef NDEBUG - struct hash_entry *entry = - _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo); - if (entry) { - /* Otherwise, someone didn't do a flush_for_render and that would be - * very bad indeed. 
- */ - assert(entry->data == format_aux_tuple(format, aux_usage)); + if (!entry) { + _mesa_hash_table_insert_pre_hashed(batch->cache.render, bo->hash, bo, + format_aux_tuple(format, aux_usage)); + } else if (entry->data != format_aux_tuple(format, aux_usage)) { + iris_emit_pipe_control_flush(batch, + "cache tracker: render format mismatch", + PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_CS_STALL); + entry->data = format_aux_tuple(format, aux_usage); } -#endif - - _mesa_hash_table_insert_pre_hashed(batch->cache.render, bo->hash, bo, - format_aux_tuple(format, aux_usage)); -} - -void -iris_cache_flush_for_depth(struct iris_batch *batch, - struct iris_bo *bo) -{ - if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo)) - iris_flush_depth_and_render_caches(batch); -} - -void -iris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo) -{ - _mesa_set_add_pre_hashed(batch->cache.depth, bo->hash, bo); } static void @@ -480,7 +391,7 @@ iris_resolve_color(struct iris_context *ice, //DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer); struct blorp_surf surf; - iris_blorp_surf_for_resource(&ice->vtbl, &batch->screen->isl_dev, &surf, + iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf, &res->base, res->aux.usage, level, true); iris_batch_maybe_flush(batch, 1500); @@ -500,24 +411,25 @@ iris_resolve_color(struct iris_context *ice, iris_emit_end_of_pipe_sync(batch, "color resolve: pre-flush", PIPE_CONTROL_RENDER_TARGET_FLUSH); + iris_batch_sync_region_start(batch); struct blorp_batch blorp_batch; blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); /* On Gen >= 12, Stencil buffer with lossless compression needs to be * resolve with WM_HZ_OP packet. */ - if (isl_surf_usage_is_stencil(res->surf.usage)) { + if (res->aux.usage == ISL_AUX_USAGE_STC_CCS) { blorp_hiz_stencil_op(&blorp_batch, &surf, level, layer, 1, resolve_op); } else { blorp_ccs_resolve(&blorp_batch, &surf, level, layer, 1, - isl_format_srgb_to_linear(res->surf.format), - resolve_op); + res->surf.format, resolve_op); } blorp_batch_finish(&blorp_batch); /* See comment above */ iris_emit_end_of_pipe_sync(batch, "color resolve: post-flush", PIPE_CONTROL_RENDER_TARGET_FLUSH); + iris_batch_sync_region_end(batch); } static void @@ -533,41 +445,22 @@ iris_mcs_partial_resolve(struct iris_context *ice, assert(isl_aux_usage_has_mcs(res->aux.usage)); struct blorp_surf surf; - iris_blorp_surf_for_resource(&ice->vtbl, &batch->screen->isl_dev, &surf, + iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf, &res->base, res->aux.usage, 0, true); + iris_emit_buffer_barrier_for(batch, res->bo, IRIS_DOMAIN_RENDER_WRITE); struct blorp_batch blorp_batch; + iris_batch_sync_region_start(batch); blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); - blorp_mcs_partial_resolve(&blorp_batch, &surf, - isl_format_srgb_to_linear(res->surf.format), + blorp_mcs_partial_resolve(&blorp_batch, &surf, res->surf.format, start_layer, num_layers); blorp_batch_finish(&blorp_batch); + iris_batch_sync_region_end(batch); } - -/** - * Return true if the format that will be used to access the resource is - * CCS_E-compatible with the resource's linear/non-sRGB format. - * - * Why use the linear format? Well, although the resourcemay be specified - * with an sRGB format, the usage of that color space/format can be toggled. - * Since our HW tends to support more linear formats than sRGB ones, we use - * this format variant for check for CCS_E compatibility. 
- */ -static bool -format_ccs_e_compat_with_resource(const struct gen_device_info *devinfo, - const struct iris_resource *res, - enum isl_format access_format) -{ - assert(res->aux.usage == ISL_AUX_USAGE_CCS_E); - - enum isl_format isl_format = isl_format_srgb_to_linear(res->surf.format); - return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format); -} - -static bool -sample_with_depth_aux(const struct gen_device_info *devinfo, - const struct iris_resource *res) +bool +iris_sample_with_depth_aux(const struct gen_device_info *devinfo, + const struct iris_resource *res) { switch (res->aux.usage) { case ISL_AUX_USAGE_HIZ: @@ -575,10 +468,9 @@ sample_with_depth_aux(const struct gen_device_info *devinfo, break; return false; case ISL_AUX_USAGE_HIZ_CCS: - /* Write through mode must have been enabled for prior writes. */ - if (isl_surf_supports_hiz_ccs_wt(devinfo, &res->surf, res->aux.usage)) - break; return false; + case ISL_AUX_USAGE_HIZ_CCS_WT: + break; default: return false; } @@ -683,8 +575,10 @@ iris_hiz_exec(struct iris_context *ice, iris_batch_maybe_flush(batch, 1500); + iris_batch_sync_region_start(batch); + struct blorp_surf surf; - iris_blorp_surf_for_resource(&ice->vtbl, &batch->screen->isl_dev, &surf, + iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf, &res->base, res->aux.usage, level, true); struct blorp_batch blorp_batch; @@ -714,6 +608,16 @@ iris_hiz_exec(struct iris_context *ice, "hiz op: post flush", PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DEPTH_STALL); + + iris_batch_sync_region_end(batch); +} + +static bool +level_has_aux(const struct iris_resource *res, uint32_t level) +{ + return isl_aux_usage_has_hiz(res->aux.usage) ? + iris_resource_level_has_hiz(res, level) : + res->aux.usage != ISL_AUX_USAGE_NONE; } /** @@ -795,424 +699,51 @@ iris_has_color_unresolved(const struct iris_resource *res, return false; } -static enum isl_aux_op -get_ccs_d_resolve_op(enum isl_aux_state aux_state, - enum isl_aux_usage aux_usage, - bool fast_clear_supported) -{ - assert(aux_usage == ISL_AUX_USAGE_NONE || aux_usage == ISL_AUX_USAGE_CCS_D); - - const bool ccs_supported = - (aux_usage == ISL_AUX_USAGE_CCS_D) && fast_clear_supported; - - switch (aux_state) { - case ISL_AUX_STATE_CLEAR: - case ISL_AUX_STATE_PARTIAL_CLEAR: - if (!ccs_supported) - return ISL_AUX_OP_FULL_RESOLVE; - else - return ISL_AUX_OP_NONE; - - case ISL_AUX_STATE_PASS_THROUGH: - return ISL_AUX_OP_NONE; - - case ISL_AUX_STATE_RESOLVED: - case ISL_AUX_STATE_AUX_INVALID: - case ISL_AUX_STATE_COMPRESSED_CLEAR: - case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: - break; - } - - unreachable("Invalid aux state for CCS_D"); -} - -static enum isl_aux_op -get_ccs_e_resolve_op(enum isl_aux_state aux_state, - enum isl_aux_usage aux_usage, - bool fast_clear_supported) -{ - /* CCS_E surfaces can be accessed as CCS_D if we're careful. 
*/ - assert(aux_usage == ISL_AUX_USAGE_NONE || - aux_usage == ISL_AUX_USAGE_CCS_D || - aux_usage == ISL_AUX_USAGE_CCS_E); - - switch (aux_state) { - case ISL_AUX_STATE_CLEAR: - case ISL_AUX_STATE_PARTIAL_CLEAR: - if (fast_clear_supported) - return ISL_AUX_OP_NONE; - else if (aux_usage == ISL_AUX_USAGE_CCS_E) - return ISL_AUX_OP_PARTIAL_RESOLVE; - else - return ISL_AUX_OP_FULL_RESOLVE; - - case ISL_AUX_STATE_COMPRESSED_CLEAR: - if (aux_usage != ISL_AUX_USAGE_CCS_E) - return ISL_AUX_OP_FULL_RESOLVE; - else if (!fast_clear_supported) - return ISL_AUX_OP_PARTIAL_RESOLVE; - else - return ISL_AUX_OP_NONE; - - case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: - if (aux_usage != ISL_AUX_USAGE_CCS_E) - return ISL_AUX_OP_FULL_RESOLVE; - else - return ISL_AUX_OP_NONE; - - case ISL_AUX_STATE_PASS_THROUGH: - return ISL_AUX_OP_NONE; - - case ISL_AUX_STATE_RESOLVED: - case ISL_AUX_STATE_AUX_INVALID: - break; - } - - unreachable("Invalid aux state for CCS_E"); -} - -static void -iris_resource_prepare_ccs_access(struct iris_context *ice, - struct iris_batch *batch, - struct iris_resource *res, - uint32_t level, uint32_t layer, - enum isl_aux_usage aux_usage, - bool fast_clear_supported) -{ - enum isl_aux_state aux_state = iris_resource_get_aux_state(res, level, layer); - - enum isl_aux_op resolve_op; - if (res->aux.usage == ISL_AUX_USAGE_CCS_E) { - resolve_op = get_ccs_e_resolve_op(aux_state, aux_usage, - fast_clear_supported); - } else { - assert(res->aux.usage == ISL_AUX_USAGE_CCS_D); - resolve_op = get_ccs_d_resolve_op(aux_state, aux_usage, - fast_clear_supported); - } - - if (resolve_op != ISL_AUX_OP_NONE) { - iris_resolve_color(ice, batch, res, level, layer, resolve_op); - - switch (resolve_op) { - case ISL_AUX_OP_FULL_RESOLVE: - /* The CCS full resolve operation destroys the CCS and sets it to the - * pass-through state. (You can also think of this as being both a - * resolve and an ambiguate in one operation.) 
- */ - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_PASS_THROUGH); - break; - - case ISL_AUX_OP_PARTIAL_RESOLVE: - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_COMPRESSED_NO_CLEAR); - break; - - default: - unreachable("Invalid resolve op"); - } - } -} - -static void -iris_resource_finish_ccs_write(struct iris_context *ice, - struct iris_resource *res, - uint32_t level, uint32_t layer, - enum isl_aux_usage aux_usage) -{ - assert(aux_usage == ISL_AUX_USAGE_NONE || - aux_usage == ISL_AUX_USAGE_CCS_D || - aux_usage == ISL_AUX_USAGE_CCS_E); - - enum isl_aux_state aux_state = - iris_resource_get_aux_state(res, level, layer); - - if (res->aux.usage == ISL_AUX_USAGE_CCS_E) { - switch (aux_state) { - case ISL_AUX_STATE_CLEAR: - case ISL_AUX_STATE_PARTIAL_CLEAR: - assert(aux_usage == ISL_AUX_USAGE_CCS_E || - aux_usage == ISL_AUX_USAGE_CCS_D); - - if (aux_usage == ISL_AUX_USAGE_CCS_E) { - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_COMPRESSED_CLEAR); - } else if (aux_state != ISL_AUX_STATE_PARTIAL_CLEAR) { - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_PARTIAL_CLEAR); - } - break; - - case ISL_AUX_STATE_COMPRESSED_CLEAR: - case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: - assert(aux_usage == ISL_AUX_USAGE_CCS_E); - break; /* Nothing to do */ - - case ISL_AUX_STATE_PASS_THROUGH: - if (aux_usage == ISL_AUX_USAGE_CCS_E) { - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_COMPRESSED_NO_CLEAR); - } else { - /* Nothing to do */ - } - break; - - case ISL_AUX_STATE_RESOLVED: - case ISL_AUX_STATE_AUX_INVALID: - unreachable("Invalid aux state for CCS_E"); - } - } else { - assert(res->aux.usage == ISL_AUX_USAGE_CCS_D); - /* CCS_D is a bit simpler */ - switch (aux_state) { - case ISL_AUX_STATE_CLEAR: - assert(aux_usage == ISL_AUX_USAGE_CCS_D); - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_PARTIAL_CLEAR); - break; - - case ISL_AUX_STATE_PARTIAL_CLEAR: - assert(aux_usage == ISL_AUX_USAGE_CCS_D); - break; /* Nothing to do */ - - case ISL_AUX_STATE_PASS_THROUGH: - /* Nothing to do */ - break; - - case ISL_AUX_STATE_COMPRESSED_CLEAR: - case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: - case ISL_AUX_STATE_RESOLVED: - case ISL_AUX_STATE_AUX_INVALID: - unreachable("Invalid aux state for CCS_D"); - } - } -} - -static void -iris_resource_prepare_mcs_access(struct iris_context *ice, - struct iris_batch *batch, - struct iris_resource *res, - uint32_t layer, - enum isl_aux_usage aux_usage, - bool fast_clear_supported) -{ - assert(isl_aux_usage_has_mcs(aux_usage)); - - switch (iris_resource_get_aux_state(res, 0, layer)) { - case ISL_AUX_STATE_CLEAR: - case ISL_AUX_STATE_COMPRESSED_CLEAR: - if (!fast_clear_supported) { - iris_mcs_partial_resolve(ice, batch, res, layer, 1); - iris_resource_set_aux_state(ice, res, 0, layer, 1, - ISL_AUX_STATE_COMPRESSED_NO_CLEAR); - } - break; - - case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: - break; /* Nothing to do */ - - case ISL_AUX_STATE_RESOLVED: - case ISL_AUX_STATE_PASS_THROUGH: - case ISL_AUX_STATE_AUX_INVALID: - case ISL_AUX_STATE_PARTIAL_CLEAR: - unreachable("Invalid aux state for MCS"); - } -} - -static void -iris_resource_finish_mcs_write(struct iris_context *ice, - struct iris_resource *res, - uint32_t layer, - enum isl_aux_usage aux_usage) -{ - assert(isl_aux_usage_has_mcs(aux_usage)); - - switch (iris_resource_get_aux_state(res, 0, layer)) { - case ISL_AUX_STATE_CLEAR: - iris_resource_set_aux_state(ice, res, 0, layer, 1, - 
ISL_AUX_STATE_COMPRESSED_CLEAR); - break; - - case ISL_AUX_STATE_COMPRESSED_CLEAR: - case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: - break; /* Nothing to do */ - - case ISL_AUX_STATE_RESOLVED: - case ISL_AUX_STATE_PASS_THROUGH: - case ISL_AUX_STATE_AUX_INVALID: - case ISL_AUX_STATE_PARTIAL_CLEAR: - unreachable("Invalid aux state for MCS"); - } -} - -static void -iris_resource_prepare_hiz_access(struct iris_context *ice, - struct iris_batch *batch, - struct iris_resource *res, - uint32_t level, uint32_t layer, - enum isl_aux_usage aux_usage, - bool fast_clear_supported) -{ - assert(aux_usage == ISL_AUX_USAGE_NONE || - aux_usage == ISL_AUX_USAGE_HIZ || - aux_usage == ISL_AUX_USAGE_HIZ_CCS || - aux_usage == ISL_AUX_USAGE_CCS_E); - - enum isl_aux_op hiz_op = ISL_AUX_OP_NONE; - switch (iris_resource_get_aux_state(res, level, layer)) { - case ISL_AUX_STATE_CLEAR: - case ISL_AUX_STATE_COMPRESSED_CLEAR: - if (aux_usage == ISL_AUX_USAGE_NONE || !fast_clear_supported) - hiz_op = ISL_AUX_OP_FULL_RESOLVE; - break; - - case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: - if (aux_usage == ISL_AUX_USAGE_NONE) - hiz_op = ISL_AUX_OP_FULL_RESOLVE; - break; - - case ISL_AUX_STATE_PASS_THROUGH: - case ISL_AUX_STATE_RESOLVED: - break; - - case ISL_AUX_STATE_AUX_INVALID: - if (aux_usage != ISL_AUX_USAGE_NONE) - hiz_op = ISL_AUX_OP_AMBIGUATE; - break; - - case ISL_AUX_STATE_PARTIAL_CLEAR: - unreachable("Invalid HiZ state"); - } - - if (hiz_op != ISL_AUX_OP_NONE) { - iris_hiz_exec(ice, batch, res, level, layer, 1, hiz_op, false); - - switch (hiz_op) { - case ISL_AUX_OP_FULL_RESOLVE: - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_RESOLVED); - break; - - case ISL_AUX_OP_AMBIGUATE: - /* The HiZ resolve operation is actually an ambiguate */ - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_PASS_THROUGH); - break; - - default: - unreachable("Invalid HiZ op"); - } - } -} - -static void -iris_resource_finish_hiz_write(struct iris_context *ice, - struct iris_resource *res, - uint32_t level, uint32_t layer, - enum isl_aux_usage aux_usage) -{ - assert(aux_usage == ISL_AUX_USAGE_NONE || - isl_aux_usage_has_hiz(aux_usage)); - - switch (iris_resource_get_aux_state(res, level, layer)) { - case ISL_AUX_STATE_CLEAR: - assert(isl_aux_usage_has_hiz(aux_usage)); - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_COMPRESSED_CLEAR); - break; - - case ISL_AUX_STATE_COMPRESSED_NO_CLEAR: - case ISL_AUX_STATE_COMPRESSED_CLEAR: - assert(isl_aux_usage_has_hiz(aux_usage)); - break; /* Nothing to do */ - - case ISL_AUX_STATE_RESOLVED: - if (isl_aux_usage_has_hiz(aux_usage)) { - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_COMPRESSED_NO_CLEAR); - } else { - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_AUX_INVALID); - } - break; - - case ISL_AUX_STATE_PASS_THROUGH: - if (isl_aux_usage_has_hiz(aux_usage)) { - iris_resource_set_aux_state(ice, res, level, layer, 1, - ISL_AUX_STATE_COMPRESSED_NO_CLEAR); - } - break; - - case ISL_AUX_STATE_AUX_INVALID: - assert(!isl_aux_usage_has_hiz(aux_usage)); - break; - - case ISL_AUX_STATE_PARTIAL_CLEAR: - unreachable("Invalid HiZ state"); - } -} - void iris_resource_prepare_access(struct iris_context *ice, - struct iris_batch *batch, struct iris_resource *res, uint32_t start_level, uint32_t num_levels, uint32_t start_layer, uint32_t num_layers, enum isl_aux_usage aux_usage, bool fast_clear_supported) { - num_levels = miptree_level_range_length(res, start_level, num_levels); + /* We can't do resolves on 
the compute engine, so awkwardly, we have to + * do them on the render batch... + */ + struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER]; - switch (res->aux.usage) { - case ISL_AUX_USAGE_NONE: - /* Nothing to do */ - break; + const uint32_t clamped_levels = + miptree_level_range_length(res, start_level, num_levels); + for (uint32_t l = 0; l < clamped_levels; l++) { + const uint32_t level = start_level + l; + if (!level_has_aux(res, level)) + continue; - case ISL_AUX_USAGE_MCS: - case ISL_AUX_USAGE_MCS_CCS: - assert(start_level == 0 && num_levels == 1); const uint32_t level_layers = - miptree_layer_range_length(res, 0, start_layer, num_layers); + miptree_layer_range_length(res, level, start_layer, num_layers); for (uint32_t a = 0; a < level_layers; a++) { - iris_resource_prepare_mcs_access(ice, batch, res, start_layer + a, - aux_usage, fast_clear_supported); - } - break; - - case ISL_AUX_USAGE_CCS_D: - case ISL_AUX_USAGE_CCS_E: - for (uint32_t l = 0; l < num_levels; l++) { - const uint32_t level = start_level + l; - const uint32_t level_layers = - miptree_layer_range_length(res, level, start_layer, num_layers); - for (uint32_t a = 0; a < level_layers; a++) { - iris_resource_prepare_ccs_access(ice, batch, res, level, - start_layer + a, - aux_usage, fast_clear_supported); + const uint32_t layer = start_layer + a; + const enum isl_aux_state aux_state = + iris_resource_get_aux_state(res, level, layer); + const enum isl_aux_op aux_op = + isl_aux_prepare_access(aux_state, aux_usage, fast_clear_supported); + + if (aux_op == ISL_AUX_OP_NONE) { + /* Nothing to do here. */ + } else if (isl_aux_usage_has_mcs(res->aux.usage)) { + assert(aux_op == ISL_AUX_OP_PARTIAL_RESOLVE); + iris_mcs_partial_resolve(ice, batch, res, layer, 1); + } else if (isl_aux_usage_has_hiz(res->aux.usage)) { + iris_hiz_exec(ice, batch, res, level, layer, 1, aux_op, false); + } else { + assert(isl_aux_usage_has_ccs(res->aux.usage)); + iris_resolve_color(ice, batch, res, level, layer, aux_op); } - } - break; - - case ISL_AUX_USAGE_HIZ: - case ISL_AUX_USAGE_HIZ_CCS: - for (uint32_t l = 0; l < num_levels; l++) { - const uint32_t level = start_level + l; - if (!iris_resource_level_has_hiz(res, level)) - continue; - const uint32_t level_layers = - miptree_layer_range_length(res, level, start_layer, num_layers); - for (uint32_t a = 0; a < level_layers; a++) { - iris_resource_prepare_hiz_access(ice, batch, res, level, - start_layer + a, aux_usage, - fast_clear_supported); - } + const enum isl_aux_state new_state = + isl_aux_state_transition_aux_op(aux_state, res->aux.usage, aux_op); + iris_resource_set_aux_state(ice, res, level, layer, 1, new_state); } - break; - - default: - unreachable("Invalid aux usage"); } } @@ -1222,41 +753,19 @@ iris_resource_finish_write(struct iris_context *ice, uint32_t start_layer, uint32_t num_layers, enum isl_aux_usage aux_usage) { - num_layers = miptree_layer_range_length(res, level, start_layer, num_layers); - - switch (res->aux.usage) { - case ISL_AUX_USAGE_NONE: - break; - - case ISL_AUX_USAGE_MCS: - case ISL_AUX_USAGE_MCS_CCS: - for (uint32_t a = 0; a < num_layers; a++) { - iris_resource_finish_mcs_write(ice, res, start_layer + a, - aux_usage); - } - break; - - case ISL_AUX_USAGE_CCS_D: - case ISL_AUX_USAGE_CCS_E: - for (uint32_t a = 0; a < num_layers; a++) { - iris_resource_finish_ccs_write(ice, res, level, start_layer + a, - aux_usage); - } - break; - - case ISL_AUX_USAGE_HIZ: - case ISL_AUX_USAGE_HIZ_CCS: - if (!iris_resource_level_has_hiz(res, level)) - return; - - for (uint32_t a = 0; a 
< num_layers; a++) { - iris_resource_finish_hiz_write(ice, res, level, start_layer + a, - aux_usage); - } - break; - - default: - unreachable("Invavlid aux usage"); + if (!level_has_aux(res, level)) + return; + + const uint32_t level_layers = + miptree_layer_range_length(res, level, start_layer, num_layers); + + for (uint32_t a = 0; a < level_layers; a++) { + const uint32_t layer = start_layer + a; + const enum isl_aux_state aux_state = + iris_resource_get_aux_state(res, level, layer); + const enum isl_aux_state new_aux_state = + isl_aux_state_transition_write(aux_state, aux_usage, false); + iris_resource_set_aux_state(ice, res, level, layer, 1, new_aux_state); } } @@ -1295,82 +804,42 @@ iris_resource_set_aux_state(struct iris_context *ice, if (res->aux.state[level][start_layer + a] != aux_state) { res->aux.state[level][start_layer + a] = aux_state; /* XXX: Need to track which bindings to make dirty */ - ice->state.dirty |= IRIS_ALL_DIRTY_BINDINGS; + ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER; + ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS; } } } -/* On Gen9 color buffers may be compressed by the hardware (lossless - * compression). There are, however, format restrictions and care needs to be - * taken that the sampler engine is capable for re-interpreting a buffer with - * format different the buffer was originally written with. - * - * For example, SRGB formats are not compressible and the sampler engine isn't - * capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying - * color buffer needs to be resolved so that the sampling surface can be - * sampled as non-compressed (i.e., without the auxiliary MCS buffer being - * set). - */ -static bool -can_texture_with_ccs(const struct gen_device_info *devinfo, - struct pipe_debug_callback *dbg, - const struct iris_resource *res, - enum isl_format view_format) -{ - if (res->aux.usage != ISL_AUX_USAGE_CCS_E) - return false; - - if (!format_ccs_e_compat_with_resource(devinfo, res, view_format)) { - const struct isl_format_layout *res_fmtl = - isl_format_get_layout(res->surf.format); - const struct isl_format_layout *view_fmtl = - isl_format_get_layout(view_format); - - perf_debug(dbg, "Incompatible sampling format (%s) for CCS (%s)\n", - view_fmtl->name, res_fmtl->name); - - return false; - } - - return true; -} - enum isl_aux_usage iris_resource_texture_aux_usage(struct iris_context *ice, const struct iris_resource *res, - enum isl_format view_format, - enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits) + enum isl_format view_format) { struct iris_screen *screen = (void *) ice->ctx.screen; struct gen_device_info *devinfo = &screen->devinfo; - assert(devinfo->gen == 9 || astc5x5_wa_bits == 0); - - /* On gen9, ASTC 5x5 textures cannot live in the sampler cache along side - * CCS or HiZ compressed textures. See gen9_apply_astc5x5_wa_flush() for - * details. 
- */ - if ((astc5x5_wa_bits & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) && - res->aux.usage != ISL_AUX_USAGE_MCS) - return ISL_AUX_USAGE_NONE; - switch (res->aux.usage) { case ISL_AUX_USAGE_HIZ: - if (sample_with_depth_aux(devinfo, res)) + if (iris_sample_with_depth_aux(devinfo, res)) return ISL_AUX_USAGE_HIZ; break; case ISL_AUX_USAGE_HIZ_CCS: - if (sample_with_depth_aux(devinfo, res)) - return ISL_AUX_USAGE_CCS_E; + assert(!iris_sample_with_depth_aux(devinfo, res)); + return ISL_AUX_USAGE_NONE; + + case ISL_AUX_USAGE_HIZ_CCS_WT: + if (iris_sample_with_depth_aux(devinfo, res)) + return ISL_AUX_USAGE_HIZ_CCS_WT; break; case ISL_AUX_USAGE_MCS: case ISL_AUX_USAGE_MCS_CCS: + case ISL_AUX_USAGE_STC_CCS: return res->aux.usage; - case ISL_AUX_USAGE_CCS_D: case ISL_AUX_USAGE_CCS_E: + case ISL_AUX_USAGE_GEN12_CCS_E: /* If we don't have any unresolved color, report an aux usage of * ISL_AUX_USAGE_NONE. This way, texturing won't even look at the * aux surface and we can save some bandwidth. @@ -1379,8 +848,20 @@ iris_resource_texture_aux_usage(struct iris_context *ice, 0, INTEL_REMAINING_LAYERS)) return ISL_AUX_USAGE_NONE; - if (can_texture_with_ccs(devinfo, &ice->dbg, res, view_format)) - return ISL_AUX_USAGE_CCS_E; + /* On Gen9 color buffers may be compressed by the hardware (lossless + * compression). There are, however, format restrictions and care needs + * to be taken that the sampler engine is capable for re-interpreting a + * buffer with format different the buffer was originally written with. + * + * For example, SRGB formats are not compressible and the sampler engine + * isn't capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case + * the underlying color buffer needs to be resolved so that the sampling + * surface can be sampled as non-compressed (i.e., without the auxiliary + * MCS buffer being set). 
+ */ + if (isl_formats_are_ccs_e_compatible(devinfo, res->surf.format, + view_format)) + return res->aux.usage; break; default: @@ -1390,6 +871,29 @@ iris_resource_texture_aux_usage(struct iris_context *ice, return ISL_AUX_USAGE_NONE; } +enum isl_aux_usage +iris_image_view_aux_usage(struct iris_context *ice, + const struct pipe_image_view *pview, + const struct shader_info *info) +{ + if (!info) + return ISL_AUX_USAGE_NONE; + + struct iris_resource *res = (void *) pview->resource; + + enum isl_format view_format = iris_image_view_get_format(ice, pview); + enum isl_aux_usage aux_usage = + iris_resource_texture_aux_usage(ice, res, view_format); + + bool uses_atomic_load_store = + ice->shaders.uncompiled[info->stage]->uses_atomic_load_store; + + if (aux_usage == ISL_AUX_USAGE_GEN12_CCS_E && !uses_atomic_load_store) + return ISL_AUX_USAGE_GEN12_CCS_E; + + return ISL_AUX_USAGE_NONE; +} + static bool isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b) { @@ -1409,17 +913,15 @@ isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b) void iris_resource_prepare_texture(struct iris_context *ice, - struct iris_batch *batch, struct iris_resource *res, enum isl_format view_format, uint32_t start_level, uint32_t num_levels, - uint32_t start_layer, uint32_t num_layers, - enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits) + uint32_t start_layer, uint32_t num_layers) { enum isl_aux_usage aux_usage = - iris_resource_texture_aux_usage(ice, res, view_format, astc5x5_wa_bits); + iris_resource_texture_aux_usage(ice, res, view_format); - bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE; + bool clear_supported = isl_aux_usage_has_fast_clears(aux_usage); /* Clear color is specified as ints or floats and the conversion is done by * the sampler. If we have a texture view, we would have to perform the @@ -1428,16 +930,34 @@ iris_resource_prepare_texture(struct iris_context *ice, if (!isl_formats_are_fast_clear_compatible(res->surf.format, view_format)) clear_supported = false; - iris_resource_prepare_access(ice, batch, res, start_level, num_levels, + iris_resource_prepare_access(ice, res, start_level, num_levels, start_layer, num_layers, aux_usage, clear_supported); } +/* Whether or not rendering a color value with either format results in the + * same pixel. This can return false negatives. + */ +bool +iris_render_formats_color_compatible(enum isl_format a, enum isl_format b, + union isl_color_value color) +{ + if (a == b) + return true; + + /* A difference in color space doesn't matter for 0/1 values. */ + if (isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b) && + isl_color_value_is_zero_one(color, a)) { + return true; + } + + return false; +} + enum isl_aux_usage iris_resource_render_aux_usage(struct iris_context *ice, struct iris_resource *res, enum isl_format render_format, - bool blend_enabled, bool draw_aux_disabled) { struct iris_screen *screen = (void *) ice->ctx.screen; @@ -1453,23 +973,32 @@ iris_resource_render_aux_usage(struct iris_context *ice, case ISL_AUX_USAGE_CCS_D: case ISL_AUX_USAGE_CCS_E: - /* Gen9+ hardware technically supports non-0/1 clear colors with sRGB - * formats. However, there are issues with blending where it doesn't - * properly apply the sRGB curve to the clear color when blending. + case ISL_AUX_USAGE_GEN12_CCS_E: + /* Disable CCS for some cases of texture-view rendering. On gen12, HW + * may convert some subregions of shader output to fast-cleared blocks + * if CCS is enabled and the shader output matches the clear color. 
+ * Existing fast-cleared blocks are correctly interpreted by the clear + * color and the resource format (see can_fast_clear_color). To avoid + * gaining new fast-cleared blocks that can't be interpreted by the + * resource format (and to avoid misinterpreting existing ones), shut + * off CCS when the interpretation of the clear color differs between + * the render_format and the resource format. */ - if (devinfo->gen >= 9 && blend_enabled && - isl_format_is_srgb(render_format) && - !isl_color_value_is_zero_one(res->aux.clear_color, render_format)) + if (!iris_render_formats_color_compatible(render_format, + res->surf.format, + res->aux.clear_color)) { return ISL_AUX_USAGE_NONE; + } - if (res->aux.usage == ISL_AUX_USAGE_CCS_E && - format_ccs_e_compat_with_resource(devinfo, res, render_format)) - return ISL_AUX_USAGE_CCS_E; - - /* Otherwise, we try to fall back to CCS_D */ - if (isl_format_supports_ccs_d(devinfo, render_format)) + if (res->aux.usage == ISL_AUX_USAGE_CCS_D) return ISL_AUX_USAGE_CCS_D; + if (isl_formats_are_ccs_e_compatible(devinfo, res->surf.format, + render_format)) { + return res->aux.usage; + } + /* fallthrough */ + default: return ISL_AUX_USAGE_NONE; } @@ -1482,9 +1011,9 @@ iris_resource_prepare_render(struct iris_context *ice, uint32_t start_layer, uint32_t layer_count, enum isl_aux_usage aux_usage) { - iris_resource_prepare_access(ice, batch, res, level, 1, start_layer, + iris_resource_prepare_access(ice, res, level, 1, start_layer, layer_count, aux_usage, - aux_usage != ISL_AUX_USAGE_NONE); + isl_aux_usage_has_fast_clears(aux_usage)); } void @@ -1503,7 +1032,7 @@ iris_resource_prepare_depth(struct iris_context *ice, struct iris_resource *res, uint32_t level, uint32_t start_layer, uint32_t layer_count) { - iris_resource_prepare_access(ice, batch, res, level, 1, start_layer, + iris_resource_prepare_access(ice, res, level, 1, start_layer, layer_count, res->aux.usage, !!res->aux.bo); }
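
Note on the central change above: the hand-rolled, per-aux-usage resolve helpers removed by this patch (get_ccs_d_resolve_op(), get_ccs_e_resolve_op(), iris_resource_prepare_ccs_access(), iris_resource_prepare_mcs_access(), iris_resource_prepare_hiz_access(), and the matching *_finish_*_write() functions) are replaced by a single loop in iris_resource_prepare_access() that drives the generic ISL aux state machine -- isl_aux_prepare_access() picks the resolve op and isl_aux_state_transition_aux_op() computes the new state -- once per (level, layer). The standalone sketch below illustrates the shape of that loop only; the enum values and the toy prepare_access()/transition() rules are simplified stand-ins invented for this illustration, not the real ISL implementation, and the real driver dispatches the op to iris_mcs_partial_resolve(), iris_hiz_exec(), or iris_resolve_color() instead of printing.

/* Simplified, self-contained model of the prepare-access loop introduced by
 * this patch.  The real code calls isl_aux_prepare_access() and
 * isl_aux_state_transition_aux_op(); toy stand-ins are used here so the
 * example compiles on its own.  Build with: cc -o aux_sketch aux_sketch.c
 */
#include <stdio.h>

enum aux_state { AUX_CLEAR, AUX_COMPRESSED_CLEAR, AUX_COMPRESSED_NO_CLEAR,
                 AUX_RESOLVED, AUX_PASS_THROUGH };
enum aux_op    { AUX_OP_NONE, AUX_OP_PARTIAL_RESOLVE, AUX_OP_FULL_RESOLVE };

/* Toy equivalent of isl_aux_prepare_access(): decide which resolve (if any)
 * is needed before an access that keeps compression enabled but cannot
 * consume fast-clear blocks.
 */
static enum aux_op
prepare_access(enum aux_state state, int fast_clear_ok)
{
   switch (state) {
   case AUX_CLEAR:
   case AUX_COMPRESSED_CLEAR:
      return fast_clear_ok ? AUX_OP_NONE : AUX_OP_PARTIAL_RESOLVE;
   case AUX_COMPRESSED_NO_CLEAR:
   case AUX_RESOLVED:
   case AUX_PASS_THROUGH:
      return AUX_OP_NONE;
   }
   return AUX_OP_NONE;
}

/* Toy equivalent of isl_aux_state_transition_aux_op(). */
static enum aux_state
transition(enum aux_state state, enum aux_op op)
{
   switch (op) {
   case AUX_OP_PARTIAL_RESOLVE: return AUX_COMPRESSED_NO_CLEAR;
   case AUX_OP_FULL_RESOLVE:    return AUX_RESOLVED;
   default:                     return state;
   }
}

int
main(void)
{
   /* One aux state per (level, layer), as in res->aux.state[][]. */
   enum aux_state state[2][4] = {
      { AUX_CLEAR, AUX_COMPRESSED_CLEAR, AUX_PASS_THROUGH, AUX_COMPRESSED_NO_CLEAR },
      { AUX_CLEAR, AUX_CLEAR, AUX_RESOLVED, AUX_PASS_THROUGH },
   };

   /* Shape of the new iris_resource_prepare_access(): walk the levels and
    * layers, ask the state machine which op is required, perform it, then
    * record the resulting aux state.
    */
   for (int level = 0; level < 2; level++) {
      for (int layer = 0; layer < 4; layer++) {
         enum aux_op op = prepare_access(state[level][layer], /*fast_clear_ok=*/0);
         if (op != AUX_OP_NONE)
            printf("resolve level %d layer %d (op %d)\n", level, layer, op);
         state[level][layer] = transition(state[level][layer], op);
      }
   }
   return 0;
}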