}
}
- assert(!tex->tc_compatible_htile || levels_z == 0);
- assert(!tex->tc_compatible_htile || levels_s == 0 ||
- !r600_can_sample_zs(tex, true));
-
/* We may have to allocate the flushed texture here when called from
* si_decompress_subresource.
*/
}
if (inplace_planes) {
- si_blit_decompress_zs_in_place(
- sctx, tex,
- levels_z, levels_s,
- first_layer, last_layer);
+ if (!tex->tc_compatible_htile) {
+ si_blit_decompress_zs_in_place(
+ sctx, tex,
+ levels_z, levels_s,
+ first_layer, last_layer);
+ }
+
+ /* DB caches need to be flushed only after in-place decompression,
+ * or when we don't decompress but TC-compatible planes are dirty.
+ */
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_INV_GLOBAL_L2 |
+ SI_CONTEXT_INV_VMEM_L1;
+
+ /* If we flush DB caches for TC-compatible depth, the dirty
+ * state becomes 0 for the whole mipmap tree and all planes.
+ * (there is nothing else to flush)
+ */
+ if (tex->tc_compatible_htile) {
+ if (r600_can_sample_zs(tex, false))
+ tex->dirty_level_mask = 0;
+ if (r600_can_sample_zs(tex, true))
+ tex->stencil_dirty_level_mask = 0;
+ }
}
}
rtex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1,
last_level - base_level);
+ sctx->generate_mipmap_for_depth = rtex->is_depth;
+
si_blitter_begin(ctx, SI_BLIT | SI_DISABLE_RENDER_COND);
util_blitter_generate_mipmap(sctx->blitter, tex, format,
base_level, last_level,
first_layer, last_layer);
si_blitter_end(ctx);
+
+ sctx->generate_mipmap_for_depth = false;
return true;
}
(rtex->cmask.size || rtex->dcc_offset));
}
-static bool depth_needs_decompression(struct r600_texture *rtex,
- struct si_sampler_view *sview)
+static bool depth_needs_decompression(struct r600_texture *rtex)
{
- return rtex->db_compatible &&
- (!rtex->tc_compatible_htile ||
- !r600_can_sample_zs(rtex, sview->is_stencil_sampler));
+ /* Every DB-compatible texture is flagged, TC-compatible HTILE included.
+ * A TC-compatible texture is not decompressed; the decompression path
+ * only flushes DB caches to make them coherent with shaders. That's
+ * necessary because the driver doesn't flush DB caches in any other case.
+ */
+ return rtex->db_compatible;
}
static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
struct r600_texture *rtex =
(struct r600_texture*)views[i]->texture;
- struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
- if (depth_needs_decompression(rtex, rview)) {
+ if (depth_needs_decompression(rtex)) {
samplers->needs_depth_decompress_mask |= 1u << slot;
} else {
samplers->needs_depth_decompress_mask &= ~(1u << slot);
struct r600_texture *rtex =
(struct r600_texture *)sview->base.texture;
- if (depth_needs_decompression(rtex, sview)) {
+ if (depth_needs_decompression(rtex)) {
util_dynarray_append(
&sctx->resident_tex_needs_depth_decompress,
struct si_texture_handle *,
bool db_stencil_clear:1;
bool db_stencil_disable_expclear:1;
bool occlusion_queries_disabled:1;
+ bool generate_mipmap_for_depth:1;
/* Emitted draw state. */
bool gs_tri_strip_adj_fix:1;
* the only client not using TC that can change textures is
* the framebuffer.
*
- * Flush all CB and DB caches here because all buffers can be used
- * for write by both TC (with shader image stores) and CB/DB.
+ * Wait for compute shaders because of possible transitions:
+ * - FB write -> shader read
+ * - shader write -> FB read
+ *
+ * DB caches are flushed on demand (using si_decompress_textures).
*/
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_FLUSH_AND_INV_DB |
SI_CONTEXT_CS_PARTIAL_FLUSH;
+ /* u_blitter doesn't invoke depth decompression when it does multiple
+ * blits in a row, but the only case when it matters for DB is when
+ * doing generate_mipmap. So here we flush DB manually between
+ * individual generate_mipmap blits.
+ * Note that lower mipmap levels aren't compressed.
+ */
+ if (sctx->generate_mipmap_for_depth)
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
+
/* Take the maximum of the old and new count. If the new count is lower,
* dirtying is needed to disable the unbound colorbuffers.
*/
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
+ /* Depth and stencil are flushed in si_decompress_textures when needed. */
if (flags & PIPE_BARRIER_FRAMEBUFFER)
- sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_FLUSH_AND_INV_DB;
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
if (flags & (PIPE_BARRIER_FRAMEBUFFER |
PIPE_BARRIER_INDIRECT_BUFFER))
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
- if (!rtex->tc_compatible_htile)
- rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+ rtex->dirty_level_mask |= 1 << surf->u.tex.level;
- if (rtex->surface.flags & RADEON_SURF_SBUFFER &&
- (!rtex->tc_compatible_htile || !rtex->can_sample_s))
+ if (rtex->surface.flags & RADEON_SURF_SBUFFER)
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
}
if (sctx->framebuffer.compressed_cb_mask) {