From 226361082705be990804569ee731cd3d21749deb Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 15 Jun 2017 00:34:08 +0200 Subject: [PATCH] radeonsi: flush DB caches only when transitioning from DB to texturing MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Use the mechanism of si_decompress_textures, but instead of doing the actual decompression, just flag the DB cache flush there. This removes a lot of unnecessary DB cache flushes. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_blit.c | 36 ++++++++++++++----- src/gallium/drivers/radeonsi/si_descriptors.c | 17 ++++----- src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state.c | 21 ++++++++--- src/gallium/drivers/radeonsi/si_state_draw.c | 6 ++-- 5 files changed, 56 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index c62efbfa7d3..74f46705b3c 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -344,10 +344,6 @@ si_decompress_depth(struct si_context *sctx, } } - assert(!tex->tc_compatible_htile || levels_z == 0); - assert(!tex->tc_compatible_htile || levels_s == 0 || - !r600_can_sample_zs(tex, true)); - /* We may have to allocate the flushed texture here when called from * si_decompress_subresource. */ @@ -384,10 +380,30 @@ si_decompress_depth(struct si_context *sctx, } if (inplace_planes) { - si_blit_decompress_zs_in_place( - sctx, tex, - levels_z, levels_s, - first_layer, last_layer); + if (!tex->tc_compatible_htile) { + si_blit_decompress_zs_in_place( + sctx, tex, + levels_z, levels_s, + first_layer, last_layer); + } + + /* Only in-place decompression needs to flush DB caches, or + * when we don't decompress but TC-compatible planes are dirty. + */ + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_INV_VMEM_L1; + + /* If we flush DB caches for TC-compatible depth, the dirty + * state becomes 0 for the whole mipmap tree and all planes. + * (there is nothing else to flush) + */ + if (tex->tc_compatible_htile) { + if (r600_can_sample_zs(tex, false)) + tex->dirty_level_mask = 0; + if (r600_can_sample_zs(tex, true)) + tex->stencil_dirty_level_mask = 0; + } } } @@ -1352,11 +1368,15 @@ static boolean si_generate_mipmap(struct pipe_context *ctx, rtex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1, last_level - base_level); + sctx->generate_mipmap_for_depth = rtex->is_depth; + si_blitter_begin(ctx, SI_BLIT | SI_DISABLE_RENDER_COND); util_blitter_generate_mipmap(sctx->blitter, tex, format, base_level, last_level, first_layer, last_layer); si_blitter_end(ctx); + + sctx->generate_mipmap_for_depth = false; return true; } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index a7031ec3632..acf6fb47fae 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -584,12 +584,14 @@ static bool color_needs_decompression(struct r600_texture *rtex) (rtex->cmask.size || rtex->dcc_offset)); } -static bool depth_needs_decompression(struct r600_texture *rtex, - struct si_sampler_view *sview) +static bool depth_needs_decompression(struct r600_texture *rtex) { - return rtex->db_compatible && - (!rtex->tc_compatible_htile || - !r600_can_sample_zs(rtex, sview->is_stencil_sampler)); + /* If the depth/stencil texture is TC-compatible, no decompression + * will be done. The decompression function will only flush DB caches + * to make it coherent with shaders. That's necessary because the driver + * doesn't flush DB caches in any other case. + */ + return rtex->db_compatible; } static void si_update_shader_needs_decompress_mask(struct si_context *sctx, @@ -633,9 +635,8 @@ static void si_set_sampler_views(struct pipe_context *ctx, if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) { struct r600_texture *rtex = (struct r600_texture*)views[i]->texture; - struct si_sampler_view *rview = (struct si_sampler_view *)views[i]; - if (depth_needs_decompression(rtex, rview)) { + if (depth_needs_decompression(rtex)) { samplers->needs_depth_decompress_mask |= 1u << slot; } else { samplers->needs_depth_decompress_mask &= ~(1u << slot); @@ -2470,7 +2471,7 @@ static void si_make_texture_handle_resident(struct pipe_context *ctx, struct r600_texture *rtex = (struct r600_texture *)sview->base.texture; - if (depth_needs_decompression(rtex, sview)) { + if (depth_needs_decompression(rtex)) { util_dynarray_append( &sctx->resident_tex_needs_depth_decompress, struct si_texture_handle *, diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 0c77f933127..1c174083819 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -362,6 +362,7 @@ struct si_context { bool db_stencil_clear:1; bool db_stencil_disable_expclear:1; bool occlusion_queries_disabled:1; + bool generate_mipmap_for_depth:1; /* Emitted draw state. */ bool gs_tri_strip_adj_fix:1; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 831c6b8ea32..193816d2bf7 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2525,15 +2525,26 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * the only client not using TC that can change textures is * the framebuffer. * - * Flush all CB and DB caches here because all buffers can be used - * for write by both TC (with shader image stores) and CB/DB. + * Wait for compute shaders because of possible transitions: + * - FB write -> shader read + * - shader write -> FB read + * + * DB caches are flushed on demand (using si_decompress_textures). */ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_FLUSH_AND_INV_CB | - SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_CS_PARTIAL_FLUSH; + /* u_blitter doesn't invoke depth decompression when it does multiple + * blits in a row, but the only case when it matters for DB is when + * doing generate_mipmap. So here we flush DB manually between + * individual generate_mipmap blits. + * Note that lower mipmap levels aren't compressed. + */ + if (sctx->generate_mipmap_for_depth) + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB; + /* Take the maximum of the old and new count. If the new count is lower, * dirtying is needed to disable the unbound colorbuffers. */ @@ -3990,9 +4001,9 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } + /* Depth and stencil are flushed in si_decompress_textures when needed. */ if (flags & PIPE_BARRIER_FRAMEBUFFER) - sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | - SI_CONTEXT_FLUSH_AND_INV_DB; + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB; if (flags & (PIPE_BARRIER_FRAMEBUFFER | PIPE_BARRIER_INDIRECT_BUFFER)) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index d039e015cc6..d13c8b7086f 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1402,11 +1402,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; struct r600_texture *rtex = (struct r600_texture *)surf->texture; - if (!rtex->tc_compatible_htile) - rtex->dirty_level_mask |= 1 << surf->u.tex.level; + rtex->dirty_level_mask |= 1 << surf->u.tex.level; - if (rtex->surface.flags & RADEON_SURF_SBUFFER && - (!rtex->tc_compatible_htile || !rtex->can_sample_s)) + if (rtex->surface.flags & RADEON_SURF_SBUFFER) rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; } if (sctx->framebuffer.compressed_cb_mask) { -- 2.30.2