From f4d095cc651af005d5760aa9dd06e6ae7007fab6 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Tue, 4 Jul 2017 16:11:16 +0200
Subject: [PATCH] radeonsi: update dirty_level_mask only when flushing or unbinding framebuffer

This fixes corruption with bindless textures in Dawn Of War 3.

The do_update_surf_dirtiness mechanism was complicated and dirty_level_mask
was only updated after the first draw call. The problem is bindless textures
are checked for decompression every draw call and we would only decompress
after the first draw call.

The solution is to set dirtiness after the last draw call to the framebuffer,
so the (unconditional) decompression of bindless textures happens at the
right time.

Cc: 17.2
Tested-by: Samuel Pitoiset
---
 src/gallium/drivers/radeonsi/si_blit.c       | 31 +++++++++++-----
 src/gallium/drivers/radeonsi/si_pipe.h       |  1 -
 src/gallium/drivers/radeonsi/si_state.c      | 38 ++++++++++++++++++--
 src/gallium/drivers/radeonsi/si_state.h      |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c | 31 ----------------
 5 files changed, 59 insertions(+), 43 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 631676bcd79..caa4c3cad61 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -121,9 +121,7 @@ si_blit_dbcb_copy(struct si_context *sctx,
 
 	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);
 
-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;
 
 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
@@ -169,7 +167,6 @@ si_blit_dbcb_copy(struct si_context *sctx,
 	}
 
 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
 	sctx->dbcb_depth_copy_enabled = false;
 	sctx->dbcb_stencil_copy_enabled = false;
 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
@@ -225,9 +222,7 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
 
 	surf_tmpl.format = texture->resource.b.b.format;
 
-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;
 
 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
@@ -267,7 +262,6 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
 		texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;
 
 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
 	sctx->db_flush_depth_inplace = false;
 	sctx->db_flush_stencil_inplace = false;
 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
@@ -474,9 +468,7 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
 		custom_blend = sctx->custom_blend_eliminate_fastclear;
 	}
 
-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;
 
 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
@@ -519,7 +511,6 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
 	}
 
 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
 
 	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
 			 SI_CONTEXT_INV_GLOBAL_L2 |
@@ -971,10 +962,32 @@ static void si_decompress_subresource(struct pipe_context *ctx,
 		if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
 			planes &= ~PIPE_MASK_S;
 
+		/* If we've rendered into the framebuffer and it's a blitting
+		 * source, make sure the decompression pass is invoked
+		 * by dirtying the framebuffer.
+		 */
+		if (sctx->framebuffer.state.zsbuf &&
+		    sctx->framebuffer.state.zsbuf->u.tex.level == level &&
+		    sctx->framebuffer.state.zsbuf->texture == tex)
+			si_update_fb_dirtiness_after_rendering(sctx);
+
 		si_decompress_depth(sctx, rtex, planes,
 				    level, level,
 				    first_layer, last_layer);
 	} else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
+		/* If we've rendered into the framebuffer and it's a blitting
+		 * source, make sure the decompression pass is invoked
+		 * by dirtying the framebuffer.
+		 */
+		for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+			if (sctx->framebuffer.state.cbufs[i] &&
+			    sctx->framebuffer.state.cbufs[i]->u.tex.level == level &&
+			    sctx->framebuffer.state.cbufs[i]->texture == tex) {
+				si_update_fb_dirtiness_after_rendering(sctx);
+				break;
+			}
+		}
+
 		si_blit_decompress_color(ctx, rtex, level, level,
 					 first_layer, last_layer, false);
 	}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 93f1d653e13..f7e0486ddc1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -188,7 +188,6 @@ struct si_framebuffer {
 	ubyte dirty_cbufs;
 	bool dirty_zsbuf;
 	bool any_dst_linear;
-	bool do_update_surf_dirtiness;
 };
 
 struct si_clip_state {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 650651d43b9..7dadc4aa24e 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2456,6 +2456,38 @@ static void si_init_depth_surface(struct si_context *sctx,
 	surf->depth_initialized = true;
 }
 
+void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
+{
+	if (sctx->decompression_enabled)
+		return;
+
+	if (sctx->framebuffer.state.zsbuf) {
+		struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
+		struct r600_texture *rtex = (struct r600_texture *)surf->texture;
+
+		rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+
+		if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+			rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
+	}
+	if (sctx->framebuffer.compressed_cb_mask) {
+		struct pipe_surface *surf;
+		struct r600_texture *rtex;
+		unsigned mask = sctx->framebuffer.compressed_cb_mask;
+
+		do {
+			unsigned i = u_bit_scan(&mask);
+			surf = sctx->framebuffer.state.cbufs[i];
+			rtex = (struct r600_texture*)surf->texture;
+
+			if (rtex->fmask.size)
+				rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+			if (rtex->dcc_gather_statistics)
+				rtex->separate_dcc_dirty = true;
+		} while (mask);
+	}
+}
+
 static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
 {
 	for (int i = 0; i < state->nr_cbufs; ++i) {
@@ -2483,6 +2515,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	bool unbound = false;
 	int i;
 
+	si_update_fb_dirtiness_after_rendering(sctx);
+
 	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
 		if (!sctx->framebuffer.state.cbufs[i])
 			continue;
@@ -2680,7 +2714,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 		 * changes come from the decompression passes themselves.
 		 */
 		sctx->need_check_render_feedback = true;
-		sctx->framebuffer.do_update_surf_dirtiness = true;
 	}
 }
 
@@ -3988,6 +4021,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
+	si_update_fb_dirtiness_after_rendering(sctx);
+
 	/* Multisample surfaces are flushed in si_decompress_textures. */
 	if (sctx->framebuffer.nr_samples <= 1 &&
 	    sctx->framebuffer.state.nr_cbufs) {
@@ -3995,7 +4030,6 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 				 SI_CONTEXT_INV_GLOBAL_L2 |
 				 SI_CONTEXT_FLUSH_AND_INV_CB;
 	}
-	sctx->framebuffer.do_update_surf_dirtiness = true;
 }
 
 /* This only ensures coherency for shader image/buffer stores. */
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index ec28abaf9a4..acc8fb7870a 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -384,6 +384,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
 			      const struct pipe_sampler_view *state,
 			      unsigned width0, unsigned height0,
 			      unsigned force_level);
+void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);
 
 /* si_state_shader.c */
 bool si_update_shaders(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 332e0c43de8..c1edf7f3706 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1207,7 +1207,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		sctx->framebuffer.dirty_cbufs |=
 			((1 << sctx->framebuffer.state.nr_cbufs) - 1);
 		sctx->framebuffer.dirty_zsbuf = true;
-		sctx->framebuffer.do_update_surf_dirtiness = true;
 		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
 		si_update_all_texture_descriptors(sctx);
 	}
@@ -1392,36 +1391,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
 	}
 
-	if (sctx->framebuffer.do_update_surf_dirtiness) {
-		/* Set the depth buffer as dirty. */
-		if (sctx->framebuffer.state.zsbuf) {
-			struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
-			struct r600_texture *rtex = (struct r600_texture *)surf->texture;
-
-			rtex->dirty_level_mask |= 1 << surf->u.tex.level;
-
-			if (rtex->surface.flags & RADEON_SURF_SBUFFER)
-				rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
-		}
-		if (sctx->framebuffer.compressed_cb_mask) {
-			struct pipe_surface *surf;
-			struct r600_texture *rtex;
-			unsigned mask = sctx->framebuffer.compressed_cb_mask;
-
-			do {
-				unsigned i = u_bit_scan(&mask);
-				surf = sctx->framebuffer.state.cbufs[i];
-				rtex = (struct r600_texture*)surf->texture;
-
-				if (rtex->fmask.size)
-					rtex->dirty_level_mask |= 1 << surf->u.tex.level;
-				if (rtex->dcc_gather_statistics)
-					rtex->separate_dcc_dirty = true;
-			} while (mask);
-		}
-		sctx->framebuffer.do_update_surf_dirtiness = false;
-	}
-
 	sctx->b.num_draw_calls++;
 	if (info->primitive_restart)
 		sctx->b.num_prim_restart_calls++;
-- 
2.30.2