From 0c2eed0edec877584c9362bd9cb9004ff10a8b91 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 29 Aug 2015 02:32:13 +0200 Subject: [PATCH] radeonsi: avoid redundant CB and DB register updates MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The main idea is to avoid setting CB_COLORi_INFO = 0 for i>0 repeatedly when those colorbuffers aren't used. This is mainly for glamor. Same for DB. Z_INFO and STENCIL_INFO need to be cleared only once. Reviewed-by: Alex Deucher Acked-by: Christian König --- src/gallium/drivers/r600/r600_blit.c | 2 +- src/gallium/drivers/radeon/r600_pipe_common.h | 2 +- src/gallium/drivers/radeon/r600_texture.c | 4 ++- src/gallium/drivers/radeonsi/si_blit.c | 7 +++-- src/gallium/drivers/radeonsi/si_hw_context.c | 5 +++- src/gallium/drivers/radeonsi/si_pipe.h | 2 ++ src/gallium/drivers/radeonsi/si_state.c | 26 ++++++++++++++----- 7 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 22a0950a491..08b2f644cad 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -395,7 +395,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers, if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) { evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom, - &buffers, color); + &buffers, NULL, color); if (!buffers) return; /* all buffers have been fast cleared */ } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 29db1cc4e07..d22c230ea3c 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -562,7 +562,7 @@ unsigned r600_translate_colorswap(enum pipe_format format); void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct pipe_framebuffer_state *fb, struct r600_atom *fb_state, - unsigned *buffers, + unsigned *buffers, unsigned *dirty_cbufs, const union pipe_color_union *color); void r600_init_screen_texture_functions(struct r600_common_screen *rscreen); void r600_init_context_texture_functions(struct r600_common_context *rctx); diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 54696910e43..89f18fb106f 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -1217,7 +1217,7 @@ static void evergreen_set_clear_color(struct r600_texture *rtex, void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct pipe_framebuffer_state *fb, struct r600_atom *fb_state, - unsigned *buffers, + unsigned *buffers, unsigned *dirty_cbufs, const union pipe_color_union *color) { int i; @@ -1279,6 +1279,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, tex->cmask.offset, tex->cmask.size, 0, true); tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; + if (dirty_cbufs) + *dirty_cbufs |= 1 << i; rctx->set_atom_dirty(rctx, fb_state, true); *buffers &= ~clear_bit; } diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index c28b2a80088..d1486bd822d 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -336,8 +336,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers, zsbuf ? (struct r600_texture*)zsbuf->texture : NULL; if (buffers & PIPE_CLEAR_COLOR) { - evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom, - &buffers, color); + evergreen_do_fast_color_clear(&sctx->b, fb, + &sctx->framebuffer.atom, &buffers, + &sctx->framebuffer.dirty_cbufs, + color); if (!buffers) return; /* all buffers have been fast cleared */ } @@ -374,6 +376,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers, } zstex->depth_clear_value = depth; + sctx->framebuffer.dirty_zsbuf = true; si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */ sctx->db_depth_clear = true; si_mark_atom_dirty(sctx, &sctx->db_render_state); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 2381b6c0004..561378196b5 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -184,8 +184,11 @@ void si_begin_new_cs(struct si_context *ctx) /* The CS initialization should be emitted before everything else. */ si_pm4_emit(ctx, ctx->init_config); - si_mark_atom_dirty(ctx, &ctx->clip_regs); + ctx->framebuffer.dirty_cbufs = (1 << 8) - 1; + ctx->framebuffer.dirty_zsbuf = true; si_mark_atom_dirty(ctx, &ctx->framebuffer.atom); + + si_mark_atom_dirty(ctx, &ctx->clip_regs); si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs); si_mark_atom_dirty(ctx, &ctx->msaa_config); si_mark_atom_dirty(ctx, &ctx->db_render_state); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 47ad619ccdc..9be4aa7d5b5 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -127,6 +127,8 @@ struct si_framebuffer { unsigned cb0_is_integer; unsigned compressed_cb_mask; unsigned export_16bpc; + unsigned dirty_cbufs; + bool dirty_zsbuf; }; struct si_scissors { diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 88964e1a545..3c250484e3a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2109,6 +2109,13 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, SI_CONTEXT_INV_TC_L2 | SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + /* Take the maximum of the old and new count. If the new count is lower, + * dirtying is needed to disable the unbound colorbuffers. + */ + sctx->framebuffer.dirty_cbufs |= + (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; + sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; + util_copy_framebuffer_state(&sctx->framebuffer.state, state); sctx->framebuffer.export_16bpc = 0; @@ -2219,6 +2226,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom /* Colorbuffers. */ for (i = 0; i < nr_cbufs; i++) { + if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) + continue; + cb = (struct r600_surface*)state->cbufs[i]; if (!cb) { r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, @@ -2259,17 +2269,18 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */ } /* set CB_COLOR1_INFO for possible dual-src blending */ - if (i == 1 && state->cbufs[0]) { + if (i == 1 && state->cbufs[0] && + sctx->framebuffer.dirty_cbufs & (1 << 0)) { r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, cb->cb_color_info | tex->cb_color_info); i++; } - for (; i < 8 ; i++) { - r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); - } + for (; i < 8 ; i++) + if (sctx->framebuffer.dirty_cbufs & (1 << i)) + r600_write_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); /* ZS buffer. */ - if (state->zsbuf) { + if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { struct r600_surface *zb = (struct r600_surface*)state->zsbuf; struct r600_texture *rtex = (struct r600_texture*)zb->base.texture; @@ -2304,7 +2315,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value)); r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, zb->pa_su_poly_offset_db_fmt_cntl); - } else { + } else if (sctx->framebuffer.dirty_zsbuf) { r600_write_context_reg_seq(cs, R_028040_DB_Z_INFO, 2); radeon_emit(cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */ radeon_emit(cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */ @@ -2314,6 +2325,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */ r600_write_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); + + sctx->framebuffer.dirty_cbufs = 0; + sctx->framebuffer.dirty_zsbuf = false; } static void si_emit_msaa_sample_locs(struct si_context *sctx, -- 2.30.2