From b8892bc81820884cd42ada8699d0c28cb8e39dda Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 10 Mar 2020 18:46:17 -0400 Subject: [PATCH] radeonsi: don't restore states at the beginning of IBs if they're shadowed Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_gfx_cs.c | 139 ++++++++++++----------- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/drivers/radeonsi/si_pipe.h | 2 +- src/gallium/drivers/radeonsi/si_pm4.c | 17 ++- src/gallium/drivers/radeonsi/si_pm4.h | 2 +- 5 files changed, 92 insertions(+), 70 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 59f6c0141ca..6a50dcae242 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -251,7 +251,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_h if (ctx->current_saved_cs) si_saved_cs_reference(&ctx->current_saved_cs, NULL); - si_begin_new_gfx_cs(ctx); + si_begin_new_gfx_cs(ctx, false); ctx->gfx_flush_in_progress = false; } @@ -383,7 +383,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx) ctx->last_gs_out_prim = 0; /* cleared by CLEAR_STATE */ } -void si_begin_new_gfx_cs(struct si_context *ctx) +void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) { if (ctx->is_debug) si_begin_gfx_cs_debug(ctx); @@ -411,9 +411,12 @@ void si_begin_new_gfx_cs(struct si_context *ctx) RADEON_PRIO_DESCRIPTORS); } - ctx->cs_shader_state.initialized = false; si_add_all_descriptors_to_bo_list(ctx); - si_shader_pointers_mark_dirty(ctx); + + if (first_cs || !ctx->shadowed_regs) { + si_shader_pointers_mark_dirty(ctx); + ctx->cs_shader_state.initialized = false; + } if (!ctx->has_graphics) { ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw; @@ -428,7 +431,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) /* set all valid group as dirty so they get reemited on * next draw command */ - si_pm4_reset_emitted(ctx); + si_pm4_reset_emitted(ctx, first_cs); /* The CS initialization should be emitted before everything else. */ if (ctx->cs_preamble_state) @@ -453,8 +456,9 @@ void si_begin_new_gfx_cs(struct si_context *ctx) /* CLEAR_STATE disables all colorbuffers, so only enable bound ones. */ bool has_clear_state = ctx->screen->info.has_clear_state; - if (has_clear_state) { - ctx->framebuffer.dirty_cbufs = u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs); + if (has_clear_state || ctx->shadowed_regs) { + ctx->framebuffer.dirty_cbufs = + u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs); /* CLEAR_STATE disables the zbuffer, so only enable it if it's bound. */ ctx->framebuffer.dirty_zsbuf = ctx->framebuffer.state.zsbuf != NULL; } else { @@ -462,39 +466,72 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->framebuffer.dirty_zsbuf = true; } /* This should always be marked as dirty to set the framebuffer scissor - * at least. */ + * at least. + * + * Even with shadowed registers, we have to add buffers to the buffer list. + * All of these do that. + */ si_mark_atom_dirty(ctx, &ctx->atoms.s.framebuffer); - - si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs); - /* CLEAR_STATE sets zeros. */ - if (!has_clear_state || ctx->clip_state.any_nonzeros) - si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_state); - ctx->sample_locs_num_samples = 0; - si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_sample_locs); - si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_config); - /* CLEAR_STATE sets 0xffff. */ - if (!has_clear_state || ctx->sample_mask != 0xffff) - si_mark_atom_dirty(ctx, &ctx->atoms.s.sample_mask); - si_mark_atom_dirty(ctx, &ctx->atoms.s.cb_render_state); - /* CLEAR_STATE sets zeros. */ - if (!has_clear_state || ctx->blend_color.any_nonzeros) - si_mark_atom_dirty(ctx, &ctx->atoms.s.blend_color); - si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state); - if (ctx->chip_class >= GFX9) - si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state); - si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref); - si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map); - if (!ctx->screen->use_ngg_streamout) - si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable); si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond); - /* CLEAR_STATE disables all window rectangles. */ - if (!has_clear_state || ctx->num_window_rectangles > 0) - si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles); - - si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband); - si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors); si_mark_atom_dirty(ctx, &ctx->atoms.s.viewports); + if (first_cs || !ctx->shadowed_regs) { + /* These don't add any buffers, so skip them with shadowing. */ + si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs); + /* CLEAR_STATE sets zeros. */ + if (!has_clear_state || ctx->clip_state.any_nonzeros) + si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_state); + ctx->sample_locs_num_samples = 0; + si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_sample_locs); + si_mark_atom_dirty(ctx, &ctx->atoms.s.msaa_config); + /* CLEAR_STATE sets 0xffff. */ + if (!has_clear_state || ctx->sample_mask != 0xffff) + si_mark_atom_dirty(ctx, &ctx->atoms.s.sample_mask); + si_mark_atom_dirty(ctx, &ctx->atoms.s.cb_render_state); + /* CLEAR_STATE sets zeros. */ + if (!has_clear_state || ctx->blend_color.any_nonzeros) + si_mark_atom_dirty(ctx, &ctx->atoms.s.blend_color); + si_mark_atom_dirty(ctx, &ctx->atoms.s.db_render_state); + if (ctx->chip_class >= GFX9) + si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state); + si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref); + si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map); + if (!ctx->screen->use_ngg_streamout) + si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable); + /* CLEAR_STATE disables all window rectangles. */ + if (!has_clear_state || ctx->num_window_rectangles > 0) + si_mark_atom_dirty(ctx, &ctx->atoms.s.window_rectangles); + si_mark_atom_dirty(ctx, &ctx->atoms.s.guardband); + si_mark_atom_dirty(ctx, &ctx->atoms.s.scissors); + + /* Invalidate various draw states so that they are emitted before + * the first draw call. */ + si_invalidate_draw_sh_constants(ctx); + ctx->last_index_size = -1; + ctx->last_primitive_restart_en = -1; + ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN; + ctx->last_prim = -1; + ctx->last_multi_vgt_param = -1; + ctx->last_vs_state = ~0; + ctx->last_ls = NULL; + ctx->last_tcs = NULL; + ctx->last_tes_sh_base = -1; + ctx->last_num_tcs_input_cp = -1; + ctx->last_ls_hs_config = -1; /* impossible value */ + ctx->last_binning_enabled = -1; + + if (has_clear_state) { + si_set_tracked_regs_to_clear_state(ctx); + } else { + /* Set all register values to unknown. */ + ctx->tracked_regs.reg_saved = 0; + ctx->last_gs_out_prim = -1; /* unknown */ + } + + /* 0xffffffff is an impossible value to register SPI_PS_INPUT_CNTL_n */ + memset(ctx->tracked_regs.spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32); + } + si_mark_atom_dirty(ctx, &ctx->atoms.s.scratch_state); if (ctx->scratch_buffer) { si_context_add_resource_size(ctx, &ctx->scratch_buffer->b.b); @@ -510,24 +547,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) assert(!ctx->gfx_cs->prev_dw); ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw; - - /* Invalidate various draw states so that they are emitted before - * the first draw call. */ - si_invalidate_draw_sh_constants(ctx); - ctx->last_index_size = -1; - ctx->last_primitive_restart_en = -1; - ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN; - ctx->last_prim = -1; - ctx->last_multi_vgt_param = -1; - ctx->last_vs_state = ~0; - ctx->last_ls = NULL; - ctx->last_tcs = NULL; - ctx->last_tes_sh_base = -1; - ctx->last_num_tcs_input_cp = -1; - ctx->last_ls_hs_config = -1; /* impossible value */ - ctx->last_binning_enabled = -1; ctx->small_prim_cull_info_dirty = ctx->small_prim_cull_info_buf != NULL; - ctx->prim_discard_compute_ib_initialized = false; /* Compute-based primitive discard: @@ -540,17 +560,4 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->index_ring_base = ctx->index_ring_size_per_ib; ctx->index_ring_offset = 0; - - if (!ctx->shadowed_regs) { - if (has_clear_state) { - si_set_tracked_regs_to_clear_state(ctx); - } else { - /* Set all register values to unknown. */ - ctx->tracked_regs.reg_saved = 0; - ctx->last_gs_out_prim = -1; /* unknown */ - } - } - - /* 0xffffffff is a impossible value to register SPI_PS_INPUT_CNTL_n */ - memset(ctx->tracked_regs.spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 40c7270e46e..aa76cb81325 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -676,7 +676,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign si_init_cp_reg_shadowing(sctx); } - si_begin_new_gfx_cs(sctx); + si_begin_new_gfx_cs(sctx, true); assert(sctx->gfx_cs->current.cdw == sctx->initial_gfx_cs_size); /* Initialize per-context buffers. */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index c59bbd5463b..490dc13d30f 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1420,7 +1420,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen); void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence); void si_allocate_gds(struct si_context *ctx); void si_set_tracked_regs_to_clear_state(struct si_context *ctx); -void si_begin_new_gfx_cs(struct si_context *ctx); +void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs); void si_need_gfx_cs_space(struct si_context *ctx); void si_unref_sdma_uploads(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index f3446267970..d1d003af1d4 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -119,8 +119,23 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state) state->atom.emit(sctx); } -void si_pm4_reset_emitted(struct si_context *sctx) +void si_pm4_reset_emitted(struct si_context *sctx, bool first_cs) { + if (!first_cs && sctx->shadowed_regs) { + /* Only dirty states that contain buffers, so that they are + * added to the buffer list on the next draw call. + */ + for (unsigned i = 0; i < SI_NUM_STATES; i++) { + struct si_pm4_state *state = sctx->emitted.array[i]; + + if (state && state->shader) { + sctx->emitted.array[i] = NULL; + sctx->dirty_states |= 1 << i; + } + } + return; + } + memset(&sctx->emitted, 0, sizeof(sctx->emitted)); sctx->dirty_states |= u_bit_consecutive(0, SI_NUM_STATES); } diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index 772512596f1..f8edea4d0cb 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -61,6 +61,6 @@ void si_pm4_clear_state(struct si_pm4_state *state); void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx); void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state); -void si_pm4_reset_emitted(struct si_context *sctx); +void si_pm4_reset_emitted(struct si_context *sctx, bool first_cs); #endif -- 2.30.2