From: Marek Olšák Date: Thu, 26 Jan 2017 01:56:15 +0000 (+0100) Subject: radeonsi: atomize the scratch buffer state X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=408f9a1584c164076a1bbd8aecde9d305b7e21eb;p=mesa.git radeonsi: atomize the scratch buffer state The update frequency is very low. Difference: Only account for the size when allocating a new one and when starting a new IB, and check for NULL. (v3) Reviewed-by: Nicolai Hähnle --- diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index b39825627ed..e1987651e60 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -283,7 +283,8 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size, PIPE_USAGE_DEFAULT, scratch_size); if (!sctx->scratch_buffer) return; - sctx->emit_scratch_reloc = true; + + si_mark_atom_dirty(sctx, &sctx->scratch_state); } si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b, diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index e5da730458a..c80b884970a 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -235,6 +235,12 @@ void si_begin_new_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, &ctx->b.scissors.atom); si_mark_atom_dirty(ctx, &ctx->b.viewports.atom); + si_mark_atom_dirty(ctx, &ctx->scratch_state); + if (ctx->scratch_buffer) { + r600_context_add_resource_size(&ctx->b.b, + &ctx->scratch_buffer->b.b); + } + r600_postflush_resume_features(&ctx->b); assert(!ctx->b.gfx.cs->prev_dw); @@ -251,7 +257,6 @@ void si_begin_new_cs(struct si_context *ctx) ctx->last_multi_vgt_param = -1; ctx->last_rast_prim = -1; ctx->last_sc_line_stipple = ~0; - ctx->emit_scratch_reloc = true; ctx->last_ls = NULL; ctx->last_tcs = NULL; ctx->last_tes_sh_base = -1; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 724d89e7475..d17d55a1f11 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -352,8 +352,8 @@ struct si_context { bool gs_tri_strip_adj_fix; /* Scratch buffer */ + struct r600_atom scratch_state; struct r600_resource *scratch_buffer; - bool emit_scratch_reloc; unsigned scratch_waves; unsigned spi_tmpring_size; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 19880c5cfdd..6bb067333f3 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -152,6 +152,7 @@ union si_state_atoms { struct r600_atom *viewports; struct r600_atom *stencil_ref; struct r600_atom *spi_map; + struct r600_atom *scratch_state; } s; struct r600_atom *array[0]; }; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index c82ed3a1312..fb5954eb6c5 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -459,25 +459,6 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, return ia_multi_vgt_param; } -static void si_emit_scratch_reloc(struct si_context *sctx) -{ - struct radeon_winsys_cs *cs = sctx->b.gfx.cs; - - if (!sctx->emit_scratch_reloc) - return; - - radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, - sctx->spi_tmpring_size); - - if (sctx->scratch_buffer) { - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - sctx->scratch_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SCRATCH_BUFFER); - - } - sctx->emit_scratch_reloc = false; -} - /* rast_prim is the primitive type after GS. */ static void si_emit_rasterizer_prim_state(struct si_context *sctx) { @@ -1133,8 +1114,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } /* Add buffer sizes for memory checking in need_cs_space. */ - if (sctx->emit_scratch_reloc && sctx->scratch_buffer) - r600_context_add_resource_size(ctx, &sctx->scratch_buffer->b.b); if (info->indirect) r600_context_add_resource_size(ctx, info->indirect); @@ -1174,14 +1153,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } sctx->dirty_states = 0; - si_emit_scratch_reloc(sctx); si_emit_rasterizer_prim_state(sctx); si_emit_draw_registers(sctx, info); si_ce_pre_draw_synchronization(sctx); - si_emit_draw_packets(sctx, info, &ib); - si_ce_post_draw_synchronization(sctx); if (sctx->trace_buf) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index ebd243545d4..bde02f520a8 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2200,7 +2200,10 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) PIPE_USAGE_DEFAULT, scratch_needed_size); if (!sctx->scratch_buffer) return false; - sctx->emit_scratch_reloc = true; + + si_mark_atom_dirty(sctx, &sctx->scratch_state); + r600_context_add_resource_size(&sctx->b.b, + &sctx->scratch_buffer->b.b); } /* Update the shaders, so they are using the latest scratch. The @@ -2259,7 +2262,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10); if (spi_tmpring_size != sctx->spi_tmpring_size) { sctx->spi_tmpring_size = spi_tmpring_size; - sctx->emit_scratch_reloc = true; + si_mark_atom_dirty(sctx, &sctx->scratch_state); } return true; } @@ -2588,9 +2591,26 @@ bool si_update_shaders(struct si_context *sctx) return true; } +static void si_emit_scratch_state(struct si_context *sctx, + struct r600_atom *atom) +{ + struct radeon_winsys_cs *cs = sctx->b.gfx.cs; + + radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, + sctx->spi_tmpring_size); + + if (sctx->scratch_buffer) { + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + sctx->scratch_buffer, RADEON_USAGE_READWRITE, + RADEON_PRIO_SCRATCH_BUFFER); + } +} + void si_init_shader_functions(struct si_context *sctx) { si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map); + si_init_atom(sctx, &sctx->scratch_state, &sctx->atoms.s.scratch_state, + si_emit_scratch_state); sctx->b.b.create_vs_state = si_create_shader_selector; sctx->b.b.create_tcs_state = si_create_shader_selector;