From 8fee75d606e83b1f0d665fef9ea59ba24fc6682d Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Wed, 13 Apr 2016 22:31:17 +0200 Subject: [PATCH] radeonsi: Create CE IB. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Based on work by Marek Olšák. v2: Add preamble IB. Leaves the load packet in the space calculation as the radeon winsys might not be able to support a preamble. The added space calculation may look expensive, but is converted to a constant with (at least) -O2 and -O3. v3: - Fix code style. - Remove needed space for vertex buffer descriptors. - Fail when the preamble cannot be created. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Marek Olšák --- src/gallium/drivers/radeon/r600_pipe_common.c | 1 + src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/drivers/radeonsi/si_hw_context.c | 35 ++++++++++++++++++- src/gallium/drivers/radeonsi/si_pipe.c | 15 ++++++++ src/gallium/drivers/radeonsi/si_pipe.h | 3 ++ 5 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index a7477abea34..a8660f20c86 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -402,6 +402,7 @@ static const struct debug_named_value common_debug_options[] = { { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." }, { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." 
}, { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" }, + { "noce", DBG_NO_CE, "Disable the constant engine"}, DEBUG_NAMED_VALUE_END /* must be last */ }; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index ba390a92319..44ab67537b1 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -95,6 +95,7 @@ #define DBG_NO_RB_PLUS (1llu << 45) #define DBG_SI_SCHED (1llu << 46) #define DBG_MONOLITHIC_SHADERS (1llu << 47) +#define DBG_NO_CE (1llu << 48) #define R600_MAP_BUFFER_ALIGNMENT 64 #define R600_MAX_VIEWPORTS 16 diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index b621b55abd3..5294898aa7f 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -26,10 +26,41 @@ #include "si_pipe.h" +static unsigned si_descriptor_list_cs_space(unsigned count, unsigned element_size) +{ + /* Ensure we have enough space to start a new range in a hole */ + assert(element_size >= 3); + + /* 5 dwords for possible load to reinitialize when we have no preamble + * IB + 5 dwords for write to L2 + 3 dwords for every range written to + * CE RAM. 
+ */ + return 5 + 5 + 3 + count * element_size; +} + +static unsigned si_ce_needed_cs_space(void) +{ + unsigned space = 0; + + space += si_descriptor_list_cs_space(SI_NUM_CONST_BUFFERS, 4); + space += si_descriptor_list_cs_space(SI_NUM_RW_BUFFERS, 4); + space += si_descriptor_list_cs_space(SI_NUM_SHADER_BUFFERS, 4); + space += si_descriptor_list_cs_space(SI_NUM_SAMPLERS, 16); + space += si_descriptor_list_cs_space(SI_NUM_IMAGES, 8); + + space *= SI_NUM_SHADERS; + + /* Increment CE counter packet */ + space += 2; + + return space; +} + /* initialize */ void si_need_cs_space(struct si_context *ctx) { struct radeon_winsys_cs *cs = ctx->b.gfx.cs; + struct radeon_winsys_cs *ce_ib = ctx->ce_ib; struct radeon_winsys_cs *dma = ctx->b.dma.cs; /* Flush the DMA IB if it's not empty. */ @@ -53,7 +84,9 @@ void si_need_cs_space(struct si_context *ctx) /* If the CS is sufficiently large, don't count the space needed * and just flush if there is not enough space left. */ - if (unlikely(cs->cdw > cs->max_dw - 2048)) + if (unlikely(cs->cdw > cs->max_dw - 2048 || + (ce_ib && ce_ib->max_dw - ce_ib->cdw < + si_ce_needed_cs_space()))) ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 6a990ed64c3..ddfa59fd128 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -142,6 +142,21 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush, sctx); + + if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) { + sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs); + if (!sctx->ce_ib) + goto fail; + + if (ws->cs_add_const_preamble_ib) { + sctx->ce_preamble_ib = + ws->cs_add_const_preamble_ib(sctx->b.gfx.cs); + + if (!sctx->ce_preamble_ib) + goto fail; + } + } + sctx->b.gfx.flush = si_context_gfx_flush; /* Border colors. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 0398b1df61e..b8db3b2ce85 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -191,6 +191,9 @@ struct si_context { void *custom_blend_dcc_decompress; void *pstipple_sampler_state; struct si_screen *screen; + struct radeon_winsys_cs *ce_ib; + struct radeon_winsys_cs *ce_preamble_ib; + struct pipe_fence_handle *last_gfx_fence; struct si_shader_ctx_state fixed_func_tcs_shader; LLVMTargetMachineRef tm; -- 2.30.2