From 2f2e941e2d9d6155e0661f452343e7a80f2439c4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Tue, 29 Nov 2016 17:41:59 +0100 Subject: [PATCH] radeonsi: use a single descriptor for the GSVS ring MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We can hardcode all of the fields for swizzling in the geometry shader. The advantage is that we use fewer descriptor slots and we no longer have to update any of the (ring) descriptors when the geometry shader changes. Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_pipe.h | 1 - src/gallium/drivers/radeonsi/si_shader.c | 73 +++++++++++++++++-- src/gallium/drivers/radeonsi/si_state.h | 6 +- .../drivers/radeonsi/si_state_shaders.c | 37 +--------- 4 files changed, 67 insertions(+), 50 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 31b7985fef9..9f79c2aa804 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -327,7 +327,6 @@ struct si_context { int last_vtx_reuse_depth; int current_rast_prim; /* primitive type after TES, GS */ bool gs_tri_strip_adj_fix; - unsigned last_gsvs_itemsize; /* Scratch buffer */ struct r600_resource *scratch_buffer; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 48ccd83b396..9b495925a6e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5817,6 +5817,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx) { struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); @@ -5836,18 +5837,74 @@ static void preload_ring_buffers(struct si_shader_context *ctx) } if (ctx->shader->is_gs_copy_shader) { - LLVMValueRef offset = lp_build_const_int32(gallivm, SI_VS_RING_GSVS); + LLVMValueRef 
offset = lp_build_const_int32(gallivm, SI_RING_GSVS); ctx->gsvs_ring[0] = build_indexed_load_const(ctx, buf_ptr, offset); - } - if (ctx->type == PIPE_SHADER_GEOMETRY) { - int i; - for (i = 0; i < 4; i++) { - LLVMValueRef offset = lp_build_const_int32(gallivm, SI_GS_RING_GSVS0 + i); + } else if (ctx->type == PIPE_SHADER_GEOMETRY) { + struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld; + LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS); + LLVMValueRef base_ring; + + base_ring = build_indexed_load_const(ctx, buf_ptr, offset); + + /* The conceptual layout of the GSVS ring is + * v0c0 .. vLc0 v0c1 .. vLc1 .. + * but the real memory layout is swizzled across + * threads: + * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL + * t16v0c0 .. + * Override the buffer descriptor accordingly. + */ + LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2); + unsigned max_gsvs_emit_size = ctx->shader->selector->max_gsvs_emit_size; + unsigned num_records; + + num_records = 64; + if (ctx->screen->b.chip_class >= VI) + num_records *= max_gsvs_emit_size; + + for (unsigned stream = 0; stream < 4; ++stream) { + LLVMValueRef ring, tmp; + + if (!ctx->shader->selector->info.num_stream_output_components[stream]) + continue; - ctx->gsvs_ring[i] = - build_indexed_load_const(ctx, buf_ptr, offset); + /* Limit on the stride field for <= CIK. 
*/ + assert(max_gsvs_emit_size < (1 << 14)); + + ring = LLVMBuildBitCast(builder, base_ring, v2i64, ""); + tmp = LLVMBuildExtractElement(builder, ring, uint->zero, ""); + tmp = LLVMBuildAdd(builder, tmp, + LLVMConstInt(ctx->i64, + max_gsvs_emit_size * 64 * stream, 0), ""); + ring = LLVMBuildInsertElement(builder, ring, tmp, uint->zero, ""); + ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, ""); + tmp = LLVMBuildExtractElement(builder, ring, uint->one, ""); + tmp = LLVMBuildOr(builder, tmp, + LLVMConstInt(ctx->i32, + S_008F04_STRIDE(max_gsvs_emit_size) | + S_008F04_SWIZZLE_ENABLE(1), 0), ""); + ring = LLVMBuildInsertElement(builder, ring, tmp, uint->one, ""); + ring = LLVMBuildInsertElement(builder, ring, + LLVMConstInt(ctx->i32, num_records, 0), + LLVMConstInt(ctx->i32, 2, 0), ""); + ring = LLVMBuildInsertElement(builder, ring, + LLVMConstInt(ctx->i32, + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1) | /* element_size = 4 (bytes) */ + S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */ + S_008F0C_ADD_TID_ENABLE(1), + 0), + LLVMConstInt(ctx->i32, 3, 0), ""); + ring = LLVMBuildBitCast(builder, ring, ctx->v16i8, ""); + + ctx->gsvs_ring[stream] = ring; } } } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index d8e60249db2..a17dbc73102 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -167,11 +167,7 @@ enum { SI_ES_RING_ESGS, SI_GS_RING_ESGS, - SI_GS_RING_GSVS0, - SI_GS_RING_GSVS1, - SI_GS_RING_GSVS2, - SI_GS_RING_GSVS3, - SI_VS_RING_GSVS, + SI_RING_GSVS, SI_VS_STREAMOUT_BUF0, SI_VS_STREAMOUT_BUF1, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c 
index ea715695b97..1e9f5f0a217 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2039,47 +2039,14 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) false, false, 0, 0, 0); } if (sctx->gsvs_ring) { - si_set_ring_buffer(&sctx->b.b, SI_VS_RING_GSVS, + si_set_ring_buffer(&sctx->b.b, SI_RING_GSVS, sctx->gsvs_ring, 0, sctx->gsvs_ring->width0, false, false, 0, 0, 0); - - /* Also update SI_GS_RING_GSVSi descriptors. */ - sctx->last_gsvs_itemsize = 0; } return true; } -static void si_update_gsvs_ring_bindings(struct si_context *sctx) -{ - unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size; - uint64_t offset; - - if (!sctx->gsvs_ring || gsvs_itemsize == sctx->last_gsvs_itemsize) - return; - - sctx->last_gsvs_itemsize = gsvs_itemsize; - - si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS0, - sctx->gsvs_ring, gsvs_itemsize, - 64, true, true, 4, 16, 0); - - offset = gsvs_itemsize * 64; - si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS1, - sctx->gsvs_ring, gsvs_itemsize, - 64, true, true, 4, 16, offset); - - offset = (gsvs_itemsize * 2) * 64; - si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS2, - sctx->gsvs_ring, gsvs_itemsize, - 64, true, true, 4, 16, offset); - - offset = (gsvs_itemsize * 3) * 64; - si_set_ring_buffer(&sctx->b.b, SI_GS_RING_GSVS3, - sctx->gsvs_ring, gsvs_itemsize, - 64, true, true, 4, 16, offset); -} - /** * @returns 1 if \p sel has been updated to use a new scratch buffer * 0 if not @@ -2469,8 +2436,6 @@ bool si_update_shaders(struct si_context *sctx) if (!si_update_gs_ring_buffers(sctx)) return false; - - si_update_gsvs_ring_bindings(sctx); } else { si_pm4_bind_state(sctx, gs, NULL); si_pm4_bind_state(sctx, es, NULL); -- 2.30.2