From f8e16010e51eef19ed7030ac7248438f729ae511 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Michel=20D=C3=A4nzer?= Date: Tue, 28 Jan 2014 15:39:30 +0900 Subject: [PATCH] radeonsi: Put GS ring buffer descriptors with streamout buffer descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit And mark the constant buffers as read only for the GPU again. Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_descriptors.c | 93 ++++++++++++------- src/gallium/drivers/radeonsi/si_pipe.h | 6 +- src/gallium/drivers/radeonsi/si_shader.c | 22 +++-- src/gallium/drivers/radeonsi/si_shader.h | 72 +++++++------- src/gallium/drivers/radeonsi/si_state.h | 6 +- 5 files changed, 115 insertions(+), 84 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 2a54fcb54fc..9078c6c7f3e 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -516,7 +516,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot, unsigned element_size, unsigned index_stride) { struct si_context *sctx = (struct si_context *)ctx; - struct si_buffer_resources *buffers = &sctx->const_buffers[shader]; + struct si_buffer_resources *buffers = &sctx->rw_buffers[shader]; if (shader >= SI_NUM_SHADERS) return; @@ -608,9 +608,9 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned append_bitmask) { struct si_context *sctx = (struct si_context *)ctx; - struct si_buffer_resources *buffers = &sctx->streamout_buffers; + struct si_buffer_resources *buffers = &sctx->rw_buffers[PIPE_SHADER_VERTEX]; unsigned old_num_targets = sctx->b.streamout.num_targets; - unsigned i; + unsigned i, bufidx; /* Streamout buffers must be bound in 2 places: * 1) in VGT by setting the VGT_STRMOUT registers @@ -622,12 +622,14 @@ static void si_set_streamout_targets(struct pipe_context *ctx, /* Set the shader resources.*/ for (i = 0; i < num_targets; i++) { + bufidx = SI_RW_SO + i; + if (targets[i]) { struct pipe_resource *buffer = targets[i]->buffer; uint64_t va = r600_resource_va(ctx->screen, buffer); /* Set the descriptor. */ - uint32_t *desc = buffers->desc_data[i]; + uint32_t *desc = buffers->desc_data[bufidx]; desc[0] = va; desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); desc[2] = 0xffffffff; @@ -637,25 +639,29 @@ static void si_set_streamout_targets(struct pipe_context *ctx, S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); /* Set the resource. */ - pipe_resource_reference(&buffers->buffers[i], buffer); + pipe_resource_reference(&buffers->buffers[bufidx], + buffer); r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)buffer, buffers->shader_usage); - buffers->desc.enabled_mask |= 1 << i; + buffers->desc.enabled_mask |= 1 << bufidx; } else { /* Clear the descriptor and unset the resource. */ - memset(buffers->desc_data[i], 0, sizeof(uint32_t) * 4); - pipe_resource_reference(&buffers->buffers[i], NULL); - buffers->desc.enabled_mask &= ~(1 << i); + memset(buffers->desc_data[bufidx], 0, + sizeof(uint32_t) * 4); + pipe_resource_reference(&buffers->buffers[bufidx], + NULL); + buffers->desc.enabled_mask &= ~(1 << bufidx); } - buffers->desc.dirty_mask |= 1 << i; + buffers->desc.dirty_mask |= 1 << bufidx; } for (; i < old_num_targets; i++) { + bufidx = SI_RW_SO + i; /* Clear the descriptor and unset the resource. */ - memset(buffers->desc_data[i], 0, sizeof(uint32_t) * 4); - pipe_resource_reference(&buffers->buffers[i], NULL); - buffers->desc.enabled_mask &= ~(1 << i); - buffers->desc.dirty_mask |= 1 << i; + memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4); + pipe_resource_reference(&buffers->buffers[bufidx], NULL); + buffers->desc.enabled_mask &= ~(1 << bufidx); + buffers->desc.dirty_mask |= 1 << bufidx; } si_update_descriptors(sctx, &buffers->desc); @@ -712,25 +718,37 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource /* Vertex buffers. */ /* Nothing to do. Vertex buffer bindings are updated before every draw call. */ - /* Streamout buffers. */ - for (i = 0; i < sctx->streamout_buffers.num_buffers; i++) { - if (sctx->streamout_buffers.buffers[i] == buf) { - /* Update the descriptor. */ - si_desc_reset_buffer_offset(ctx, sctx->streamout_buffers.desc_data[i], - old_va, buf); + /* Read/Write buffers. */ + for (shader = 0; shader < SI_NUM_SHADERS; shader++) { + struct si_buffer_resources *buffers = &sctx->rw_buffers[shader]; + bool found = false; + uint32_t mask = buffers->desc.enabled_mask; - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, - (struct r600_resource*)buf, - sctx->streamout_buffers.shader_usage); - sctx->streamout_buffers.desc.dirty_mask |= 1 << i; - si_update_descriptors(sctx, &sctx->streamout_buffers.desc); - - /* Update the streamout state. */ - if (sctx->b.streamout.begin_emitted) { - r600_emit_streamout_end(&sctx->b); + while (mask) { + i = u_bit_scan(&mask); + if (buffers->buffers[i] == buf) { + si_desc_reset_buffer_offset(ctx, buffers->desc_data[i], + old_va, buf); + + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, + rbuffer, buffers->shader_usage); + + buffers->desc.dirty_mask |= 1 << i; + found = true; + + if (i >= SI_RW_SO && shader == PIPE_SHADER_VERTEX) { + /* Update the streamout state. */ + if (sctx->b.streamout.begin_emitted) { + r600_emit_streamout_end(&sctx->b); + } + sctx->b.streamout.append_bitmask = + sctx->b.streamout.enabled_mask; + r600_streamout_buffers_dirty(&sctx->b); + } } - sctx->b.streamout.append_bitmask = sctx->b.streamout.enabled_mask; - r600_streamout_buffers_dirty(&sctx->b); + } + if (found) { + si_update_descriptors(sctx, &buffers->desc); } } @@ -936,17 +954,20 @@ void si_init_all_descriptors(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { si_init_buffer_resources(sctx, &sctx->const_buffers[i], NUM_CONST_BUFFERS, i, SI_SGPR_CONST, + RADEON_USAGE_READ); + si_init_buffer_resources(sctx, &sctx->rw_buffers[i], + i == PIPE_SHADER_VERTEX ? + SI_RW_SO + 4 : SI_RW_SO, + i, SI_SGPR_RW_BUFFERS, RADEON_USAGE_READWRITE); si_init_sampler_views(sctx, &sctx->samplers[i].views, i); sctx->atoms.const_buffers[i] = &sctx->const_buffers[i].desc.atom; + sctx->atoms.rw_buffers[i] = &sctx->rw_buffers[i].desc.atom; sctx->atoms.sampler_views[i] = &sctx->samplers[i].views.desc.atom; } - si_init_buffer_resources(sctx, &sctx->streamout_buffers, 4, PIPE_SHADER_VERTEX, - SI_SGPR_SO_BUFFER, RADEON_USAGE_WRITE); - sctx->atoms.streamout_buffers = &sctx->streamout_buffers.desc.atom; /* Set pipe_context functions. */ sctx->b.b.set_constant_buffer = si_set_constant_buffer; @@ -961,9 +982,9 @@ void si_release_all_descriptors(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { si_release_buffer_resources(&sctx->const_buffers[i]); + si_release_buffer_resources(&sctx->rw_buffers[i]); si_release_sampler_views(&sctx->samplers[i].views); } - si_release_buffer_resources(&sctx->streamout_buffers); } void si_all_descriptors_begin_new_cs(struct si_context *sctx) @@ -972,7 +993,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]); + si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]); si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views); } - si_buffer_resources_begin_new_cs(sctx, &sctx->streamout_buffers); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index d63a52b0aff..f97feb0464c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -78,6 +78,8 @@ struct si_surface { #define SI_NUM_SHADERS (PIPE_SHADER_GEOMETRY+1) +#define SI_RW_SO 2 /* Streamout buffer descriptors after ring buffers */ + struct si_context { struct r600_common_context b; struct blitter_context *blitter; @@ -93,8 +95,8 @@ struct si_context { struct { /* The order matters. */ struct r600_atom *const_buffers[SI_NUM_SHADERS]; + struct r600_atom *rw_buffers[SI_NUM_SHADERS]; struct r600_atom *sampler_views[SI_NUM_SHADERS]; - struct r600_atom *streamout_buffers; /* Caches must be flushed after resource descriptors are * updated in memory. */ struct r600_atom *cache_flush; @@ -120,7 +122,7 @@ struct si_context { unsigned sprite_coord_enable; unsigned export_16bpc; struct si_buffer_resources const_buffers[SI_NUM_SHADERS]; - struct si_buffer_resources streamout_buffers; + struct si_buffer_resources rw_buffers[SI_NUM_SHADERS]; struct si_textures_info samplers[SI_NUM_SHADERS]; struct r600_resource *border_color_table; unsigned border_color_offset; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5b95c11580f..54270cdb733 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -319,7 +319,8 @@ static LLVMValueRef fetch_input_gs( 4); /* Load the ESGS ring resource descriptor */ - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST); + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + SI_PARAM_RW_BUFFERS); t_list = build_indexed_load(si_shader_ctx, t_list_ptr, lp_build_const_int32(gallivm, SI_RING_ESGS)); @@ -1202,7 +1203,8 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base) } /* Load the ESGS ring resource descriptor */ - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST); + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + SI_PARAM_RW_BUFFERS); t_list = build_indexed_load(si_shader_ctx, t_list_ptr, lp_build_const_int32(gallivm, SI_RING_ESGS)); @@ -1910,7 +1912,8 @@ static void si_llvm_emit_vertex( int i; /* Load the GSVS ring resource descriptor */ - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST); + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + SI_PARAM_RW_BUFFERS); t_list = build_indexed_load(si_shader_ctx, t_list_ptr, lp_build_const_int32(gallivm, SI_RING_GSVS)); @@ -2038,7 +2041,7 @@ static void create_function(struct si_shader_context *si_shader_ctx) struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; struct si_pipe_shader *shader = si_shader_ctx->shader; - LLVMTypeRef params[21], f32, i8, i32, v2i32, v3i32; + LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32; unsigned i, last_sgpr, num_params; i8 = LLVMInt8TypeInContext(gallivm->context); @@ -2049,6 +2052,8 @@ static void create_function(struct si_shader_context *si_shader_ctx) params[SI_PARAM_CONST] = LLVMPointerType( LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), CONST_ADDR_SPACE); + params[SI_PARAM_RW_BUFFERS] = params[SI_PARAM_CONST]; + /* We assume at most 16 textures per program at the moment. * This need probably need to be changed to support bindless textures */ params[SI_PARAM_SAMPLER] = LLVMPointerType( @@ -2059,7 +2064,6 @@ static void create_function(struct si_shader_context *si_shader_ctx) switch (si_shader_ctx->type) { case TGSI_PROCESSOR_VERTEX: params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST]; - params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST]; params[SI_PARAM_START_INSTANCE] = i32; num_params = SI_PARAM_START_INSTANCE+1; if (shader->key.vs.as_es) { @@ -2257,12 +2261,13 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx) return; LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, - SI_PARAM_SO_BUFFER); + SI_PARAM_RW_BUFFERS); /* Load the resources, we rely on the code sinking to do the rest */ for (i = 0; i < 4; ++i) { if (si_shader_ctx->shader->selector->so.stride[i]) { - LLVMValueRef offset = lp_build_const_int32(gallivm, i); + LLVMValueRef offset = lp_build_const_int32(gallivm, + SI_RW_SO + i); si_shader_ctx->so_buffers[i] = build_indexed_load(si_shader_ctx, buf_ptr, offset); } @@ -2371,7 +2376,8 @@ static int si_generate_gs_copy_shader(struct si_context *sctx, preload_streamout_buffers(si_shader_ctx); /* Load the GSVS ring resource descriptor */ - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST); + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + SI_PARAM_RW_BUFFERS); t_list = build_indexed_load(si_shader_ctx, t_list_ptr, lp_build_const_int32(gallivm, SI_RING_GSVS)); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 63c19ecaeef..d667baf402d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -34,23 +34,23 @@ #define SI_SGPR_CONST 0 #define SI_SGPR_SAMPLER 2 #define SI_SGPR_RESOURCE 4 -#define SI_SGPR_VERTEX_BUFFER 6 /* VS only */ -#define SI_SGPR_SO_BUFFER 8 /* VS only, stream-out */ +#define SI_SGPR_RW_BUFFERS 6 /* rings (& stream-out, VS only) */ +#define SI_SGPR_VERTEX_BUFFER 8 /* VS only */ #define SI_SGPR_START_INSTANCE 10 /* VS only */ -#define SI_SGPR_ALPHA_REF 6 /* PS only */ +#define SI_SGPR_ALPHA_REF 8 /* PS only */ #define SI_VS_NUM_USER_SGPR 11 -#define SI_GS_NUM_USER_SGPR 6 -#define SI_PS_NUM_USER_SGPR 7 +#define SI_GS_NUM_USER_SGPR 8 +#define SI_PS_NUM_USER_SGPR 9 /* LLVM function parameter indices */ #define SI_PARAM_CONST 0 #define SI_PARAM_SAMPLER 1 #define SI_PARAM_RESOURCE 2 +#define SI_PARAM_RW_BUFFERS 3 /* VS only parameters */ -#define SI_PARAM_VERTEX_BUFFER 3 -#define SI_PARAM_SO_BUFFER 4 +#define SI_PARAM_VERTEX_BUFFER 4 #define SI_PARAM_START_INSTANCE 5 /* the other VS parameters are assigned dynamically */ @@ -58,36 +58,38 @@ #define SI_PARAM_ES2GS_OFFSET 6 /* GS only parameters */ -#define SI_PARAM_GS2VS_OFFSET 3 -#define SI_PARAM_GS_WAVE_ID 4 -#define SI_PARAM_VTX0_OFFSET 5 -#define SI_PARAM_VTX1_OFFSET 6 -#define SI_PARAM_PRIMITIVE_ID 7 -#define SI_PARAM_VTX2_OFFSET 8 -#define SI_PARAM_VTX3_OFFSET 9 -#define SI_PARAM_VTX4_OFFSET 10 -#define SI_PARAM_VTX5_OFFSET 11 -#define SI_PARAM_GS_INSTANCE_ID 12 +#define SI_PARAM_GS2VS_OFFSET 4 +#define SI_PARAM_GS_WAVE_ID 5 +#define SI_PARAM_VTX0_OFFSET 6 +#define SI_PARAM_VTX1_OFFSET 7 +#define SI_PARAM_PRIMITIVE_ID 8 +#define SI_PARAM_VTX2_OFFSET 9 +#define SI_PARAM_VTX3_OFFSET 10 +#define SI_PARAM_VTX4_OFFSET 11 +#define SI_PARAM_VTX5_OFFSET 12 +#define SI_PARAM_GS_INSTANCE_ID 13 /* PS only parameters */ -#define SI_PARAM_ALPHA_REF 3 -#define SI_PARAM_PRIM_MASK 4 -#define SI_PARAM_PERSP_SAMPLE 5 -#define SI_PARAM_PERSP_CENTER 6 -#define SI_PARAM_PERSP_CENTROID 7 -#define SI_PARAM_PERSP_PULL_MODEL 8 -#define SI_PARAM_LINEAR_SAMPLE 9 -#define SI_PARAM_LINEAR_CENTER 10 -#define SI_PARAM_LINEAR_CENTROID 11 -#define SI_PARAM_LINE_STIPPLE_TEX 12 -#define SI_PARAM_POS_X_FLOAT 13 -#define SI_PARAM_POS_Y_FLOAT 14 -#define SI_PARAM_POS_Z_FLOAT 15 -#define SI_PARAM_POS_W_FLOAT 16 -#define SI_PARAM_FRONT_FACE 17 -#define SI_PARAM_ANCILLARY 18 -#define SI_PARAM_SAMPLE_COVERAGE 19 -#define SI_PARAM_POS_FIXED_PT 20 +#define SI_PARAM_ALPHA_REF 4 +#define SI_PARAM_PRIM_MASK 5 +#define SI_PARAM_PERSP_SAMPLE 6 +#define SI_PARAM_PERSP_CENTER 7 +#define SI_PARAM_PERSP_CENTROID 8 +#define SI_PARAM_PERSP_PULL_MODEL 9 +#define SI_PARAM_LINEAR_SAMPLE 10 +#define SI_PARAM_LINEAR_CENTER 11 +#define SI_PARAM_LINEAR_CENTROID 12 +#define SI_PARAM_LINE_STIPPLE_TEX 13 +#define SI_PARAM_POS_X_FLOAT 14 +#define SI_PARAM_POS_Y_FLOAT 15 +#define SI_PARAM_POS_Z_FLOAT 16 +#define SI_PARAM_POS_W_FLOAT 17 +#define SI_PARAM_FRONT_FACE 18 +#define SI_PARAM_ANCILLARY 19 +#define SI_PARAM_SAMPLE_COVERAGE 20 +#define SI_PARAM_POS_FIXED_PT 21 + +#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1) struct si_shader_input { unsigned name; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f7082f5df43..6922c88e07c 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -115,10 +115,10 @@ union si_state { #define NUM_SAMPLER_STATES NUM_TEX_UNITS #define NUM_PIPE_CONST_BUFFERS 16 -#define SI_RING_ESGS 17 -#define SI_RING_GSVS 18 -#define NUM_CONST_BUFFERS (SI_RING_GSVS + 1) +#define NUM_CONST_BUFFERS (NUM_PIPE_CONST_BUFFERS + 1) +#define SI_RING_ESGS 0 +#define SI_RING_GSVS 1 /* This represents resource descriptors in memory, such as buffer resources, * image resources, and sampler states. -- 2.30.2