From c495c0ad37dc6a4505a726e3ac0e3d83adc46d30 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Tue, 15 Mar 2016 16:30:56 -0500 Subject: [PATCH] radeonsi: implement set_shader_buffers MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák Reviewed-by: Edward O'Callaghan --- src/gallium/drivers/radeonsi/si_descriptors.c | 61 +++++++++- src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_shader.c | 5 +- src/gallium/drivers/radeonsi/si_shader.h | 114 +++++++++--------- src/gallium/drivers/radeonsi/si_state.h | 2 + 5 files changed, 125 insertions(+), 58 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 9cdf5e0f254..b5557d800c7 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -746,6 +746,55 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s buffers->desc.list_dirty = true; } +/* SHADER BUFFERS */ + +/* Bind or unbind count shader buffers for one shader stage, covering slots start_slot .. start_slot + count - 1 of sctx->shader_buffers[shader]. For every slot the function either clears the slot (sbuffers is NULL or the entry has no buffer) or writes a 4-dword descriptor built from the buffer GPU address plus buffer_offset and from buffer_size. The descriptor list is flagged dirty before returning. */ static void si_set_shader_buffers(struct pipe_context *ctx, unsigned shader, + unsigned start_slot, unsigned count, + struct pipe_shader_buffer *sbuffers) +{ + struct si_context *sctx = (struct si_context *)ctx; + struct si_buffer_resources *buffers = &sctx->shader_buffers[shader]; + unsigned i; + + /* The requested slot range must not exceed SI_NUM_SHADER_BUFFERS; this is only checked when assertions are enabled. */ assert(start_slot + count <= SI_NUM_SHADER_BUFFERS); + + for (i = 0; i < count; ++i) { + /* A NULL sbuffers array is handled as if every requested slot were unbound. */ struct pipe_shader_buffer *sbuffer = sbuffers ?
&sbuffers[i] : NULL; + struct r600_resource *buf; + unsigned slot = start_slot + i; + /* Each slot owns 4 consecutive dwords of the descriptor list. */ uint32_t *desc = buffers->desc.list + slot * 4; + uint64_t va; + + /* Unbind path: drop the reference held for this slot, zero its descriptor and clear its bit in enabled_mask. */ if (!sbuffer || !sbuffer->buffer) { + pipe_resource_reference(&buffers->buffers[slot], NULL); + memset(desc, 0, sizeof(uint32_t) * 4); + buffers->desc.enabled_mask &= ~(1llu << slot); + continue; + } + + buf = (struct r600_resource *)sbuffer->buffer; + va = buf->gpu_address + sbuffer->buffer_offset; + + /* desc[0] receives the low 32 bits of va (uint64_t narrowed to uint32_t), desc[1] carries va >> 32 together with a stride field of 0, desc[2] is the buffer size, desc[3] holds the X/Y/Z/W destination selects and the number/data format fields. */ desc[0] = va; + desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | + S_008F04_STRIDE(0); + desc[2] = sbuffer->buffer_size; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + + /* Bind path bookkeeping: hold a reference to the buffer in this slot, add it to the gfx buffer list with the usage and priority stored in buffers, then set the slot bit in enabled_mask. */ pipe_resource_reference(&buffers->buffers[slot], &buf->b.b); + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, buf, + buffers->shader_usage, buffers->priority); + buffers->desc.enabled_mask |= 1llu << slot; + } + + /* Set unconditionally, even when count is 0. NOTE(review): presumably this makes the descriptor list get uploaded again; confirm against si_upload_descriptors. */ buffers->desc.list_dirty = true; +} + /* RING BUFFERS */ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot, @@ -1078,10 +1127,12 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource } } - /* Constant buffers. */ + /* Constant and shader buffers. */ for (shader = 0; shader < SI_NUM_SHADERS; shader++) { si_reset_buffer_resources(sctx, &sctx->const_buffers[shader], buf, old_va); + si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader], + buf, old_va); } /* Texture buffers - update virtual addresses in sampler view descriptors.
*/ @@ -1261,6 +1312,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom) si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false); si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false); + si_emit_shader_pointer(sctx, &sctx->shader_buffers[i].desc, base, false); si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false); si_emit_shader_pointer(sctx, &sctx->images[i].desc, base, false); } @@ -1280,6 +1332,9 @@ void si_init_all_descriptors(struct si_context *sctx) si_init_buffer_resources(&sctx->rw_buffers[i], SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS, RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT); + si_init_buffer_resources(&sctx->shader_buffers[i], + SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS, + RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER); si_init_descriptors(&sctx->samplers[i].views.desc, SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS, @@ -1297,6 +1352,7 @@ void si_init_all_descriptors(struct si_context *sctx) sctx->b.b.bind_sampler_states = si_bind_sampler_states; sctx->b.b.set_shader_images = si_set_shader_images; sctx->b.b.set_constant_buffer = si_set_constant_buffer; + sctx->b.b.set_shader_buffers = si_set_shader_buffers; sctx->b.b.set_sampler_views = si_set_sampler_views; sctx->b.b.set_stream_output_targets = si_set_streamout_targets; sctx->b.invalidate_buffer = si_invalidate_buffer; @@ -1319,6 +1375,7 @@ bool si_upload_shader_descriptors(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) || !si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) || + !si_upload_descriptors(sctx, &sctx->shader_buffers[i].desc) || !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) || !si_upload_descriptors(sctx, &sctx->images[i].desc)) return false; @@ -1333,6 +1390,7 @@ void si_release_all_descriptors(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { 
si_release_buffer_resources(&sctx->const_buffers[i]); si_release_buffer_resources(&sctx->rw_buffers[i]); + si_release_buffer_resources(&sctx->shader_buffers[i]); si_release_sampler_views(&sctx->samplers[i].views); si_release_image_views(&sctx->images[i]); } @@ -1346,6 +1404,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]); si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]); + si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]); si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views); si_image_views_begin_new_cs(sctx, &sctx->images[i]); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 3635d6303c4..0398b1df61e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -241,6 +241,7 @@ struct si_context { struct si_descriptors vertex_buffers; struct si_buffer_resources const_buffers[SI_NUM_SHADERS]; struct si_buffer_resources rw_buffers[SI_NUM_SHADERS]; + struct si_buffer_resources shader_buffers[SI_NUM_SHADERS]; struct si_textures_info samplers[SI_NUM_SHADERS]; struct si_images_info images[SI_NUM_SHADERS]; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 59c6f41f803..8f9a6212b43 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4450,7 +4450,8 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS); params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS); params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES); - last_array_pointer = SI_PARAM_IMAGES; + params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS); + last_array_pointer = SI_PARAM_SHADER_BUFFERS; switch (ctx->type) { case 
TGSI_PROCESSOR_VERTEX: @@ -6034,6 +6035,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen, params[SI_PARAM_CONST_BUFFERS] = ctx.i64; params[SI_PARAM_SAMPLERS] = ctx.i64; params[SI_PARAM_IMAGES] = ctx.i64; + params[SI_PARAM_SHADER_BUFFERS] = ctx.i64; params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32; params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32; params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32; @@ -6284,6 +6286,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen, params[SI_PARAM_CONST_BUFFERS] = ctx.i64; params[SI_PARAM_SAMPLERS] = ctx.i64; params[SI_PARAM_IMAGES] = ctx.i64; + params[SI_PARAM_SHADER_BUFFERS] = ctx.i64; params[SI_PARAM_ALPHA_REF] = ctx.f32; last_array_pointer = -1; last_sgpr = SI_PARAM_ALPHA_REF; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 8059edf6395..013c8a2c114 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -81,95 +81,97 @@ struct radeon_shader_reloc; #define SI_SGPR_CONST_BUFFERS 2 #define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */ #define SI_SGPR_IMAGES 6 -#define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */ -#define SI_SGPR_BASE_VERTEX 10 /* VS only */ -#define SI_SGPR_START_INSTANCE 11 /* VS only */ -#define SI_SGPR_VS_STATE_BITS 12 /* VS(VS) only */ -#define SI_SGPR_LS_OUT_LAYOUT 12 /* VS(LS) only */ -#define SI_SGPR_TCS_OUT_OFFSETS 8 /* TCS & TES only */ -#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */ -#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */ -#define SI_SGPR_ALPHA_REF 8 /* PS only */ - -#define SI_VS_NUM_USER_SGPR 13 /* API VS */ -#define SI_ES_NUM_USER_SGPR 12 /* API VS */ -#define SI_LS_NUM_USER_SGPR 13 /* API VS */ -#define SI_TCS_NUM_USER_SGPR 11 -#define SI_TES_NUM_USER_SGPR 10 -#define SI_GS_NUM_USER_SGPR 8 +#define SI_SGPR_SHADER_BUFFERS 8 +#define SI_SGPR_VERTEX_BUFFERS 10 /* VS only */ +#define SI_SGPR_BASE_VERTEX 12 /* VS only */ +#define SI_SGPR_START_INSTANCE 13 /* VS only */ 
+#define SI_SGPR_VS_STATE_BITS 14 /* VS(VS) only */ +#define SI_SGPR_LS_OUT_LAYOUT 14 /* VS(LS) only */ +#define SI_SGPR_TCS_OUT_OFFSETS 10 /* TCS & TES only */ +#define SI_SGPR_TCS_OUT_LAYOUT 11 /* TCS & TES only */ +#define SI_SGPR_TCS_IN_LAYOUT 12 /* TCS only */ +#define SI_SGPR_ALPHA_REF 10 /* PS only */ + +#define SI_VS_NUM_USER_SGPR 15 /* API VS */ +#define SI_ES_NUM_USER_SGPR 14 /* API VS */ +#define SI_LS_NUM_USER_SGPR 15 /* API VS */ +#define SI_TCS_NUM_USER_SGPR 13 +#define SI_TES_NUM_USER_SGPR 12 +#define SI_GS_NUM_USER_SGPR 10 #define SI_GSCOPY_NUM_USER_SGPR 4 -#define SI_PS_NUM_USER_SGPR 9 +#define SI_PS_NUM_USER_SGPR 11 /* LLVM function parameter indices */ #define SI_PARAM_RW_BUFFERS 0 #define SI_PARAM_CONST_BUFFERS 1 #define SI_PARAM_SAMPLERS 2 #define SI_PARAM_IMAGES 3 +#define SI_PARAM_SHADER_BUFFERS 4 /* VS only parameters */ -#define SI_PARAM_VERTEX_BUFFERS 4 -#define SI_PARAM_BASE_VERTEX 5 -#define SI_PARAM_START_INSTANCE 6 +#define SI_PARAM_VERTEX_BUFFERS 5 +#define SI_PARAM_BASE_VERTEX 6 +#define SI_PARAM_START_INSTANCE 7 /* [0] = clamp vertex color */ -#define SI_PARAM_VS_STATE_BITS 7 +#define SI_PARAM_VS_STATE_BITS 8 /* the other VS parameters are assigned dynamically */ /* Offsets where TCS outputs and TCS patch outputs live in LDS: * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32 * [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32 */ -#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */ +#define SI_PARAM_TCS_OUT_OFFSETS 5 /* for TCS & TES */ /* Layout of TCS outputs / TES inputs: * [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4 * [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4 * [26:31] = gl_PatchVerticesIn, max = 32 */ -#define SI_PARAM_TCS_OUT_LAYOUT 5 /* for TCS & TES */ +#define SI_PARAM_TCS_OUT_LAYOUT 6 /* for TCS & TES */ /* Layout of LS outputs / TCS inputs * [0:12] = stride between patches 
in dwords = num_inputs * num_vertices * 4, max = 32*32*4 * [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4 */ -#define SI_PARAM_TCS_IN_LAYOUT 6 /* TCS only */ -#define SI_PARAM_LS_OUT_LAYOUT 7 /* same value as TCS_IN_LAYOUT, LS only */ +#define SI_PARAM_TCS_IN_LAYOUT 7 /* TCS only */ +#define SI_PARAM_LS_OUT_LAYOUT 8 /* same value as TCS_IN_LAYOUT, LS only */ /* TCS only parameters. */ -#define SI_PARAM_TESS_FACTOR_OFFSET 7 -#define SI_PARAM_PATCH_ID 8 -#define SI_PARAM_REL_IDS 9 +#define SI_PARAM_TESS_FACTOR_OFFSET 8 +#define SI_PARAM_PATCH_ID 9 +#define SI_PARAM_REL_IDS 10 /* GS only parameters */ -#define SI_PARAM_GS2VS_OFFSET 4 -#define SI_PARAM_GS_WAVE_ID 5 -#define SI_PARAM_VTX0_OFFSET 6 -#define SI_PARAM_VTX1_OFFSET 7 -#define SI_PARAM_PRIMITIVE_ID 8 -#define SI_PARAM_VTX2_OFFSET 9 -#define SI_PARAM_VTX3_OFFSET 10 -#define SI_PARAM_VTX4_OFFSET 11 -#define SI_PARAM_VTX5_OFFSET 12 -#define SI_PARAM_GS_INSTANCE_ID 13 +#define SI_PARAM_GS2VS_OFFSET 5 +#define SI_PARAM_GS_WAVE_ID 6 +#define SI_PARAM_VTX0_OFFSET 7 +#define SI_PARAM_VTX1_OFFSET 8 +#define SI_PARAM_PRIMITIVE_ID 9 +#define SI_PARAM_VTX2_OFFSET 10 +#define SI_PARAM_VTX3_OFFSET 11 +#define SI_PARAM_VTX4_OFFSET 12 +#define SI_PARAM_VTX5_OFFSET 13 +#define SI_PARAM_GS_INSTANCE_ID 14 /* PS only parameters */ -#define SI_PARAM_ALPHA_REF 4 -#define SI_PARAM_PRIM_MASK 5 -#define SI_PARAM_PERSP_SAMPLE 6 -#define SI_PARAM_PERSP_CENTER 7 -#define SI_PARAM_PERSP_CENTROID 8 -#define SI_PARAM_PERSP_PULL_MODEL 9 -#define SI_PARAM_LINEAR_SAMPLE 10 -#define SI_PARAM_LINEAR_CENTER 11 -#define SI_PARAM_LINEAR_CENTROID 12 -#define SI_PARAM_LINE_STIPPLE_TEX 13 -#define SI_PARAM_POS_X_FLOAT 14 -#define SI_PARAM_POS_Y_FLOAT 15 -#define SI_PARAM_POS_Z_FLOAT 16 -#define SI_PARAM_POS_W_FLOAT 17 -#define SI_PARAM_FRONT_FACE 18 -#define SI_PARAM_ANCILLARY 19 -#define SI_PARAM_SAMPLE_COVERAGE 20 -#define SI_PARAM_POS_FIXED_PT 21 +#define SI_PARAM_ALPHA_REF 5 +#define SI_PARAM_PRIM_MASK 6 +#define 
SI_PARAM_PERSP_SAMPLE 7 +#define SI_PARAM_PERSP_CENTER 8 +#define SI_PARAM_PERSP_CENTROID 9 +#define SI_PARAM_PERSP_PULL_MODEL 10 +#define SI_PARAM_LINEAR_SAMPLE 11 +#define SI_PARAM_LINEAR_CENTER 12 +#define SI_PARAM_LINEAR_CENTROID 13 +#define SI_PARAM_LINE_STIPPLE_TEX 14 +#define SI_PARAM_POS_X_FLOAT 15 +#define SI_PARAM_POS_Y_FLOAT 16 +#define SI_PARAM_POS_Z_FLOAT 17 +#define SI_PARAM_POS_W_FLOAT 18 +#define SI_PARAM_FRONT_FACE 19 +#define SI_PARAM_ANCILLARY 20 +#define SI_PARAM_SAMPLE_COVERAGE 21 +#define SI_PARAM_POS_FIXED_PT 22 #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */ diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f55f19e2918..6748f802c7d 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -161,6 +161,8 @@ struct si_shader_data { #define SI_NUM_IMAGES 16 +#define SI_NUM_SHADER_BUFFERS 16 + /* Read-write buffer slots. * * Ring buffers: 0..1 -- 2.30.2