From eb35238ffe55eb27930419acf0e145281ea3db9c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 22 Apr 2017 20:07:20 +0200 Subject: [PATCH] radeonsi/gfx9: move RW_BUFFERS to s[0:1] for merged shaders MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_descriptors.c | 13 ++++++--- src/gallium/drivers/radeonsi/si_shader.c | 29 ++++++++----------- src/gallium/drivers/radeonsi/si_shader.h | 3 ++ 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index f04ed8794f7..5b7298eca9d 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1926,16 +1926,21 @@ void si_emit_graphics_shader_userdata(struct si_context *sctx, R_00B030_SPI_SHADER_USER_DATA_PS_0); si_emit_shader_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0); - si_emit_shader_pointer(sctx, descs, - R_00B330_SPI_SHADER_USER_DATA_ES_0); - /* GFX9 merged LS-HS and ES-GS. Only set RW_BUFFERS for ES and LS. */ if (sctx->b.chip_class >= GFX9) { + /* GFX9 merged LS-HS and ES-GS. + * Set RW_BUFFERS in the special registers, so that + * it's preloaded into s[0:1] instead of s[8:9]. + */ si_emit_shader_pointer(sctx, descs, - R_00B430_SPI_SHADER_USER_DATA_LS_0); + R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS); + si_emit_shader_pointer(sctx, descs, + R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS); } else { si_emit_shader_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0); + si_emit_shader_pointer(sctx, descs, + R_00B330_SPI_SHADER_USER_DATA_ES_0); si_emit_shader_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0); } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 3d23d72d213..ed2e0669694 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2680,9 +2680,10 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) tf_soffset = LLVMGetParam(ctx->main_fn, ctx->param_tcs_factor_offset); + ret = si_insert_input_ptr_as_2xi32(ctx, ret, + ctx->param_rw_buffers, 0); + if (ctx->screen->b.chip_class >= GFX9) { - ret = si_insert_input_ptr_as_2xi32(ctx, ret, - ctx->param_rw_buffers, 8); ret = LLVMBuildInsertValue(builder, ret, offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT, ""); /* Tess offchip and tess factor offsets are at the beginning. */ @@ -2690,8 +2691,6 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) ret = LLVMBuildInsertValue(builder, ret, tf_soffset, 4, ""); vgpr = 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT + 1; } else { - ret = si_insert_input_ptr_as_2xi32(ctx, ret, - ctx->param_rw_buffers, 0); ret = LLVMBuildInsertValue(builder, ret, offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT, ""); /* Tess offchip and tess factor offsets are after user SGPRs. */ @@ -2718,14 +2717,12 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) { LLVMValueRef ret = ctx->return_value; + ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, - 8 + SI_SGPR_RW_BUFFERS); - ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout, @@ -5866,8 +5863,8 @@ static void create_function(struct si_shader_context *ctx) case SI_SHADER_MERGED_VERTEX_TESSCTRL: /* Merged stages have 8 system SGPRs at the beginning. */ - params[num_params++] = ctx->i32; /* unused */ - params[num_params++] = ctx->i32; /* unused */ + params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */ + const_array(ctx->v16i8, SI_NUM_RW_BUFFERS); params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32; params[ctx->param_merged_wave_info = num_params++] = ctx->i32; params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32; @@ -5875,8 +5872,8 @@ static void create_function(struct si_shader_context *ctx) params[num_params++] = ctx->i32; /* unused */ params[num_params++] = ctx->i32; /* unused */ - params[ctx->param_rw_buffers = num_params++] = - const_array(ctx->v16i8, SI_NUM_RW_BUFFERS); + params[num_params++] = ctx->i32; /* unused */ + params[num_params++] = ctx->i32; /* unused */ declare_per_stage_desc_pointers(ctx, params, &num_params, ctx->type == PIPE_SHADER_VERTEX); declare_vs_specific_input_sgprs(ctx, params, &num_params); @@ -8330,19 +8327,17 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, int last_sgpr, num_params = 0; /* Declare inputs. Only RW_BUFFERS and TESS_FACTOR_OFFSET are used. */ + params[ctx->param_rw_buffers = num_params++] = + const_array(ctx->v16i8, SI_NUM_RW_BUFFERS); + if (ctx->screen->b.chip_class >= GFX9) { - params[num_params++] = ctx->i32; - params[num_params++] = ctx->i32; params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32; params[num_params++] = ctx->i32; /* wave info */ params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32; params[num_params++] = ctx->i32; params[num_params++] = ctx->i32; params[num_params++] = ctx->i32; - } - params[ctx->param_rw_buffers = num_params++] = - const_array(ctx->v16i8, SI_NUM_RW_BUFFERS); - if (ctx->screen->b.chip_class >= GFX9) { + params[num_params++] = ctx->i64; params[num_params++] = ctx->i64; params[num_params++] = ctx->i64; params[num_params++] = ctx->i64; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 299cbab54c9..ad72a3d3899 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -80,6 +80,9 @@ struct ac_shader_binary; /* SGPR user data indices */ enum { + /* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs, + * and these two are used for other purposes. + */ SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */ SI_SGPR_RW_BUFFERS_HI, SI_SGPR_CONST_BUFFERS, -- 2.30.2