From 5c34562d7ce5d278c2948b4f27d16fcb3e4fd22d Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 2 May 2016 13:20:43 +0200 Subject: [PATCH] radeonsi: Add offchip tessellation parameters. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Nicolai Hähnle Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_shader.c | 28 +++++++++++++++---- src/gallium/drivers/radeonsi/si_shader.h | 3 +- .../drivers/radeonsi/si_state_shaders.c | 9 ++++++ 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 3df78200ff3..1f162b59f43 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -91,6 +91,12 @@ struct si_shader_context int param_tes_rel_patch_id; int param_tes_patch_id; int param_es2gs_offset; + int param_oc_lds; + + /* Sets a bit if the dynamic HS control word was 0x80000000. The bit is + * 0x800000 for VS, 0x1 for ES. + */ + int param_tess_offchip; LLVMTargetMachineRef tm; @@ -2326,14 +2332,14 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_TESS_FACTOR_OFFSET); ret = LLVMBuildInsertValue(builder, ret, tf_soffset, - SI_TCS_NUM_USER_SGPR, ""); + SI_TCS_NUM_USER_SGPR + 1, ""); /* VGPRs */ rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id); invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id); tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset); - vgpr = SI_TCS_NUM_USER_SGPR + 1; + vgpr = SI_TCS_NUM_USER_SGPR + 2; ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, ""); ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, ""); ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, ""); @@ -4945,7 +4951,11 @@ static void declare_streamout_params(struct si_shader_context *ctx, /* Streamout SGPRs. */ if (so->num_outputs) { - params[ctx->param_streamout_config = (*num_params)++] = i32; + if (ctx->type != PIPE_SHADER_TESS_EVAL) + params[ctx->param_streamout_config = (*num_params)++] = i32; + else + ctx->param_streamout_config = ctx->param_tess_offchip; + params[ctx->param_streamout_write_index = (*num_params)++] = i32; } /* A streamout buffer offset is loaded if the stride is non-zero. */ @@ -5065,6 +5075,7 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32; params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32; params[SI_PARAM_TCS_IN_LAYOUT] = ctx->i32; + params[ctx->param_oc_lds = SI_PARAM_TCS_OC_LDS] = ctx->i32; params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx->i32; last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET; @@ -5074,8 +5085,10 @@ static void create_function(struct si_shader_context *ctx) num_params = SI_PARAM_REL_IDS+1; if (!ctx->is_monolithic) { - /* PARAM_TESS_FACTOR_OFFSET is after user SGPRs. */ - for (i = 0; i <= SI_TCS_NUM_USER_SGPR; i++) + /* SI_PARAM_TCS_OC_LDS and PARAM_TESS_FACTOR_OFFSET are + * placed after the user SGPRs. + */ + for (i = 0; i < SI_TCS_NUM_USER_SGPR + 2; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 3; i++) @@ -5089,10 +5102,14 @@ static void create_function(struct si_shader_context *ctx) num_params = SI_PARAM_TCS_OUT_LAYOUT+1; if (shader->key.tes.as_es) { + params[ctx->param_oc_lds = num_params++] = ctx->i32; + params[ctx->param_tess_offchip = num_params++] = ctx->i32; params[ctx->param_es2gs_offset = num_params++] = ctx->i32; } else { + params[ctx->param_tess_offchip = num_params++] = ctx->i32; declare_streamout_params(ctx, &shader->selector->so, params, ctx->i32, &num_params); + params[ctx->param_oc_lds = num_params++] = ctx->i32; } last_sgpr = num_params - 1; @@ -6640,6 +6657,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen, params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32; params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32; params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32; + params[ctx.param_oc_lds = SI_PARAM_TCS_OC_LDS] = ctx.i32; params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx.i32; last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET; num_params = last_sgpr + 1; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 0ab816c6da0..61ddcd16328 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -166,7 +166,8 @@ enum { SI_PARAM_LS_OUT_LAYOUT, /* same value as TCS_IN_LAYOUT, LS only */ /* TCS only parameters. */ - SI_PARAM_TESS_FACTOR_OFFSET = SI_PARAM_TCS_IN_LAYOUT + 1, + SI_PARAM_TCS_OC_LDS = SI_PARAM_TCS_IN_LAYOUT + 1, + SI_PARAM_TESS_FACTOR_OFFSET, SI_PARAM_PATCH_ID, SI_PARAM_REL_IDS, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index d8ae2b232e9..968fc88e6b4 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -355,6 +355,7 @@ static void si_shader_hs(struct si_shader *shader) S_00B428_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, S_00B42C_USER_SGPR(SI_TCS_NUM_USER_SGPR) | + S_00B42C_OC_LDS_EN(1) | S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); } @@ -364,6 +365,7 @@ static void si_shader_es(struct si_shader *shader) unsigned num_user_sgprs; unsigned vgpr_comp_cnt; uint64_t va; + unsigned oc_lds_en; pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state); @@ -382,6 +384,8 @@ static void si_shader_es(struct si_shader *shader) } else unreachable("invalid shader selector type"); + oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0; + si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, shader->selector->esgs_itemsize / 4); si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); @@ -394,6 +398,7 @@ static void si_shader_es(struct si_shader *shader) S_00B328_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES, S_00B32C_USER_SGPR(num_user_sgprs) | + S_00B32C_OC_LDS_EN(oc_lds_en) | S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); if (shader->selector->type == PIPE_SHADER_TESS_EVAL) @@ -490,6 +495,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) unsigned num_user_sgprs; unsigned nparams, vgpr_comp_cnt; uint64_t va; + unsigned oc_lds_en; unsigned window_space = shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; bool enable_prim_id = si_vs_exports_prim_id(shader); @@ -547,6 +553,8 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE)); + oc_lds_en = shader->selector->type == PIPE_SHADER_TESS_EVAL ? 1 : 0; + si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8); si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40); si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, @@ -557,6 +565,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) S_00B128_FLOAT_MODE(shader->config.float_mode)); si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, S_00B12C_USER_SGPR(num_user_sgprs) | + S_00B12C_OC_LDS_EN(oc_lds_en) | S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) | S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) | S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) | -- 2.30.2