From b519ddc35cf9098ad1541457a3d1c34a8ec99961 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Nicolai=20H=C3=A4hnle?=
Date: Sat, 4 May 2019 12:11:08 +0200
Subject: [PATCH] radeonsi/gfx9: declare LDS ESGS ring as an explicit symbol on LLVM >= 9
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This will make it easier to use LDS for other purposes in geometry
shaders in the future.

The lifetime of the esgs_ring variable is as follows:
- declared as [0 x i32] while compiling shader parts or monolithic shaders
- just before uploading, gfx9_get_gs_info computes (among other things)
  the final ESGS ring size (this depends on both the ES and the GS shader)
- during upload, the "esgs_ring" symbol is given to ac_rtld as a shared
  LDS symbol, which will lead to correctly laying out the LDS including
  other LDS objects that may be defined in the future
- si_shader_gs uses shader->config.lds_size as the LDS size

This change depends on the LLVM changes for emitting LDS symbols into the
ELF file.
Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_shader.c | 82 +++++++++++++++---- src/gallium/drivers/radeonsi/si_shader.h | 19 +++++ .../drivers/radeonsi/si_state_shaders.c | 29 ++----- 3 files changed, 94 insertions(+), 36 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ab97cd87273..85c2acc2195 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1543,9 +1543,22 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, return NULL; } + unsigned offset = param * 4 + swizzle; vtx_offset = LLVMBuildAdd(ctx->ac.builder, vtx_offset, - LLVMConstInt(ctx->i32, param * 4, 0), ""); - return lds_load(bld_base, type, swizzle, vtx_offset); + LLVMConstInt(ctx->i32, offset, false), ""); + + LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->esgs_ring, vtx_offset); + LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, ptr, ""); + if (llvm_type_is_64bit(ctx, type)) { + ptr = LLVMBuildGEP(ctx->ac.builder, ptr, + &ctx->ac.i32_1, 1, ""); + LLVMValueRef values[2] = { + value, + LLVMBuildLoad(ctx->ac.builder, ptr, "") + }; + value = ac_build_gather_values(&ctx->ac, values, 2); + } + return LLVMBuildBitCast(ctx->ac.builder, value, type, ""); } /* GFX6: input load from the ESGS ring in memory. */ @@ -3513,7 +3526,9 @@ static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, /* GFX9 has the ESGS ring in LDS. 
*/ if (ctx->screen->info.chip_class >= GFX9) { - lds_store(ctx, param * 4 + chan, lds_base, out_val); + LLVMValueRef idx = LLVMConstInt(ctx->i32, param * 4 + chan, false); + idx = LLVMBuildAdd(ctx->ac.builder, lds_base, idx, ""); + ac_build_indexed_store(&ctx->ac, ctx->esgs_ring, idx, out_val); continue; } @@ -4911,10 +4926,7 @@ static void create_function(struct si_shader_context *ctx) assert(shader->info.num_input_vgprs >= num_prolog_vgprs); shader->info.num_input_vgprs -= num_prolog_vgprs; - if (shader->key.as_ls || - ctx->type == PIPE_SHADER_TESS_CTRL || - /* GFX9 has the ESGS ring buffer in LDS. */ - type == SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY) + if (shader->key.as_ls || ctx->type == PIPE_SHADER_TESS_CTRL) ac_declare_lds_as_pointer(&ctx->ac); } @@ -4929,15 +4941,33 @@ static void preload_ring_buffers(struct si_shader_context *ctx) LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers); - if (ctx->screen->info.chip_class <= GFX8 && - (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) { - unsigned ring = - ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS - : SI_ES_RING_ESGS; - LLVMValueRef offset = LLVMConstInt(ctx->i32, ring, 0); + if (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY) { + if (ctx->screen->info.chip_class <= GFX8) { + unsigned ring = + ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS + : SI_ES_RING_ESGS; + LLVMValueRef offset = LLVMConstInt(ctx->i32, ring, 0); - ctx->esgs_ring = - ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset); + ctx->esgs_ring = + ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset); + } else { + if (USE_LDS_SYMBOLS && HAVE_LLVM >= 0x0900) { + /* Declare the ESGS ring as an explicit LDS symbol. + * For monolithic shaders, we declare the ring only once. + * + * We declare it with 64KB alignment as a hint that the + * pointer value will always be 0. 
+ */ + ctx->esgs_ring = LLVMAddGlobalInAddressSpace( + ctx->ac.module, LLVMArrayType(ctx->i32, 0), + "esgs_ring", + AC_ADDR_SPACE_LDS); + LLVMSetAlignment(ctx->esgs_ring, 64 * 1024); + } else { + ac_declare_lds_as_pointer(&ctx->ac); + ctx->esgs_ring = ctx->ac.lds; + } + } } if (ctx->shader->is_gs_copy_shader) { @@ -5055,6 +5085,7 @@ static bool si_shader_binary_open(struct si_screen *screen, struct si_shader *shader, struct ac_rtld_binary *rtld) { + const struct si_shader_selector *sel = shader->selector; const char *part_elfs[5]; size_t part_sizes[5]; unsigned num_parts = 0; @@ -5074,11 +5105,27 @@ static bool si_shader_binary_open(struct si_screen *screen, #undef add_part + struct ac_rtld_symbol lds_symbols[1]; + unsigned num_lds_symbols = 0; + + if (sel && screen->info.chip_class >= GFX9 && + sel->type == PIPE_SHADER_GEOMETRY && !shader->is_gs_copy_shader) { + /* We add this symbol even on LLVM <= 8 to ensure that + * shader->config.lds_size is set correctly below. + */ + struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++]; + sym->name = "esgs_ring"; + sym->size = shader->gs_info.esgs_ring_size; + sym->align = 64 * 1024; + } + bool ok = ac_rtld_open(rtld, (struct ac_rtld_open_info){ .info = &screen->info, .num_parts = num_parts, .elf_ptrs = part_elfs, - .elf_sizes = part_sizes }); + .elf_sizes = part_sizes, + .num_shared_lds_symbols = num_lds_symbols, + .shared_lds_symbols = lds_symbols }); if (rtld->lds_size > 0) { unsigned alloc_granularity = screen->info.chip_class >= GFX7 ? 
512 : 256; @@ -8013,6 +8060,9 @@ bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compil si_calculate_max_simd_waves(shader); } + if (sscreen->info.chip_class >= GFX9 && sel->type == PIPE_SHADER_GEOMETRY) + gfx9_get_gs_info(shader->previous_stage_sel, sel, &shader->gs_info); + si_fix_resource_usage(sscreen, shader); si_shader_dump(sscreen, shader, debug, sel->info.processor, stderr, true); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index beda82beb05..f9a754d7f0b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -143,6 +143,10 @@ #include +// Use LDS symbols when supported by LLVM. Can be disabled for testing the old +// path on newer LLVM for now. Should be removed in the long term. +#define USE_LDS_SYMBOLS (true) + struct nir_shader; struct si_shader; struct si_context; @@ -595,6 +599,14 @@ struct si_shader_binary { char *llvm_ir_string; }; +struct gfx9_gs_info { + unsigned es_verts_per_subgroup; + unsigned gs_prims_per_subgroup; + unsigned gs_inst_prims_in_subgroup; + unsigned max_prims_per_subgroup; + unsigned esgs_ring_size; /* in bytes */ +}; + struct si_shader { struct si_compiler_ctx_state compiler_ctx_state; @@ -629,6 +641,8 @@ struct si_shader { char *shader_log; size_t shader_log_size; + struct gfx9_gs_info gs_info; + /* For save precompute context registers values. */ union { struct { @@ -718,6 +732,11 @@ void si_nir_scan_tess_ctrl(const struct nir_shader *nir, void si_lower_nir(struct si_shader_selector *sel); void si_nir_opts(struct nir_shader *nir); +/* si_state_shaders.c */ +void gfx9_get_gs_info(struct si_shader_selector *es, + struct si_shader_selector *gs, + struct gfx9_gs_info *out); + /* Inline helpers. */ /* Return the pointer to the main shader part's pointer. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index e90884c898a..fab2e255742 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -646,17 +646,9 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode) return prim_conv[mode]; } -struct gfx9_gs_info { - unsigned es_verts_per_subgroup; - unsigned gs_prims_per_subgroup; - unsigned gs_inst_prims_in_subgroup; - unsigned max_prims_per_subgroup; - unsigned lds_size; -}; - -static void gfx9_get_gs_info(struct si_shader_selector *es, - struct si_shader_selector *gs, - struct gfx9_gs_info *out) +void gfx9_get_gs_info(struct si_shader_selector *es, + struct si_shader_selector *gs, + struct gfx9_gs_info *out) { unsigned gs_num_invocations = MAX2(gs->gs_num_invocations, 1); unsigned input_prim = gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]; @@ -747,7 +739,7 @@ static void gfx9_get_gs_info(struct si_shader_selector *es, out->gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations; out->max_prims_per_subgroup = out->gs_inst_prims_in_subgroup * gs->gs_max_out_vertices; - out->lds_size = align(esgs_lds_size, 128) / 128; + out->esgs_ring_size = 4 * esgs_lds_size; assert(out->max_prims_per_subgroup <= max_out_prims); } @@ -876,7 +868,6 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) unsigned input_prim = sel->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]; unsigned es_type = shader->key.part.gs.es->type; unsigned es_vgpr_comp_cnt, gs_vgpr_comp_cnt; - struct gfx9_gs_info gs_info; if (es_type == PIPE_SHADER_VERTEX) /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) 
*/ @@ -904,8 +895,6 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) else num_user_sgprs = GFX9_TESGS_NUM_USER_SGPR; - gfx9_get_gs_info(shader->key.part.gs.es, sel, &gs_info); - si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8); si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, S_00B214_MEM_BASE(va >> 40)); @@ -920,15 +909,15 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) S_00B22C_USER_SGPR_MSB(num_user_sgprs >> 5) | S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_OC_LDS_EN(es_type == PIPE_SHADER_TESS_EVAL) | - S_00B22C_LDS_SIZE(gs_info.lds_size) | + S_00B22C_LDS_SIZE(shader->config.lds_size) | S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); shader->ctx_reg.gs.vgt_gs_onchip_cntl = - S_028A44_ES_VERTS_PER_SUBGRP(gs_info.es_verts_per_subgroup) | - S_028A44_GS_PRIMS_PER_SUBGRP(gs_info.gs_prims_per_subgroup) | - S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_info.gs_inst_prims_in_subgroup); + S_028A44_ES_VERTS_PER_SUBGRP(shader->gs_info.es_verts_per_subgroup) | + S_028A44_GS_PRIMS_PER_SUBGRP(shader->gs_info.gs_prims_per_subgroup) | + S_028A44_GS_INST_PRIMS_IN_SUBGRP(shader->gs_info.gs_inst_prims_in_subgroup); shader->ctx_reg.gs.vgt_gs_max_prims_per_subgroup = - S_028A94_MAX_PRIMS_PER_SUBGROUP(gs_info.max_prims_per_subgroup); + S_028A94_MAX_PRIMS_PER_SUBGROUP(shader->gs_info.max_prims_per_subgroup); shader->ctx_reg.gs.vgt_esgs_ring_itemsize = shader->key.part.gs.es->esgs_itemsize / 4; -- 2.30.2