From: Samuel Pitoiset Date: Thu, 9 Jul 2020 08:06:43 +0000 (+0200) Subject: radv: align the LDS size in calculate_tess_lds_size() X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=cf89bdb9ba9c7a7ba39d142ee3c83e0379c2092a;p=mesa.git radv: align the LDS size in calculate_tess_lds_size() Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index f557364239a..0ef682dc135 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -1184,6 +1184,7 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir, nir_shader *vs) ctx->args->options->chip_class, ctx->args->options->family); unsigned lds_size = calculate_tess_lds_size( + ctx->args->options->chip_class, ctx->args->options->key.tcs.input_vertices, nir->info.tess.tcs_vertices_out, ctx->tcs_num_inputs, @@ -1192,7 +1193,7 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir, nir_shader *vs) ctx->tcs_num_patch_outputs); ctx->args->shader_info->tcs.num_patches = ctx->tcs_num_patches; - ctx->args->shader_info->tcs.lds_size = lds_size; + ctx->args->shader_info->tcs.num_lds_blocks = lds_size; ctx->program->config->lds_size = (lds_size + ctx->program->lds_alloc_granule - 1) / ctx->program->lds_alloc_granule; } diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 987d1054356..cc98eef0b99 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -4142,8 +4142,9 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written); unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written); args->shader_info->tcs.num_patches = ctx.tcs_num_patches; - args->shader_info->tcs.lds_size = + args->shader_info->tcs.num_lds_blocks = calculate_tess_lds_size( + ctx.args->options->chip_class, ctx.args->options->key.tcs.input_vertices, ctx.shader->info.tess.tcs_vertices_out, ctx.tcs_num_inputs, diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index ed57f824e9b..e598ddbabdd 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -84,7 +84,7 @@ struct radv_dsa_order_invariance { struct radv_tessellation_state { uint32_t ls_hs_config; - unsigned lds_size; + unsigned num_lds_blocks; uint32_t tf_param; }; @@ -1991,7 +1991,6 @@ calculate_tess_state(struct radv_pipeline *pipeline, { unsigned num_tcs_input_cp; unsigned num_tcs_output_cp; - unsigned lds_size; unsigned num_patches; struct radv_tessellation_state tess = {0}; @@ -1999,17 +1998,7 @@ calculate_tess_state(struct radv_pipeline *pipeline, num_tcs_output_cp = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; //TCS VERTICES OUT num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches; - lds_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.lds_size; - - if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) { - assert(lds_size <= 65536); - lds_size = align(lds_size, 512) / 512; - } else { - assert(lds_size <= 32768); - lds_size = align(lds_size, 256) / 256; - } - - tess.lds_size = lds_size; + tess.num_lds_blocks = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks; tess.ls_hs_config = S_028B58_NUM_PATCHES(num_patches) | S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) | @@ -3918,7 +3907,7 @@ radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs, radeon_emit(cs, va >> 8); radeon_emit(cs, S_00B524_MEM_BASE(va >> 40)); - rsrc2 |= S_00B52C_LDS_SIZE(tess->lds_size); + rsrc2 |= S_00B52C_LDS_SIZE(tess->num_lds_blocks); if (pipeline->device->physical_device->rad_info.chip_class == GFX7 && pipeline->device->physical_device->rad_info.family != CHIP_HAWAII) radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2); @@ -4074,9 +4063,9 @@ radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs, unsigned hs_rsrc2 = shader->config.rsrc2; if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { - hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX10(tess->lds_size); + hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX10(tess->num_lds_blocks); } else { - hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX9(tess->lds_size); + hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX9(tess->num_lds_blocks); } if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) { diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 21dd5188e1a..08d7fb9aff2 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -331,7 +331,7 @@ struct radv_shader_info { uint64_t tes_patch_inputs_read; unsigned tcs_vertices_out; uint32_t num_patches; - uint32_t lds_size; + uint32_t num_lds_blocks; uint8_t num_linked_inputs; uint8_t num_linked_outputs; uint8_t num_linked_patch_outputs; @@ -533,7 +533,8 @@ shader_io_get_unique_index(gl_varying_slot slot) } static inline unsigned -calculate_tess_lds_size(unsigned tcs_num_input_vertices, +calculate_tess_lds_size(enum chip_class chip_class, + unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices, unsigned tcs_num_inputs, unsigned tcs_num_patches, @@ -550,7 +551,17 @@ calculate_tess_lds_size(unsigned tcs_num_input_vertices, unsigned output_patch0_offset = input_patch_size * tcs_num_patches; - return output_patch0_offset + output_patch_size * tcs_num_patches; + unsigned lds_size = output_patch0_offset + output_patch_size * tcs_num_patches; + + if (chip_class >= GFX7) { + assert(lds_size <= 65536); + lds_size = align(lds_size, 512) / 512; + } else { + assert(lds_size <= 32768); + lds_size = align(lds_size, 256) / 256; + } + + return lds_size; } static inline unsigned