ctx->args->options->chip_class,
ctx->args->options->family);
unsigned lds_size = calculate_tess_lds_size(
+ ctx->args->options->chip_class,
ctx->args->options->key.tcs.input_vertices,
nir->info.tess.tcs_vertices_out,
ctx->tcs_num_inputs,
ctx->tcs_num_patch_outputs);
ctx->args->shader_info->tcs.num_patches = ctx->tcs_num_patches;
- ctx->args->shader_info->tcs.lds_size = lds_size;
+ ctx->args->shader_info->tcs.num_lds_blocks = lds_size;
ctx->program->config->lds_size = (lds_size + ctx->program->lds_alloc_granule - 1) /
ctx->program->lds_alloc_granule;
}
unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
args->shader_info->tcs.num_patches = ctx.tcs_num_patches;
- args->shader_info->tcs.lds_size =
+ args->shader_info->tcs.num_lds_blocks =
calculate_tess_lds_size(
+ ctx.args->options->chip_class,
ctx.args->options->key.tcs.input_vertices,
ctx.shader->info.tess.tcs_vertices_out,
ctx.tcs_num_inputs,
struct radv_tessellation_state {
uint32_t ls_hs_config;
- unsigned lds_size;
+ unsigned num_lds_blocks; /* TCS LDS allocation counted in allocation blocks, not bytes; OR'ed as-is into the LDS_SIZE field of the LS/HS RSRC2 value. */
uint32_t tf_param;
};
{
unsigned num_tcs_input_cp;
unsigned num_tcs_output_cp;
- unsigned lds_size;
unsigned num_patches;
struct radv_tessellation_state tess = {0};
num_tcs_output_cp = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; //TCS VERTICES OUT
num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_patches;
- lds_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.lds_size;
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
- assert(lds_size <= 65536);
- lds_size = align(lds_size, 512) / 512;
- } else {
- assert(lds_size <= 32768);
- lds_size = align(lds_size, 256) / 256;
- }
-
- tess.lds_size = lds_size;
+ tess.num_lds_blocks = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks;
tess.ls_hs_config = S_028B58_NUM_PATCHES(num_patches) |
S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
- rsrc2 |= S_00B52C_LDS_SIZE(tess->lds_size);
+ rsrc2 |= S_00B52C_LDS_SIZE(tess->num_lds_blocks);
if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
pipeline->device->physical_device->rad_info.family != CHIP_HAWAII)
radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
unsigned hs_rsrc2 = shader->config.rsrc2;
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX10(tess->lds_size);
+ hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX10(tess->num_lds_blocks);
} else {
- hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX9(tess->lds_size);
+ hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX9(tess->num_lds_blocks);
}
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
uint64_t tes_patch_inputs_read;
unsigned tcs_vertices_out;
uint32_t num_patches;
- uint32_t lds_size;
+ uint32_t num_lds_blocks;
uint8_t num_linked_inputs;
uint8_t num_linked_outputs;
uint8_t num_linked_patch_outputs;
}
static inline unsigned
-calculate_tess_lds_size(unsigned tcs_num_input_vertices,
+calculate_tess_lds_size(enum chip_class chip_class,
+ unsigned tcs_num_input_vertices,
unsigned tcs_num_output_vertices,
unsigned tcs_num_inputs,
unsigned tcs_num_patches,
unsigned output_patch0_offset = input_patch_size * tcs_num_patches;
- return output_patch0_offset + output_patch_size * tcs_num_patches;
+ unsigned lds_size = output_patch0_offset + output_patch_size * tcs_num_patches;
+
+ if (chip_class >= GFX7) {
+ assert(lds_size <= 65536);
+ lds_size = align(lds_size, 512) / 512;
+ } else {
+ assert(lds_size <= 32768);
+ lds_size = align(lds_size, 256) / 256;
+ }
+
+ return lds_size;
}
static inline unsigned