radv: Refactor calculate_tess_lds_size and get_tcs_num_patches.
author: Timur Kristóf <timur.kristof@gmail.com>
Mon, 30 Mar 2020 14:04:53 +0000 (16:04 +0200)
committer: Marge Bot <eric+marge@anholt.net>
Wed, 29 Apr 2020 11:51:04 +0000 (11:51 +0000)
Previously, these functions took the bit masks of the written TCS
outputs and patch outputs, and derived the number of outputs
from those masks.

Now, they take the number of outputs and patch outputs instead.
This will allow the backend compiler to better optimize the
LDS layout.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4388>

src/amd/compiler/aco_instruction_selection_setup.cpp
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_shader.h

index 80280319673e7c6dba7ea8af95cadf497afff655..bf9e96e0b1cf28ea7685799877e90827f68d222d 100644 (file)
@@ -105,6 +105,8 @@ struct isel_context {
    unsigned tcs_tess_lvl_in_loc;
    uint64_t tcs_temp_only_inputs;
    uint32_t tcs_num_inputs;
+   uint32_t tcs_num_outputs;
+   uint32_t tcs_num_patch_outputs;
    uint32_t tcs_num_patches;
    bool tcs_in_out_eq = false;
 
@@ -871,12 +873,15 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir)
       unreachable("Unsupported TCS shader stage");
    }
 
+   ctx->tcs_num_outputs = util_last_bit64(ctx->args->shader_info->tcs.outputs_written);
+   ctx->tcs_num_patch_outputs = util_last_bit64(ctx->args->shader_info->tcs.patch_outputs_written);
+
    ctx->tcs_num_patches = get_tcs_num_patches(
                              ctx->args->options->key.tcs.input_vertices,
                              nir->info.tess.tcs_vertices_out,
                              ctx->tcs_num_inputs,
-                             ctx->args->shader_info->tcs.outputs_written,
-                             ctx->args->shader_info->tcs.patch_outputs_written,
+                             ctx->tcs_num_outputs,
+                             ctx->tcs_num_patch_outputs,
                              ctx->args->options->tess_offchip_block_dw_size,
                              ctx->args->options->chip_class,
                              ctx->args->options->family);
@@ -885,8 +890,8 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir)
                              nir->info.tess.tcs_vertices_out,
                              ctx->tcs_num_inputs,
                              ctx->tcs_num_patches,
-                             ctx->args->shader_info->tcs.outputs_written,
-                             ctx->args->shader_info->tcs.patch_outputs_written);
+                             ctx->tcs_num_outputs,
+                             ctx->tcs_num_patch_outputs);
 
    ctx->args->shader_info->tcs.num_patches = ctx->tcs_num_patches;
    ctx->args->shader_info->tcs.lds_size = lds_size;
index a40467a3194c09f7dc529ee4e92d233d690f07ba..3f214f79b92b825db4a8d2bdf28ee9f4835783ce 100644 (file)
@@ -4004,13 +4004,15 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                                ctx.tcs_num_inputs = args->options->key.tcs.num_inputs;
                        else
                                ctx.tcs_num_inputs = util_last_bit64(args->shader_info->vs.ls_outputs_written);
+                       unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
+                       unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
                        ctx.tcs_num_patches =
                                get_tcs_num_patches(
                                        ctx.args->options->key.tcs.input_vertices,
                                        ctx.shader->info.tess.tcs_vertices_out,
                                        ctx.tcs_num_inputs,
-                                       ctx.args->shader_info->tcs.outputs_written,
-                                       ctx.args->shader_info->tcs.patch_outputs_written,
+                                       tcs_num_outputs,
+                                       tcs_num_patch_outputs,
                                        ctx.args->options->tess_offchip_block_dw_size,
                                        ctx.args->options->chip_class,
                                        ctx.args->options->family);
@@ -4114,6 +4116,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                }
 
                if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+                       unsigned tcs_num_outputs = util_last_bit64(ctx.args->shader_info->tcs.outputs_written);
+                       unsigned tcs_num_patch_outputs = util_last_bit64(ctx.args->shader_info->tcs.patch_outputs_written);
                        args->shader_info->tcs.num_patches = ctx.tcs_num_patches;
                        args->shader_info->tcs.lds_size =
                                calculate_tess_lds_size(
@@ -4121,8 +4125,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                                        ctx.shader->info.tess.tcs_vertices_out,
                                        ctx.tcs_num_inputs,
                                        ctx.tcs_num_patches,
-                                       ctx.args->shader_info->tcs.outputs_written,
-                                       ctx.args->shader_info->tcs.patch_outputs_written);
+                                       tcs_num_outputs,
+                                       tcs_num_patch_outputs);
                }
        }
 
index 165df3afe2e548ea9b9d75a7fcce3dd56f6d6655..608900b5419fbc2846604d7332496376d7c6d0cb 100644 (file)
@@ -527,19 +527,16 @@ calculate_tess_lds_size(unsigned tcs_num_input_vertices,
                        unsigned tcs_num_output_vertices,
                        unsigned tcs_num_inputs,
                        unsigned tcs_num_patches,
-                       unsigned tcs_outputs_written,
-                       unsigned tcs_per_patch_outputs_written)
+                       unsigned tcs_num_outputs,
+                       unsigned tcs_num_patch_outputs)
 {
-       unsigned num_tcs_outputs = util_last_bit64(tcs_outputs_written);
-       unsigned num_tcs_patch_outputs = util_last_bit64(tcs_per_patch_outputs_written);
-
        unsigned input_vertex_size = tcs_num_inputs * 16;
-       unsigned output_vertex_size = num_tcs_outputs * 16;
+       unsigned output_vertex_size = tcs_num_outputs * 16;
 
        unsigned input_patch_size = tcs_num_input_vertices * input_vertex_size;
 
        unsigned pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
-       unsigned output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+       unsigned output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
 
        unsigned output_patch0_offset = input_patch_size * tcs_num_patches;
 
@@ -550,19 +547,17 @@ static inline unsigned
 get_tcs_num_patches(unsigned tcs_num_input_vertices,
                        unsigned tcs_num_output_vertices,
                        unsigned tcs_num_inputs,
-                       unsigned tcs_outputs_written,
-                       unsigned tcs_per_patch_outputs_written,
+                       unsigned tcs_num_outputs,
+                       unsigned tcs_num_patch_outputs,
                        unsigned tess_offchip_block_dw_size,
                        enum chip_class chip_class,
                        enum radeon_family family)
 {
        uint32_t input_vertex_size = tcs_num_inputs * 16;
        uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
-       uint32_t num_tcs_outputs = util_last_bit64(tcs_outputs_written);
-       uint32_t num_tcs_patch_outputs = util_last_bit64(tcs_per_patch_outputs_written);
-       uint32_t output_vertex_size = num_tcs_outputs * 16;
+       uint32_t output_vertex_size = tcs_num_outputs * 16;
        uint32_t pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
-       uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+       uint32_t output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
 
        /* Ensure that we only need one wave per SIMD so we don't need to check
         * resource usage. Also ensures that the number of tcs in and out