From fdbb2968533be9a1caca731cf11c2ed3b46e6043 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Mon, 30 Mar 2020 16:54:56 +0200 Subject: [PATCH] aco: Remember VS/TCS output driver locations. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Instead of relying on calling shader_io_get_unique_index repeatedly, remember which output driver location corresponds to which varying slot. Signed-off-by: Timur Kristóf Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 17 +++++++---------- .../aco_instruction_selection_setup.cpp | 11 +++++++++++ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 5a162907917..f9246247167 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -4224,10 +4224,12 @@ std::pair get_tcs_per_patch_output_vmem_offset(isel_context *ctx bool tcs_driver_location_matches_api_mask(isel_context *ctx, nir_intrinsic_instr *instr, bool per_vertex, uint64_t mask, bool *indirect) { + assert(per_vertex || ctx->shader->info.stage == MESA_SHADER_TESS_CTRL); + if (mask == 0) return false; - unsigned off = nir_intrinsic_base(instr) * 4u; + unsigned drv_loc = nir_intrinsic_base(instr); nir_src *off_src = nir_get_io_offset_src(instr); if (!nir_src_is_const(*off_src)) { @@ -4236,15 +4238,10 @@ bool tcs_driver_location_matches_api_mask(isel_context *ctx, nir_intrinsic_instr } *indirect = false; - off += nir_src_as_uint(*off_src) * 16u; - - while (mask) { - unsigned slot = u_bit_scan64(&mask) + (per_vertex ? 0 : VARYING_SLOT_PATCH0); - if (off == shader_io_get_unique_index((gl_varying_slot) slot) * 16u) - return true; - } - - return false; + uint64_t slot = per_vertex + ? 
ctx->output_drv_loc_to_var_slot[ctx->shader->info.stage][drv_loc / 4] + : (ctx->output_tcs_patch_drv_loc_to_var_slot[drv_loc / 4] - VARYING_SLOT_PATCH0); + return (((uint64_t) 1) << slot) & mask; } bool store_output_to_temps(isel_context *ctx, nir_intrinsic_instr *instr) diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index f3464a7a214..c09d1459846 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -113,6 +113,8 @@ struct isel_context { /* I/O information */ shader_io_state inputs; shader_io_state outputs; + uint8_t output_drv_loc_to_var_slot[MESA_SHADER_COMPUTE][VARYING_SLOT_MAX]; + uint8_t output_tcs_patch_drv_loc_to_var_slot[VARYING_SLOT_MAX]; }; Temp get_arg(isel_context *ctx, struct ac_arg arg) @@ -798,6 +800,9 @@ setup_vs_variables(isel_context *ctx, nir_shader *nir) variable->data.driver_location = variable->data.location * 4; else unreachable("Unsupported VS stage"); + + assert(variable->data.location >= 0 && variable->data.location <= UINT8_MAX); + ctx->output_drv_loc_to_var_slot[MESA_SHADER_VERTEX][variable->data.driver_location / 4] = variable->data.location; } if (ctx->stage == vertex_vs || ctx->stage == ngg_vertex_gs) { @@ -910,6 +915,12 @@ setup_tcs_variables(isel_context *ctx, nir_shader *nir) nir_foreach_variable(variable, &nir->outputs) { variable->data.driver_location = shader_io_get_unique_index((gl_varying_slot) variable->data.location) * 4; + assert(variable->data.location >= 0 && variable->data.location <= UINT8_MAX); + + if (variable->data.patch) + ctx->output_tcs_patch_drv_loc_to_var_slot[variable->data.driver_location / 4] = variable->data.location; + else + ctx->output_drv_loc_to_var_slot[MESA_SHADER_TESS_CTRL][variable->data.driver_location / 4] = variable->data.location; } ctx->tcs_tess_lvl_out_loc = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER) * 16u; -- 2.30.2