aco: Remember VS/TCS output driver locations.
author: Timur Kristóf <timur.kristof@gmail.com>
Mon, 30 Mar 2020 14:54:56 +0000 (16:54 +0200)
committer: Marge Bot <eric+marge@anholt.net>
Wed, 29 Apr 2020 11:51:04 +0000 (11:51 +0000)
Instead of relying on calling shader_io_get_unique_index repeatedly,
remember which output driver location corresponds to which
varying slot.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4388>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_instruction_selection_setup.cpp

index 5a1629079174a4159261ce222937f0ab7192534c..f92462471674e75aedbfcda620d77c2d646f1fe3 100644 (file)
@@ -4224,10 +4224,12 @@ std::pair<Temp, unsigned> get_tcs_per_patch_output_vmem_offset(isel_context *ctx
 
 bool tcs_driver_location_matches_api_mask(isel_context *ctx, nir_intrinsic_instr *instr, bool per_vertex, uint64_t mask, bool *indirect)
 {
+   assert(per_vertex || ctx->shader->info.stage == MESA_SHADER_TESS_CTRL);
+
    if (mask == 0)
       return false;
 
-   unsigned off = nir_intrinsic_base(instr) * 4u;
+   unsigned drv_loc = nir_intrinsic_base(instr);
    nir_src *off_src = nir_get_io_offset_src(instr);
 
    if (!nir_src_is_const(*off_src)) {
@@ -4236,15 +4238,10 @@ bool tcs_driver_location_matches_api_mask(isel_context *ctx, nir_intrinsic_instr
    }
 
    *indirect = false;
-   off += nir_src_as_uint(*off_src) * 16u;
-
-   while (mask) {
-      unsigned slot = u_bit_scan64(&mask) + (per_vertex ? 0 : VARYING_SLOT_PATCH0);
-      if (off == shader_io_get_unique_index((gl_varying_slot) slot) * 16u)
-         return true;
-   }
-
-   return false;
+   uint64_t slot = per_vertex
+                   ? ctx->output_drv_loc_to_var_slot[ctx->shader->info.stage][drv_loc / 4]
+                   : (ctx->output_tcs_patch_drv_loc_to_var_slot[drv_loc / 4] - VARYING_SLOT_PATCH0);
+   return (((uint64_t) 1) << slot) & mask;
 }
 
 bool store_output_to_temps(isel_context *ctx, nir_intrinsic_instr *instr)
index f3464a7a21475d856872fce99a342be7bc81867f..c09d1459846b0baf357d1fefef212a05471df0bb 100644 (file)
@@ -113,6 +113,8 @@ struct isel_context {
    /* I/O information */
    shader_io_state inputs;
    shader_io_state outputs;
+   uint8_t output_drv_loc_to_var_slot[MESA_SHADER_COMPUTE][VARYING_SLOT_MAX];
+   uint8_t output_tcs_patch_drv_loc_to_var_slot[VARYING_SLOT_MAX];
 };
 
 Temp get_arg(isel_context *ctx, struct ac_arg arg)
@@ -798,6 +800,9 @@ setup_vs_variables(isel_context *ctx, nir_shader *nir)
          variable->data.driver_location = variable->data.location * 4;
       else
          unreachable("Unsupported VS stage");
+
+      assert(variable->data.location >= 0 && variable->data.location <= UINT8_MAX);
+      ctx->output_drv_loc_to_var_slot[MESA_SHADER_VERTEX][variable->data.driver_location / 4] = variable->data.location;
    }
 
    if (ctx->stage == vertex_vs || ctx->stage == ngg_vertex_gs) {
@@ -910,6 +915,12 @@ setup_tcs_variables(isel_context *ctx, nir_shader *nir)
 
    nir_foreach_variable(variable, &nir->outputs) {
       variable->data.driver_location = shader_io_get_unique_index((gl_varying_slot) variable->data.location) * 4;
+      assert(variable->data.location >= 0 && variable->data.location <= UINT8_MAX);
+
+      if (variable->data.patch)
+         ctx->output_tcs_patch_drv_loc_to_var_slot[variable->data.driver_location / 4] = variable->data.location;
+      else
+         ctx->output_drv_loc_to_var_slot[MESA_SHADER_TESS_CTRL][variable->data.driver_location / 4] = variable->data.location;
    }
 
    ctx->tcs_tess_lvl_out_loc = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER) * 16u;