aco: Don't store LS VS outputs to LDS when TCS doesn't need them.
author Timur Kristóf <timur.kristof@gmail.com>
Thu, 26 Mar 2020 18:36:05 +0000 (19:36 +0100)
committer Marge Bot <eric+marge@anholt.net>
Mon, 30 Mar 2020 13:09:08 +0000 (13:09 +0000)
Totals:
Code Size: 254764624 -> 254745104 (-0.01 %) bytes

Totals from affected shaders:
VGPRS: 12132 -> 12112 (-0.16 %)
Code Size: 573364 -> 553844 (-3.40 %) bytes

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4165>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4165>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_instruction_selection_setup.cpp

index 716853d23ce998dcf8a5c8c7f1325babb8a644b2..fa3d38e1be7294884a8b1aebe18cf02d18ea1665 100644 (file)
@@ -3366,8 +3366,13 @@ void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr)
    unsigned write_mask = nir_intrinsic_write_mask(instr);
    unsigned elem_size_bytes = instr->src[0].ssa->bit_size / 8u;
 
-   if (ctx->tcs_in_out_eq)
-      store_output_to_temps(ctx, instr);
+   if (ctx->tcs_in_out_eq && store_output_to_temps(ctx, instr)) {
+      /* When the TCS only reads this output directly and for the same vertices as its invocation id, it is unnecessary to store the VS output to LDS. */
+      bool indirect_write;
+      bool temp_only_input = tcs_driver_location_matches_api_mask(ctx, instr, true, ctx->tcs_temp_only_inputs, &indirect_write);
+      if (temp_only_input && !indirect_write)
+         return;
+   }
 
    if (ctx->stage == vertex_es || ctx->stage == tess_eval_es) {
       /* GFX6-8: ES stage is not merged into GS, data is passed from ES to GS in VMEM. */
index bd90dcae83d9b5c83b866577a6711510a2448897..644bc151fcbe484edbf0247cec32e4ae78bd5b3f 100644 (file)
@@ -103,6 +103,7 @@ struct isel_context {
    /* tessellation information */
    unsigned tcs_tess_lvl_out_loc;
    unsigned tcs_tess_lvl_in_loc;
+   uint64_t tcs_temp_only_inputs;
    uint32_t tcs_num_inputs;
    uint32_t tcs_num_patches;
    bool tcs_in_out_eq = false;
@@ -908,6 +909,12 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir)
       ctx->tcs_num_inputs = ctx->args->options->key.tcs.num_inputs;
    } else if (ctx->stage == vertex_tess_control_hs) {
       ctx->tcs_num_inputs = util_last_bit64(ctx->args->shader_info->vs.ls_outputs_written);
+
+      if (ctx->tcs_in_out_eq) {
+         ctx->tcs_temp_only_inputs = ~nir->info.tess.tcs_cross_invocation_inputs_read &
+                                     ~nir->info.inputs_read_indirectly &
+                                     nir->info.inputs_read;
+      }
    } else {
       unreachable("Unsupported TCS shader stage");
    }