From d9e357e35ba5364dd414684df7dd565adfe01592 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 13 Dec 2019 13:23:27 +0000 Subject: [PATCH] aco: skip unused channels at the start when fetching vertices MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit pipeline-db (Vega): Totals from affected shaders: SGPRS: 161320 -> 161224 (-0.06 %) VGPRS: 153968 -> 149408 (-2.96 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 4331496 -> 4331308 (-0.00 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 27814 -> 28594 (2.80 %) pipeline-db (Navi): Totals from affected shaders: SGPRS: 161504 -> 161408 (-0.06 %) VGPRS: 153836 -> 149440 (-2.86 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 4327572 -> 4327604 (0.00 %) bytes LDS: 0 -> 0 (0.00 %) blocks Max Waves: 27837 -> 28618 (2.81 %) Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 11f7805b56d..0847d5b6ca8 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3214,6 +3214,15 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr) unsigned channel_start = 0; bool direct_fetch = false; + /* skip unused channels at the start */ + if (vtx_info->chan_byte_size && !post_shuffle) { + channel_start = ffs(mask) - 1; + for (unsigned i = 0; i < channel_start; i++) + channels[i] = Temp(0, s1); + } else if (vtx_info->chan_byte_size && post_shuffle && !(mask & 0x8)) { + num_channels = 3 - (ffs(mask) - 1); + } + /* load channels */ while (channel_start < num_channels) { unsigned fetch_size = num_channels - channel_start; @@ -3290,7 +3299,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr) unsigned num_temp = 0; for (unsigned i = 0; i < dst.size(); i++) { unsigned idx = i + component; - if (idx < num_channels && channels[swizzle[idx]].id()) { + if (swizzle[idx] < num_channels && channels[swizzle[idx]].id()) { Temp channel = channels[swizzle[idx]]; if (idx == 3 && alpha_adjust != RADV_ALPHA_ADJUST_NONE) channel = adjust_vertex_fetch_alpha(ctx, alpha_adjust, channel); -- 2.30.2