aco: fix MUBUF VS input loads when expanding vec3 to vec4 on GFX6
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 31 Jan 2020 07:23:02 +0000 (08:23 +0100)
committerSamuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 31 Jan 2020 12:48:56 +0000 (13:48 +0100)
When some unused channels are skipped and that we expand vec3 loads
to vec4 loads, we have to adjust the fourth component.

While we are at it, add an assertion to make sure we don't use
MUBUF for vec3 loads on GFX6.

Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2450
Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2442
Fixes: 6aecc316 ("aco: fix VS input loads with MUBUF on GFX6")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3641>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3641>

src/amd/compiler/aco_instruction_selection.cpp

index 6aab75eb7746a3843d4b93d8fc57e98085d2f9a8..d885f79bd63c0d4ee4273b41db0131ab7dd77aa2 100644 (file)
@@ -3230,6 +3230,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
       while (channel_start < num_channels) {
          unsigned fetch_size = num_channels - channel_start;
          unsigned fetch_offset = attrib_offset + channel_start * vtx_info->chan_byte_size;
+         bool expanded = false;
 
          /* use MUBUF when possible to avoid possible alignment issues */
          /* TODO: we could use SDWA to unpack 8/16-bit attributes without extra instructions */
@@ -3244,6 +3245,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
             if (fetch_size == 3 && ctx->options->chip_class == GFX6) {
                /* GFX6 only supports loading vec3 with MTBUF, expand to vec4. */
                fetch_size = 4;
+               expanded = true;
             }
          }
 
@@ -3268,6 +3270,8 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
             opcode = use_mubuf ? aco_opcode::buffer_load_dwordx2 : aco_opcode::tbuffer_load_format_xy;
             break;
          case 3:
+            assert(ctx->options->chip_class >= GFX7 ||
+                   (!use_mubuf && ctx->options->chip_class == GFX6));
             opcode = use_mubuf ? aco_opcode::buffer_load_dwordx3 : aco_opcode::tbuffer_load_format_xyz;
             break;
          case 4:
@@ -3279,7 +3283,8 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
 
          Temp fetch_dst;
          if (channel_start == 0 && fetch_size == dst.size() && !post_shuffle &&
-             (alpha_adjust == RADV_ALPHA_ADJUST_NONE || num_channels <= 3)) {
+             !expanded && (alpha_adjust == RADV_ALPHA_ADJUST_NONE ||
+                           num_channels <= 3)) {
             direct_fetch = true;
             fetch_dst = dst;
          } else {