From: Samuel Pitoiset Date: Fri, 31 Jan 2020 07:23:02 +0000 (+0100) Subject: aco: fix MUBUF VS input loads when expanding vec3 to vec4 on GFX6 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0d14f41625fa00187f690f283c1eb6a22e354a71;p=mesa.git aco: fix MUBUF VS input loads when expanding vec3 to vec4 on GFX6 When some unused channels are skipped and we expand vec3 loads to vec4 loads, we have to adjust the fourth component. While we are at it, add an assertion to make sure we don't use MUBUF for vec3 loads on GFX6. Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2450 Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2442 Fixes: 6aecc316 ("aco: fix VS input loads with MUBUF on GFX6") Signed-off-by: Samuel Pitoiset Reviewed-by: Rhys Perry Tested-by: Marge Bot Part-of: --- diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 6aab75eb774..d885f79bd63 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3230,6 +3230,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr) while (channel_start < num_channels) { unsigned fetch_size = num_channels - channel_start; unsigned fetch_offset = attrib_offset + channel_start * vtx_info->chan_byte_size; + bool expanded = false; /* use MUBUF when possible to avoid possible alignment issues */ /* TODO: we could use SDWA to unpack 8/16-bit attributes without extra instructions */ @@ -3244,6 +3245,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr) if (fetch_size == 3 && ctx->options->chip_class == GFX6) { /* GFX6 only supports loading vec3 with MTBUF, expand to vec4. */ fetch_size = 4; + expanded = true; } } @@ -3268,6 +3270,8 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr) opcode = use_mubuf ? 
aco_opcode::buffer_load_dwordx2 : aco_opcode::tbuffer_load_format_xy; break; case 3: + assert(ctx->options->chip_class >= GFX7 || + (!use_mubuf && ctx->options->chip_class == GFX6)); opcode = use_mubuf ? aco_opcode::buffer_load_dwordx3 : aco_opcode::tbuffer_load_format_xyz; break; case 4: @@ -3279,7 +3283,8 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr) Temp fetch_dst; if (channel_start == 0 && fetch_size == dst.size() && !post_shuffle && - (alpha_adjust == RADV_ALPHA_ADJUST_NONE || num_channels <= 3)) { + !expanded && (alpha_adjust == RADV_ALPHA_ADJUST_NONE || + num_channels <= 3)) { direct_fetch = true; fetch_dst = dst; } else {