From bd75e9923302a3d389469b7b233968576a46f4de Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 22 Jul 2020 16:41:41 +0200 Subject: [PATCH] aco: ensure to not extract more components than have been fetched Fixes: 7015d2c249e1f7814bf5681ccd049e49e4d6495c ('aco: fix scratch loads which cross element_size boundaries') Cc: 20.1 Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 0bc0a5c5433..1d7aae1be2e 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -472,7 +472,7 @@ void byte_align_vector(isel_context *ctx, Temp vec, Operand offset, Temp dst, un offset = Operand(0u); } - unsigned num_components = dst.bytes() / component_size; + unsigned num_components = vec.bytes() / component_size; if (vec.regClass() == dst.regClass()) { assert(offset.constantValue() == 0); bld.copy(Definition(dst), vec); @@ -480,17 +480,18 @@ void byte_align_vector(isel_context *ctx, Temp vec, Operand offset, Temp dst, un return; } - emit_split_vector(ctx, vec, vec.bytes() / component_size); + emit_split_vector(ctx, vec, num_components); std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems; RegClass rc = RegClass(RegType::vgpr, component_size).as_subdword(); assert(offset.constantValue() % component_size == 0); unsigned skip = offset.constantValue() / component_size; - for (unsigned i = 0; i < num_components; i++) - elems[i] = emit_extract_vector(ctx, vec, i + skip, rc); + for (unsigned i = skip; i < num_components; i++) + elems[i - skip] = emit_extract_vector(ctx, vec, i, rc); /* if dst is vgpr - split the src and create a shrunk version according to the mask. 
*/ if (dst.type() == RegType::vgpr) { + num_components = dst.bytes() / component_size; aco_ptr<Pseudo_instruction> create_vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, num_components, 1)}; for (unsigned i = 0; i < num_components; i++) create_vec->operands[i] = Operand(elems[i]); -- 2.30.2