From ee4bc13de2aacb7bab24a3e55e44e7e50434df94 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Fri, 5 Jun 2020 08:54:52 +0200
Subject: [PATCH] aco: use v_bfe_u32 for unsigned reductions sign-extension on GFX6-GFX7
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Samuel Pitoiset
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_lower_to_hw_instr.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 40d466904ef..a278b66ce82 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -591,7 +591,14 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
          sdwa->dst_sel = sdwa_udword;
          bld.insert(std::move(sdwa));
       } else if (ctx->program->chip_class == GFX6 || ctx->program->chip_class == GFX7) {
-         bld.vop3(aco_opcode::v_bfe_i32, Definition(PhysReg{tmp}, v1),
+         aco_opcode opcode;
+
+         if (reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16)
+            opcode = aco_opcode::v_bfe_i32;
+         else
+            opcode = aco_opcode::v_bfe_u32;
+
+         bld.vop3(opcode, Definition(PhysReg{tmp}, v1),
                   Operand(PhysReg{tmp}, v1), Operand(0u), Operand(16u));
       }
    }
-- 
2.30.2