From e1523b34c2aeebdf2952bfad4f0e40326fb2cc7c Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 4 Jun 2020 10:35:23 +0200 Subject: [PATCH] aco: fix sign-extend 8-bit subgroup operations on GFX6-GFX7 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit SDWA is GFX8+. Signed-off-by: Samuel Pitoiset Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_lower_to_hw_instr.cpp | 30 +++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index a278b66ce82..480dd32e6ce 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -568,15 +568,27 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig } if (src.regClass() == v1b) { - aco_ptr sdwa{create_instruction(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)}; - sdwa->operands[0] = Operand(PhysReg{tmp}, v1); - sdwa->definitions[0] = Definition(PhysReg{tmp}, v1); - if (reduce_op == imin8 || reduce_op == imax8) - sdwa->sel[0] = sdwa_sbyte; - else - sdwa->sel[0] = sdwa_ubyte; - sdwa->dst_sel = sdwa_udword; - bld.insert(std::move(sdwa)); + if (ctx->program->chip_class >= GFX8) { + aco_ptr sdwa{create_instruction(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)}; + sdwa->operands[0] = Operand(PhysReg{tmp}, v1); + sdwa->definitions[0] = Definition(PhysReg{tmp}, v1); + if (reduce_op == imin8 || reduce_op == imax8) + sdwa->sel[0] = sdwa_sbyte; + else + sdwa->sel[0] = sdwa_ubyte; + sdwa->dst_sel = sdwa_udword; + bld.insert(std::move(sdwa)); + } else { + aco_opcode opcode; + + if (reduce_op == imin8 || reduce_op == imax8) + opcode = aco_opcode::v_bfe_i32; + else + opcode = aco_opcode::v_bfe_u32; + + bld.vop3(opcode, Definition(PhysReg{tmp}, v1), + Operand(PhysReg{tmp}, v1), Operand(0u), Operand(8u)); + } } else if (src.regClass() == v2b) { if (ctx->program->chip_class >= GFX10 && (reduce_op == iadd16 || reduce_op == imax16 || -- 2.30.2