From 6b08d269bfa310e911c4d6a8ea8297a38ad599ac Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 25 May 2020 19:59:57 +0200 Subject: [PATCH] aco: implement 16-bit reduce operations on GFX6-GFX7 No fp16 on GFX6-GFX7. Signed-off-by: Samuel Pitoiset Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_lower_to_hw_instr.cpp | 60 +++++++++++++++++++--- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 05790366a71..df7b571c529 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -48,19 +48,67 @@ aco_opcode get_reduce_opcode(chip_class chip, ReduceOp op) { */ switch (op) { case iadd8: - case iadd16: return chip >= GFX10 ? aco_opcode::v_add_u32 : aco_opcode::v_add_u16; + case iadd16: + if (chip >= GFX10) { + return aco_opcode::v_add_u32; + } else if (chip >= GFX8) { + return aco_opcode::v_add_u16; + } else { + return aco_opcode::v_add_co_u32; + } + break; case imul8: - case imul16: return chip >= GFX10 ? aco_opcode::v_mul_lo_u16_e64 : aco_opcode::v_mul_lo_u16; + case imul16: + if (chip >= GFX10) { + return aco_opcode::v_mul_lo_u16_e64; + } else if (chip >= GFX8) { + return aco_opcode::v_mul_lo_u16; + } else { + return aco_opcode::v_mul_u32_u24; + } + break; case fadd16: return aco_opcode::v_add_f16; case fmul16: return aco_opcode::v_mul_f16; case imax8: - case imax16: return chip >= GFX10 ? aco_opcode::v_max_i32 : aco_opcode::v_max_i16; + case imax16: + if (chip >= GFX10) { + return aco_opcode::v_max_i32; + } else if (chip >= GFX8) { + return aco_opcode::v_max_i16; + } else { + return aco_opcode::v_max_i32; + } + break; case imin8: - case imin16: return chip >= GFX10 ? aco_opcode::v_min_i32 : aco_opcode::v_min_i16; + case imin16: + if (chip >= GFX10) { + return aco_opcode::v_min_i32; + } else if (chip >= GFX8) { + return aco_opcode::v_min_i16; + } else { + return aco_opcode::v_min_i32; + } + break; case umin8: - case umin16: return chip >= GFX10 ? aco_opcode::v_min_u32 : aco_opcode::v_min_u16; + case umin16: + if (chip >= GFX10) { + return aco_opcode::v_min_u32; + } else if (chip >= GFX8) { + return aco_opcode::v_min_u16; + } else { + return aco_opcode::v_min_u32; + } + break; case umax8: - case umax16: return chip >= GFX10 ? aco_opcode::v_max_u32 : aco_opcode::v_max_u16; + case umax16: + if (chip >= GFX10) { + return aco_opcode::v_max_u32; + } else if (chip >= GFX8) { + return aco_opcode::v_max_u16; + } else { + return aco_opcode::v_max_u32; + } + break; case fmin16: return aco_opcode::v_min_f16; case fmax16: return aco_opcode::v_max_f16; case iadd32: return chip >= GFX9 ? aco_opcode::v_add_u32 : aco_opcode::v_add_co_u32; -- 2.30.2