From b36950ad2c044967ea1b53917c0b068637492f77 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 2 Jul 2020 13:35:41 +0100 Subject: [PATCH] aco: fix nir_op_f2f16_rtne with non-default rounding modes Signed-off-by: Rhys Perry Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 8 +++++++- src/amd/compiler/aco_lower_to_hw_instr.cpp | 11 +++++++++++ src/amd/compiler/aco_opcodes.py | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 5421bab2c71..c94a1b00e5e 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2298,7 +2298,13 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) Temp src = get_alu_src(ctx, instr->src[0]); if (instr->src[0].src.ssa->bit_size == 64) src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src); - bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src); + if (instr->op == nir_op_f2f16_rtne && ctx->block->fp_mode.round16_64 != fp_round_ne) + /* We emit s_round_mode/s_setreg_imm32 in lower_to_hw_instr to + * keep value numbering and the scheduler simpler. + */ + bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, Definition(dst), src); + else + bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src); break; } case nir_op_f2f16_rtz: { diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index f3903777257..9af52aad905 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1908,6 +1908,17 @@ void lower_to_hw_instr(Program* program) reduce->operands[2].physReg(), // vtmp reduce->definitions[2].physReg(), // sitmp reduce->operands[0], reduce->definitions[0]); + } else if (instr->opcode == aco_opcode::p_cvt_f16_f32_rtne) { + float_mode new_mode = block->fp_mode; + new_mode.round16_64 = fp_round_ne; + bool set_round = new_mode.round != block->fp_mode.round; + + emit_set_mode(bld, new_mode, set_round, false); + + instr->opcode = aco_opcode::v_cvt_f16_f32; + ctx.instructions.emplace_back(std::move(instr)); + + emit_set_mode(bld, block->fp_mode, set_round, false); } else { ctx.instructions.emplace_back(std::move(instr)); } diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index 551fba81377..89e30d734f6 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -734,6 +734,7 @@ VOP1 = { (0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False), (0x09, 0x09, -1, -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9 (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True), + ( -1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True), (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True), (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False), (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False), -- 2.30.2