aco: fix nir_op_f2f16_rtne with non-default rounding modes
authorRhys Perry <pendingchaos02@gmail.com>
Thu, 2 Jul 2020 12:35:41 +0000 (13:35 +0100)
committerMarge Bot <eric+marge@anholt.net>
Fri, 17 Jul 2020 16:40:47 +0000 (16:40 +0000)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5773>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_lower_to_hw_instr.cpp
src/amd/compiler/aco_opcodes.py

index 5421bab2c7103f3049005eac45d58f6ea82e813c..c94a1b00e5ebb5134bc3c45a8d9fe95ca119e9de 100644 (file)
@@ -2298,7 +2298,13 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       Temp src = get_alu_src(ctx, instr->src[0]);
       if (instr->src[0].src.ssa->bit_size == 64)
          src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
       Temp src = get_alu_src(ctx, instr->src[0]);
       if (instr->src[0].src.ssa->bit_size == 64)
          src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
-      bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
+      if (instr->op == nir_op_f2f16_rtne && ctx->block->fp_mode.round16_64 != fp_round_ne)
+         /* We emit s_round_mode/s_setreg_imm32 in lower_to_hw_instr to
+          * keep value numbering and the scheduler simpler.
+          */
+         bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, Definition(dst), src);
+      else
+         bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
       break;
    }
    case nir_op_f2f16_rtz: {
       break;
    }
    case nir_op_f2f16_rtz: {
index f3903777257c31393589820b09a3869fbf1fe7e6..9af52aad9050b018b3bd246f1a34c4390913d3cb 100644 (file)
@@ -1908,6 +1908,17 @@ void lower_to_hw_instr(Program* program)
                            reduce->operands[2].physReg(), // vtmp
                            reduce->definitions[2].physReg(), // sitmp
                            reduce->operands[0], reduce->definitions[0]);
                            reduce->operands[2].physReg(), // vtmp
                            reduce->definitions[2].physReg(), // sitmp
                            reduce->operands[0], reduce->definitions[0]);
+         } else if (instr->opcode == aco_opcode::p_cvt_f16_f32_rtne) {
+            float_mode new_mode = block->fp_mode;
+            new_mode.round16_64 = fp_round_ne;
+            bool set_round = new_mode.round != block->fp_mode.round;
+
+            emit_set_mode(bld, new_mode, set_round, false);
+
+            instr->opcode = aco_opcode::v_cvt_f16_f32;
+            ctx.instructions.emplace_back(std::move(instr));
+
+            emit_set_mode(bld, block->fp_mode, set_round, false);
          } else {
             ctx.instructions.emplace_back(std::move(instr));
          }
          } else {
             ctx.instructions.emplace_back(std::move(instr));
          }
index 551fba81377fd9044cde72a713ddaa6bfecaa9c4..89e30d734f63870ff5d9848b778000d0e950133a 100644 (file)
@@ -734,6 +734,7 @@ VOP1 = {
    (0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),
    (0x09, 0x09,   -1,   -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9
    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),
    (0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),
    (0x09, 0x09,   -1,   -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9
    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),
+   (  -1,   -1,   -1,   -1,   -1, "p_cvt_f16_f32_rtne", True, True),
    (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),
    (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False),
    (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),
    (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),
    (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False),
    (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),